<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="data-paper" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.180408.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Data Note</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Human-Reviewed Uzbek Legal Named Entity Recognition Dataset</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: awaiting peer review]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Saidov</surname>
                        <given-names>Bobur</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Funding Acquisition</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0009-0000-5540-2013</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Fayzullaeva</surname>
                        <given-names>Zarnigor</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Bazarova</surname>
                        <given-names>Umida</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a3">3</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Narkabilova</surname>
                        <given-names>Gulnoza</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a4">4</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Azizova</surname>
                        <given-names>Nasiba</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-8579-197X</uri>
                    <xref ref-type="aff" rid="a5">5</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Rustamova</surname>
                        <given-names>Feruzakhon</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a6">6</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Halimova</surname>
                        <given-names>Firuza</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <xref ref-type="aff" rid="a7">7</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Urgench State University named after Abu Rayhan Biruni, Urgench, Khorezm Province, Uzbekistan</aff>
                <aff id="a2">
                    <label>2</label>Tashkent University of Information Technologies named after Muhammad al-Khwarizmi, Tashkent, Tashkent Province, Uzbekistan</aff>
                <aff id="a3">
                    <label>3</label>Navoi State University, Navoi, Uzbekistan</aff>
                <aff id="a4">
                    <label>4</label>Fergana State University, Fergana, Uzbekistan</aff>
                <aff id="a5">
                    <label>5</label>Karshi State University, Qarshi, Kashkadarya Province, Uzbekistan</aff>
                <aff id="a6">
                    <label>6</label>Andijan State Institute of Foreign Languages, Andijan, Uzbekistan</aff>
                <aff id="a7">
                    <label>7</label>Samarkand State Institute of Foreign Languages, Samarkand, Uzbekistan</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:saidovboburbek9629@gmail.com">saidovboburbek9629@gmail.com</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>10</day>
                <month>6</month>
                <year>2026</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2026</year>
            </pub-date>
            <volume>15</volume>
            <elocation-id>909</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>23</day>
                    <month>5</month>
                    <year>2026</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2026 Saidov B et al.</copyright-statement>
                <copyright-year>2026</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/15-909/pdf"/>
            <abstract>
                <p>This article describes a human-reviewed Uzbek legal-domain named entity recognition (NER) dataset developed as a reusable resource for low-resource legal NLP. The release contains 12 entity categories: PER, ORG, LOC, DATE, MONEY, POSITION, DOCNO, LAW, COURT, BANK, TIN, and CADASTRE. The dataset is provided in XLSX, CSV, JSON, and JSONL formats and is structured into two complementary layers: a core subset of manually reviewable source-grounded records and an extended augmented subset used to support lower-frequency labels in training-oriented settings. The package also includes supporting documentation, split guidance, a data dictionary, and review-related metadata, including provenance, verification status, and quality flags. Character-level start and end offsets are included where recoverable. The release is intended to facilitate Uzbek legal NER research, resource curation, and transparent reuse under provenance-aware conditions.</p>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Uzbek language</kwd>
                <kwd>legal named entity recognition</kwd>
                <kwd>legal NLP</kwd>
                <kwd>low-resource NLP</kwd>
                <kwd>dataset</kwd>
                <kwd>information extraction</kwd>
                <kwd>synthetic augmentation</kwd>
                <kwd>sequence labeling</kwd>
            </kwd-group>
            <funding-group>
                <funding-statement>The author(s) declared that no grants were involved in supporting this work.</funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec id="sec1" sec-type="intro">
            <title>Introduction</title>
            <p>Named entity recognition (NER) is an important task in natural language processing and information extraction, especially in domains where entities carry legal, administrative, and operational value. In Uzbek legal and quasi-legal texts, entities such as persons, organizations, locations, dates, monetary amounts, document identifiers, legal references, courts, banks, tax identifiers, and cadastral identifiers are important for document understanding, indexing, retrieval, and downstream language technology applications.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup> However, Uzbek remains a low-resource language in legal-domain NER, and publicly reusable resources for this setting are still limited in both label coverage and release design.
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>
                </sup>
            </p>
            <p>The dataset described in this article was prepared as a reusable Uzbek legal-domain NER resource intended to support data curation, controlled reuse, and future resource development.
                <sup>
                    <xref ref-type="bibr" rid="ref3">3</xref>
                </sup> The release covers 12 entity categories: PER, ORG, LOC, DATE, MONEY, POSITION, DOCNO, LAW, COURT, BANK, TIN, and CADASTRE. In addition to the data records themselves, the package includes supporting documentation, provenance-aware metadata, review-related status fields, and synchronized exports in XLSX, CSV, JSON, and JSONL formats. Character-level start and end offsets are included where recoverable.
                <sup>
                    <xref ref-type="bibr" rid="ref4">4</xref>
                </sup>
            </p>
            <p>A central feature of the release is its layered structure. The package distinguishes between a core subset of manually reviewable source-grounded records and an extended augmented subset intended to provide training support for lower-frequency labels.
                <sup>
                    <xref ref-type="bibr" rid="ref5">5</xref>
                </sup> This separation was introduced to improve transparency and to make it easier for future users to distinguish source-grounded material from synthetic support data.
                <sup>
                    <xref ref-type="bibr" rid="ref6">6</xref>
                </sup> The release should therefore be interpreted as a human-reviewed, gold-ready resource rather than as a fully finalized gold-standard benchmark.
                <sup>
                    <xref ref-type="bibr" rid="ref7">7</xref>
                </sup>
            </p>
            <p>The present article focuses on describing the dataset, its construction logic, package organization, and validation-oriented release structure. The resource is intended to support Uzbek legal NER research, dataset organization, and transparent reuse under provenance-aware conditions.</p>
        </sec>
        <sec id="sec2">
            <title>Materials and methods</title>
            <sec id="sec3">
                <title>Dataset design and scope</title>
                <p>The dataset was designed as a reusable Uzbek legal-domain named entity recognition (NER) resource for low-resource information extraction research.
                    <sup>
                        <xref ref-type="bibr" rid="ref8">8</xref>
                    </sup> The release covers 12 entity categories: PER, ORG, LOC, DATE, MONEY, POSITION, DOCNO, LAW, COURT, BANK, TIN, and CADASTRE. The main goal of the dataset construction process was to create a provenance-aware and human-reviewable resource that can support dataset curation, transparent reuse, and model training in Uzbek legal NLP.
                    <sup>
                        <xref ref-type="bibr" rid="ref9">9</xref>
                    </sup>
                </p>
                <p>The release was organized as a multi-layer package rather than as a single flat table. In particular, the dataset distinguishes between:
                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>A core benchmark-oriented subset containing the most reviewable and source-grounded records, and</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>An extended augmented subset containing additional examples reserved for training support, especially for lower-frequency labels.
                                <sup>
                                    <xref ref-type="bibr" rid="ref10">10</xref>
                                </sup>
                            </p>
                        </list-item>
                    </list>
                </p>
                <p>This layered design was adopted to preserve methodological transparency and to prevent benchmark-oriented records from being mixed with augmentation-oriented material without explicit provenance tracking.</p>
            </sec>
            <sec id="sec4">
                <title>Data sources</title>
                <p>The dataset was compiled from Uzbek legal and quasi-legal texts collected from publicly accessible and reusable sources. Source selection was guided by legal relevance, practical reusability, and the need to cover both standard entity classes (such as persons, organizations, and locations) and legal-administrative entity classes (such as document numbers, legal references, tax identifiers, and cadastral identifiers).
                    <sup>
                        <xref ref-type="bibr" rid="ref11">11</xref>
                    </sup>
                </p>
                <p>Because the target schema includes specialized labels that are not uniformly represented in public texts, the collection process was label-aware. High-frequency entity classes such as PER, ORG, and LOC were gathered from broader institutional and formal texts, whereas lower-frequency and domain-specific classes such as BANK, COURT, TIN, and CADASTRE required more targeted retrieval.
                    <sup>
                        <xref ref-type="bibr" rid="ref12">12</xref>
                    </sup> Source-level provenance was preserved wherever possible through metadata fields such as Source_Name, Source_URL, Data_Origin, and Source_Group.</p>
            </sec>
            <sec id="sec5">
                <title>Record assembly and preprocessing</title>
                <p>After source collection, candidate records were assembled in a label-wise manner rather than through a single uniform pipeline. Intermediate label-specific tables were created first and then merged into a unified release structure.
                    <sup>
                        <xref ref-type="bibr" rid="ref13">13</xref>
                    </sup> This approach made it possible to monitor class coverage, identify low-resource labels early, and perform targeted refinement where required.</p>
                <p>During preprocessing, the dataset underwent several harmonization steps:
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Alignment of label-wise tables into a common schema;</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Sentence cleaning and normalization;</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Standardization of entity-bearing fields;</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Preservation of provenance metadata;</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Integration of review-related fields;</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Consolidation of repeated or redundant rows.</p>
                        </list-item>
                    </list>
                </p>
                <p>Each record in the final package was represented as a row containing a text context (Sentence), an associated entity mention (Extracted_Text), a label (Label), and supporting provenance, verification, and usage metadata. Whenever recoverable, character-level offsets (Start_Char and End_Char) were also included to facilitate later conversion into stricter span-based sequence-labeling formats. The principal record-level fields included in the released dataset are summarized in 
                    <xref ref-type="table" rid="T1">Table 1</xref>. These fields describe not only the text and entity content of each record, but also its provenance, review status, and intended use within the release structure.</p>
                <table-wrap id="T1" orientation="portrait" position="float">
                    <label>Table 1.</label>
                    <caption>
                        <title>Main fields provided in each dataset record.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Field</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Type</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Description</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Record_ID</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">unique record identifier</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Sentence</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">sentence or text snippet</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Extracted_Text</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">target entity mention</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Label</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">assigned entity category</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Start_Char</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">integer/null</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">start offset of entity</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">End_Char</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">integer/null</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">end offset of entity</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Source_Name</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">source identifier</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Source_URL</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">string/null</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">source link when available</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Data_Origin</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">provenance tag</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Source_Group</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">open-source or synthetic</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Is_Verified</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">verification indicator</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Verification_Status</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">current review state</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Gold_Status</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">benchmark role</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Quality_Flag</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">warning/quality note</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Recommended_Split</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">suggested subset assignment</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p><xref ref-type="table" rid="T1">Table 1</xref> summarizes the principal text, provenance, review, and release-management fields included in the dataset package.</p>
            </sec>
            <sec id="sec6">
                <title>Annotation schema</title>
                <p>The dataset uses a 12-class entity schema designed specifically for Uzbek legal and quasi-legal documents. The selected labels were intended to balance practical legal relevance with annotation interpretability. In addition to general-domain categories such as PER, ORG, LOC, and DATE, the schema includes legal-administrative categories such as DOCNO, LAW, COURT, BANK, TIN, and CADASTRE, which are important for legal document analysis, structured extraction, and retrieval-oriented NLP applications.</p>
                <p>To reduce ambiguity, the release documentation distinguishes between labels that may be superficially similar but functionally different in legal texts, such as ORG vs BANK, ORG vs COURT, DOCNO vs TIN, and DOCNO vs CADASTRE.
                    <sup>
                        <xref ref-type="bibr" rid="ref14">14</xref>
                    </sup> The full set of entity categories included in the release is summarized in 
                    <xref ref-type="table" rid="T2">Table 2</xref>. This overview clarifies the scope of the annotation schema and highlights the legal-domain relevance of the selected labels.</p>
                <table-wrap id="T2" orientation="portrait" position="float">
                    <label>Table 2.</label>
                    <caption>
                        <title>Entity labels included in the dataset.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Label</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Description</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Typical legal use</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">PER</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Person names</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">parties, signatories, representatives</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">ORG</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Organizations and institutions</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">companies, agencies, ministries</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">LOC</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Locations and administrative places</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">cities, regions, districts</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">DATE</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Explicit date expressions</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">agreement dates, deadlines</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">MONEY</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Monetary amounts</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">payments, contract values</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">POSITION</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Official positions and roles</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">director, manager</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">DOCNO</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Document identifiers</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">contract numbers, decree IDs</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">LAW</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Legal references</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">laws, codes, regulations</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">COURT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Judicial institutions</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">court names</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">BANK</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Banking institutions</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">payment banks</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">TIN</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Tax/personal identifiers</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">STIR, INN, JSHSHIR</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">CADASTRE</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Property/cadastral identifiers</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">cadastral numbers</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>The table lists the 12 named entity categories included in the release together with their general interpretation and typical legal-domain use. 
                    <italic toggle="yes">Abbreviations:</italic> PER, person; ORG, organization; LOC, location; DOCNO, document number; LAW, legal reference; COURT, judicial institution; BANK, banking institution; TIN, tax identification number; CADASTRE, cadastral identifier.</p>
            </sec>
            <sec id="sec7">
                <title>Review and validation workflow</title>
                <p>The release was prepared through a review-oriented refinement workflow rather than as a fully finalized gold-standard benchmark. After merging and harmonization, the records were screened using a staged quality-control procedure. Priority was given to:
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Rows with missing extracted entity text;</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Lower-frequency and structurally sensitive labels;</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Rows with incomplete provenance;</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Rows reserved for augmentation-oriented use.</p>
                        </list-item>
                    </list>
                </p>
                <p>Review decisions were recorded explicitly using categories such as keep, edit, drop, and move_to_augmented_only. The release also preserves row-level review metadata through fields such as Is_Verified, Verification_Status, Gold_Status, Eligibility, and Quality_Flag. This design makes the current version more transparent and easier to refine collaboratively in future iterations.
                    <sup>
                        <xref ref-type="bibr" rid="ref15">15</xref>
                    </sup>
                </p>
                <p>The present release should therefore be interpreted as a human-reviewed, gold-ready resource, rather than as a fully adjudicated final gold benchmark.</p>
            </sec>
            <sec id="sec8">
                <title>Synthetic augmentation</title>
                <p>To support labels with limited naturally available coverage in public Uzbek legal texts, a controlled synthetic augmentation strategy was applied selectively. Synthetic rows were introduced only for lower-frequency classes where source-grounded examples were insufficient for practical experimental support. The augmentation process followed a template-based generation strategy, designed to preserve legal-domain plausibility and label clarity.
                    <sup>
                        <xref ref-type="bibr" rid="ref16">16</xref>
                    </sup>
                </p>
                <p>Synthetic records were explicitly marked in the metadata and were kept separate from benchmark-oriented material. These rows are intended for training support only and should not be treated as equivalent to manually reviewable source-grounded examples in evaluation settings.</p>
            </sec>
            <sec id="sec9">
                <title>Export formats and package structure</title>
                <p>The final dataset was released in four synchronized formats: XLSX, CSV, JSON, and JSONL. These formats were chosen to support different user needs:
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>XLSX for manual inspection and metadata review,</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>CSV for tabular processing and descriptive analysis,</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>JSON for structured record-based storage,</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>JSONL for NLP pipelines and line-based machine processing.</p>
                        </list-item>
                    </list>
                </p>
                <p>In addition to the data files, the package includes supporting documentation such as a README, data dictionary, split description, known limitations, citation file, changelog, and license information. The main components of the released package and their practical roles are summarized in 
                    <xref ref-type="table" rid="T3">Table 3</xref>. This table makes the multi-format structure of the release explicit and shows how the package supports both manual inspection and machine-readable reuse.</p>
                <table-wrap id="T3" orientation="portrait" position="float">
                    <label>Table 3. </label>
                    <caption>
                        <title>Main files and formats included in the released dataset package.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Component</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Format</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Purpose</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Main dataset</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">XLSX</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">manual inspection and metadata review</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Main dataset</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">CSV</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">tabular processing and statistics</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Main dataset</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">JSON</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">structured record storage</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Main dataset</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">JSONL</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">NLP pipelines and batch processing</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">README</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">TXT/MD</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">package overview</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Data dictionary</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">CSV/TXT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">field explanation</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Split description</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">TXT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">train/dev/test usage guidance</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">Changelog</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">TXT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">version history</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">License</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">TXT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">legal reuse conditions</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>The table summarizes the principal package components, their file formats, and their intended practical role in reuse. 
                    <italic toggle="yes">Abbreviations:</italic> XLSX, Microsoft Excel workbook; CSV, comma-separated values; JSON, JavaScript Object Notation; JSONL, line-delimited JSON; TXT, plain text; MD, Markdown.</p>
            </sec>
            <sec id="sec10">
                <title>Software and reproducibility</title>
                <p>The dataset preparation, harmonization, and export workflow was carried out using standard spreadsheet and scripting tools. Structured preprocessing and export operations were performed using Python (version 3.10) with data-processing libraries such as pandas (version 1.5.3) and openpyxl, together with the standard json and re (regular expressions) modules. The scripts and supporting files used for preprocessing, entity extraction, and formatting for the Zenodo release are included in the associated Zenodo repository: 
                    <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.19682709">https://doi.org/10.5281/zenodo.19682709</ext-link>.
                    <sup>
                        <xref ref-type="bibr" rid="ref17">17</xref>
                    </sup>
                </p>
                <p>No task-specific model training was required for the generation of the released tabular package itself; however, semi-automatic processing steps such as field normalization, recoverable span localization, and export conversion were performed using the above software environment. Parameters that may affect reproducibility include text normalization rules, row filtering criteria, provenance-based subset separation, and the logic used to assign Recommended_Split, Gold_Status, and Verification_Status fields.
                    <sup>
                        <xref ref-type="bibr" rid="ref18">18</xref>
                    </sup> The overall dataset construction and release workflow is illustrated in 
                    <xref ref-type="fig" rid="f1">Figure 1</xref>.</p>
                <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                    <label>Figure 1. </label>
                    <caption>
                        <title>Overview of the dataset construction and release workflow.</title>
                        <p>The workflow proceeds from source collection and label-wise candidate gathering to record assembly and preprocessing, review and validation, provenance separation, creation of the core and augmented subsets, export in XLSX, CSV, JSON, and JSONL formats, and final release via Zenodo. Abbreviations: XLSX, Microsoft Excel workbook; CSV, comma-separated values; JSON, JavaScript Object Notation; JSONL, line-delimited JSON.</p>
                    </caption>
                    <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/199014/78e14708-f3c2-43ea-ba01-f557e4902034_figure1.gif"/>
                </fig>
            </sec>
        </sec>
        <sec id="sec11">
            <title>Dataset validation</title>
            <p>The current release was validated through a staged quality-control and review workflow intended to improve structural consistency, provenance transparency, and practical reusability. Validation did not rely on a single binary accept/reject decision for the entire dataset; instead, it combined structural checks, review-oriented prioritization, and record-level status tracking.
                <sup>
                    <xref ref-type="bibr" rid="ref19">19</xref>
                </sup>
            </p>
            <p>At the structural level, the dataset was checked for consistency across the released XLSX, CSV, JSON, and JSONL formats. These checks included field alignment, label consistency, preservation of provenance metadata, and consistency of release-specific fields such as Gold_Status, Verification_Status, Eligibility, Recommended_Split, and Quality_Flag. Where possible, recoverable character-level offsets (Start_Char and End_Char) were retained to support later conversion into stricter span-based NER formats.</p>
            <p>At the record level, validation followed a review-oriented workflow. Priority was given to rows that were more likely to affect downstream benchmark quality, including records with missing extracted entity text, incomplete provenance, lower-frequency labels, and rows reserved for augmentation-oriented use. Review outcomes were tracked explicitly through categories such as keep, edit, drop, and move_to_augmented_only, together with supporting metadata fields such as Is_Verified, Verification_Status, Gold_Status, and Quality_Flag.</p>
            <p>As shown in 
                <xref ref-type="table" rid="T4">Table 4</xref>, the current release provides stronger coverage for high-frequency classes such as PER, ORG, and LOC, whereas lower-frequency classes such as BANK and COURT remain more limited and should therefore be interpreted with additional caution in benchmark-oriented settings.</p>
            <table-wrap id="T4" orientation="portrait" position="float">
                <label>Table 4. </label>
                <caption>
                    <title>Number of records per entity label in the current release.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Label</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Number of records</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">PER</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">2000</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">ORG</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">2000</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">LOC</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">1700</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">POSITION</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">1300</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DATE</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">1200</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">MONEY</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">1200</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DOCNO</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">1200</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">LAW</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">1200</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">TIN</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">800</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">CADASTRE</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">700</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">BANK</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">411</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">COURT</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">325</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top"><bold>Total</bold></td>
                            <td align="left" colspan="1" rowspan="1" valign="top"><bold>14,036</bold></td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <p>A further validation principle was explicit provenance separation. The release distinguishes between source-grounded open-source rows and synthetic augmentation rows, and this distinction was preserved through record-level metadata. Synthetic rows were not treated as equivalent to benchmark-oriented source-grounded examples and were retained only for augmentation-aware training use. This separation improves interpretability and reduces the risk of unintentionally mixing evaluation material with training support data.</p>
            <p>The released package should therefore be interpreted as a human-reviewed, gold-ready dataset resource rather than as a fully finalized gold-standard benchmark. Its main strengths are its multi-format release design, explicit provenance metadata, and review-aware structure. These features make the dataset immediately usable for exploratory analysis, dataset refinement, and controlled training use under provenance-aware conditions.</p>
            <p>At the same time, several limitations should be noted. First, not all records have undergone exhaustive final manual span adjudication. This is especially relevant for label pairs with higher ambiguity potential, such as ORG vs BANK, ORG vs COURT, DOCNO vs TIN, and DOCNO vs CADASTRE. Second, the current release is class-imbalanced, with stronger coverage for higher-frequency classes such as PER, ORG, and LOC, and more limited coverage for lower-frequency classes such as BANK and COURT. Third, the package contains both source-grounded and synthetic material with different evidential status, which requires provenance-aware filtering depending on the intended use case.</p>
            <p>For conservative benchmark-oriented evaluation, users should prioritize the most reliable source-grounded rows and apply additional manual verification where necessary. For training-oriented experiments, the augmented subset may also be used, provided that the inclusion of synthetic support is reported explicitly.</p>
        </sec>
        <sec id="sec12">
            <title>Ethical considerations</title>
            <p>This work did not involve animal experiments or direct human-subject research. The dataset was compiled from publicly accessible and reusable Uzbek legal and quasi-legal textual materials, together with controlled synthetic augmentation used for training support in lower-frequency labels. No direct participant recruitment, intervention, or experimental data collection was conducted. Therefore, ethical approval and informed consent were not required.</p>
            <p>The release was prepared for research reuse with attention to provenance, documentation, and transparency. Users of the dataset are expected to follow applicable legal, ethical, and institutional requirements when handling identifier-like fields and other potentially sensitive legal-domain information.</p>
        </sec>
    </body>
    <back>
        <sec id="sec15" sec-type="data-availability">
            <title>Data availability</title>
            <p>The dataset is publicly available via Zenodo: 
                <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.19682709">https://doi.org/10.5281/zenodo.19682709</ext-link>.
                <sup>
                    <xref ref-type="bibr" rid="ref20">20</xref>
                </sup> The Zenodo record includes the released dataset files, supporting documentation, and release metadata required to interpret and reuse the package. Data are available under the terms of the 
                <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International license (CC-BY 4.0)</ext-link>.</p>
            <sec id="sec16">
                <title>Extended data</title>
                <p>Extended data associated with this article are available in the same Zenodo repository: 
                    <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.19682709">https://doi.org/10.5281/zenodo.19682709</ext-link>.
                    <sup>
                        <xref ref-type="bibr" rid="ref20">20</xref>
                    </sup> These materials include the README file, annotation guidelines, label definitions and boundary rules, preprocessing and script notes, data dictionary, split guidance, changelog, license information, and supporting documentation describing provenance, verification, and package interpretation. These files are provided to support transparent reuse and correct interpretation of the dataset.</p>
                <p>Data are available under the terms of the 
                    <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International license (CC-BY 4.0)</ext-link>.</p>
            </sec>
        </sec>
        <ack>
            <title>Acknowledgements</title>
            <p>The authors would like to thank the colleagues and reviewers who contributed to the review, organization, and refinement of the dataset and its documentation. Their feedback helped improve the structure and clarity of the released resource. During the preparation of this manuscript, the authors used ChatGPT (GPT-5.2, OpenAI) only for grammar and spelling checks and not for study design, data collection, data labeling, model training, statistical analysis, or interpretation of results. All scientific content, analyses, and conclusions were produced and verified by the authors, who take full responsibility for the publication.</p>
        </ack>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <label>1</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Saidov</surname>
                            <given-names>BR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Barakhnin</surname>
                            <given-names>VB</given-names>
                        </name>
</person-group>:
                    <article-title>Sentiment analysis of Uzbek texts using NER: A comparative study of SVM, LSTM, and BERT models.</article-title>
                    <source>

                        <italic toggle="yes">The Herald of the Siberian State University of Telecommunications and Information Science.</italic>
</source>
                    <year>2025</year>;<volume>19</volume>:<fpage>3</fpage>&#x2013;<lpage>17</lpage>.
                    <pub-id pub-id-type="doi">10.55648/1998-6920-2025-19-4-3-16</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Abdullaeva</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Khamidov</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Iskandarov</surname>
                            <given-names>B</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Lexical resources and named entity recognition for low-resource languages: A comparative study.</article-title>
                    <source>

                        <italic toggle="yes">Int J Comput Linguist Appl.</italic>
</source>
                    <year>2025</year>;<volume>16</volume>:<fpage>45</fpage>&#x2013;<lpage>60</lpage>.</mixed-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bakhtiyarov</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zokirov</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gaybullaev</surname>
                            <given-names>O</given-names>
                        </name>
</person-group>:
                    <article-title>Neural architectures for entity-aware sentiment analysis in multilingual corpora.</article-title>
                    <source>

                        <italic toggle="yes">Appl. Artif. Intell.</italic>
</source>
                    <year>2025</year>;<volume>39</volume>:<fpage>412</fpage>&#x2013;<lpage>428</lpage>.
                    <pub-id pub-id-type="doi">10.1080/08839514.2025.2345612</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Panjiyeva</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Begmatov</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Djalilova</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Knowledge-based and neural hybrid models for named entity recognition in educational texts.</article-title>
                    <source>

                        <italic toggle="yes">Educ. Inf. Technol.</italic>
</source>
                    <year>2025</year>;<volume>30</volume>:<fpage>5011</fpage>&#x2013;<lpage>5030</lpage>.
                    <pub-id pub-id-type="doi">10.1007/s10639-025-11890-3</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Jumaniyozova</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ravikumar</surname>
                            <given-names>RN</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Aarthi</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Cross-lingual transfer learning for sentiment and entity recognition in low-resource settings.</article-title>
                    <source>

                        <italic toggle="yes">ACM Trans Asian Low-Resour Lang Inf Process.</italic>
</source>
                    <year>2025</year>;<volume>24</volume>:<fpage>1</fpage>&#x2013;<lpage>23</lpage>.
                    <pub-id pub-id-type="doi">10.1145/3678912</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mamadiyarov</surname>
                            <given-names>Z</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ngongo</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Buriev</surname>
                            <given-names>KT</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Hybrid deep learning models for multilingual sentiment and entity extraction.</article-title>
                    <source>

                        <italic toggle="yes">J Artif Intell Soft Comput Res.</italic>
</source>
                    <year>2025</year>;<volume>15</volume>:<fpage>201</fpage>&#x2013;<lpage>214</lpage>.</mixed-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Saidov</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Barakhnin</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Saparbaev</surname>
                            <given-names>R</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A hybrid NER&#x2013;sentiment model for Uzbek texts: Integrating lexical, deep learning, and entity-based approaches.</article-title>
                    <source>

                        <italic toggle="yes">Big Data Cogn. Comput.</italic>
</source>
                    <year>2026</year>;<volume>10</volume>:<fpage>92</fpage>.
                    <pub-id pub-id-type="doi">10.3390/bdcc10030092</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mengliev</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Barakhnin</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Abdurakhmonova</surname>
                            <given-names>N</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Developing named entity recognition algorithms for Uzbek: Dataset insights and implementation.</article-title>
                    <source>

                        <italic toggle="yes">Data Brief.</italic>
</source>
                    <year>2024</year>;<volume>54</volume>:<fpage>110413</fpage>.
                    <pub-id pub-id-type="pmid">38708296</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.dib.2024.110413</pub-id>
                    <pub-id pub-id-type="pmcid">PMC11067374</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mengliev</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Barakhnin</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Eshkulov</surname>
                            <given-names>M</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A comprehensive dataset and neural network approach for named entity recognition in the Uzbek language.</article-title>
                    <source>

                        <italic toggle="yes">Data Brief.</italic>
</source>
                    <year>2025</year>;<volume>58</volume>:<fpage>111249</fpage>.
                    <pub-id pub-id-type="pmid">39811531</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.dib.2024.111249</pub-id>
                    <pub-id pub-id-type="pmcid">PMC11732609</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mengliev</surname>
                            <given-names>D</given-names>
                        </name>
</person-group>:
                    <article-title>Dataset of Named Entity Recognition for Uzbek language.</article-title>
                    <source>

                        <italic toggle="yes">Mendeley Data.</italic>
</source>
                    <year>2024</year>:<volume>V1</volume>.
                    <pub-id pub-id-type="doi">10.17632/xf7pyvhb2v.1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Yusufu</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jiang</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ainiwaer</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>UZNER: A Benchmark for Named Entity Recognition in Uzbek.</chapter-title>
                    <source>

                        <italic toggle="yes">Natural Language Processing and Chinese Computing.</italic>
</source>
                    <publisher-loc>Cham, Switzerland</publisher-loc>:
                    <publisher-name>Springer</publisher-name>;<year>2023</year>; pp. <fpage>171</fpage>&#x2013;<lpage>183</lpage>.
                    <pub-id pub-id-type="doi">10.1007/978-3-031-44693-1_14</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Au</surname>
                            <given-names>TWT</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lampos</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Cox</surname>
                            <given-names>I</given-names>
                        </name>
</person-group>:
                    <chapter-title>E-NER&#x2014;An annotated named entity recognition corpus of legal text.</chapter-title>
                    <source>

                        <italic toggle="yes">Proceedings of the Natural Legal Language Processing Workshop 2022.</italic>
</source>
                    <publisher-loc>Abu Dhabi, United Arab Emirates (Hybrid)</publisher-loc>:
                    <publisher-name>Association for Computational Linguistics</publisher-name>;<year>2022</year>; pp. <fpage>246</fpage>&#x2013;<lpage>255</lpage>.
                    <pub-id pub-id-type="doi">10.18653/v1/2022.nllp-1.22</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>P&#x0103;i&#x0219;</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mitrofan</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gasan</surname>
                            <given-names>CL</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>LegalNERo: A linked corpus for named entity recognition in the Romanian legal domain.</article-title>
                    <source>

                        <italic toggle="yes">Semant Web.</italic>
</source>
                    <year>2024</year>;<volume>15</volume>:<fpage>831</fpage>&#x2013;<lpage>844</lpage>.
                    <pub-id pub-id-type="doi">10.3233/SW-233351</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <label>14</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Yulianti</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bhary</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Abdurrohman</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Named entity recognition on Indonesian legal documents: A dataset and study using transformer-based models.</article-title>
                    <source>

                        <italic toggle="yes">International Journal of Electrical and Computer Engineering (IJECE).</italic>
</source>
                    <year>2024</year>;<volume>14</volume>:<fpage>5489</fpage>&#x2013;<lpage>5501</lpage>.
                    <pub-id pub-id-type="doi">10.11591/ijece.v14i5.pp5489-5501</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref15">
                <label>15</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ullah</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gelbukh</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zamir</surname>
                            <given-names>MT</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Enhancement of named entity recognition in low-resource languages with data augmentation and BERT models: A case study on Urdu.</article-title>
                    <source>

                        <italic toggle="yes">Computers.</italic>
</source>
                    <year>2024</year>;<volume>13</volume>:<fpage>258</fpage>.
                    <pub-id pub-id-type="doi">10.3390/computers13100258</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref16">
                <label>16</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chen</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pei</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ke</surname>
                            <given-names>Z</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Low-resource named entity recognition via the pre-training model.</article-title>
                    <source>

                        <italic toggle="yes">Symmetry.</italic>
</source>
                    <year>2021</year>;<volume>13</volume>:<fpage>786</fpage>.
                    <pub-id pub-id-type="doi">10.3390/sym13050786</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <label>17</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Saidov</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Fayzullaeva</surname>
                            <given-names>Z</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bazarova</surname>
                            <given-names>U</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Uzbek Legal NER Dataset Package: A Gold-Ready Multi-Format Resource with Core Gold and Extended Augmented Layers.</article-title>
                    <source>

                        <italic toggle="yes">Zenodo.</italic>
</source>
                    <year>2026</year>.
                    <pub-id pub-id-type="doi">10.5281/zenodo.19682709</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <label>18</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Torge</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Politov</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lehmann</surname>
                            <given-names>C</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Named entity recognition for low-resource languages&#x2014;Profiting from language families.</chapter-title>
                    <source>

                        <italic toggle="yes">Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023).</italic>
</source>
                    <publisher-loc>Dubrovnik, Croatia</publisher-loc>:
                    <publisher-name>Association for Computational Linguistics</publisher-name>;<year>2023</year>; pp. <fpage>1</fpage>&#x2013;<lpage>10</lpage>.
                    <pub-id pub-id-type="doi">10.18653/v1/2023.bsnlp-1.1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <label>19</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bahad</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mishra</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Krishnamurthy</surname>
                            <given-names>P</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Fine-tuning pre-trained named entity recognition models for Indian languages.</chapter-title>
                    <source>

                        <italic toggle="yes">Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop).</italic>
</source>
                    <publisher-loc>Mexico City, Mexico</publisher-loc>:
                    <publisher-name>Association for Computational Linguistics</publisher-name>;<year>2024</year>; pp. <fpage>75</fpage>&#x2013;<lpage>82</lpage>.
                    <pub-id pub-id-type="doi">10.18653/v1/2024.naacl-srw.9</pub-id>
                </mixed-citation>
            </ref>
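            <!-- NOTE(review): this entry repeats ref17 verbatim (same authors, title, source, year and Zenodo DOI). Confirm whether both entries are intentionally kept (e.g. the dataset is cited from two separate sections) or whether citations should point to a single reference. -->
            <ref id="ref20">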
                <label>20</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Saidov</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Fayzullaeva</surname>
                            <given-names>Z</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bazarova</surname>
                            <given-names>U</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Uzbek Legal NER Dataset Package: A Gold-Ready Multi-Format Resource with Core Gold and Extended Augmented Layers.</article-title>
                    <source>

                        <italic toggle="yes">Zenodo.</italic>
</source>
                    <year>2026</year>.
                    <pub-id pub-id-type="doi">10.5281/zenodo.19682709</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
</article>
