<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2022.1053449</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Lexicon and attention-based named entity recognition for kiwifruit diseases and pests: A Deep learning approach</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Lilin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2018105"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Nie</surname>
<given-names>Xiaolin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2045730"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Mingmei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Gu</surname>
<given-names>Mingyang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Geissen</surname>
<given-names>Violette</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1140079"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ritsema</surname>
<given-names>Coen J.</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/112997"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Niu</surname>
<given-names>Dangdang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhang</surname>
<given-names>Hongming</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Information Engineering, Northwest Agricultural and Forestry (A&amp;F) University</institution>, <addr-line>Yangling</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Soil Physics and Land Management Group, Wageningen University</institution>, <addr-line>Wageningen</addr-line>, <country>Netherlands</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Uzair Aslam Bhatti, Hainan University, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Mir Muhammad Nizamani, Guizhou University, China; Ahmad Hasnain, Nanjing Normal University, China; Yiyuan Wang, Northeast Normal University, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Dangdang Niu, <email xlink:href="mailto:niudd@nwafu.edu.cn">niudd@nwafu.edu.cn</email>; Hongming Zhang, <email xlink:href="mailto:zhm@nwafu.edu.cn">zhm@nwafu.edu.cn</email>
</p>
</fn>
<fn fn-type="other" id="fn002">
<p>This article was submitted to Technical Advances in Plant Science, a section of the journal Frontiers in Plant Science</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>17</day>
<month>11</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>1053449</elocation-id>
<history>
<date date-type="received">
<day>25</day>
<month>09</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>19</day>
<month>10</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Zhang, Nie, Zhang, Gu, Geissen, Ritsema, Niu and Zhang</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Zhang, Nie, Zhang, Gu, Geissen, Ritsema, Niu and Zhang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Named Entity Recognition (NER) is a crucial step in mining information from massive agricultural texts, which is required in the construction of many knowledge-based agricultural support systems, such as agricultural technology question answering systems. The vital domain characteristics of Chinese agricultural text cause the Chinese NER (CNER) in kiwifruit diseases and pests to suffer from the insensitivity of common word segmentation tools to kiwifruit-related texts and the feature extraction capability of the sequence encoding layer being challenged. In order to alleviate the above problems, effectively mine information from kiwifruit-related texts to provide support for agricultural support systems such as agricultural question answering systems, this study constructed a novel Chinese agricultural NER (CANER) model KIWINER by statistics-based new word detection and two novel modules, AttSoftlexicon (Criss-cross attention-based Softlexicon) and PCAT (Parallel connection criss-cross attention), proposed in this paper. Specifically, new words were detected to improve the adaptability of word segmentation tools to kiwifruit-related texts, thereby constructing a kiwifruit lexicon. The AttSoftlexicon integrates word information into the model and makes full use of the word information with the help of Criss-cross attention network (CCNet). And the PCAT improves the feature extraction ability of sequence encoding layer through CCNet and parallel connection structure. The performance of KIWINER was evaluated on four datasets, namely KIWID (Self-annotated), Boson, ClueNER, and People&#x2019;s Daily, which achieved optimal F<sub>1</sub>-scores of 88.94%, 85.13%, 80.52%, and 92.82%, respectively. Experimental results in many aspects illustrated that methods proposed in this paper can effectively improve the recognition effect of kiwifruit diseases and pests named entities, especially for diseases and pests with strong domain characteristics.</p>
</abstract>
<kwd-group>
<kwd>intelligent farming for diseases recognition</kwd>
<kwd>Chinese named entity recognition</kwd>
<kwd>kiwifruit diseases and pests</kwd>
<kwd>data mining</kwd>
<kwd>lexicon</kwd>
<kwd>Criss-cross attention</kwd>
<kwd>deep learning</kwd>
<kwd>machine learning</kwd>
</kwd-group>
<counts>
<fig-count count="6"/>
<table-count count="8"/>
<equation-count count="19"/>
<ref-count count="41"/>
<page-count count="16"/>
<word-count count="7653"/>
</counts>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<title>1 Introduction</title>
<p>Kiwifruit is one of the economic sources of the planting industry in China, but owing to the impact of pests and diseases, the overall level of kiwifruit quality in China is not high at present (<xref ref-type="bibr" rid="B20">Jiang and Zong, 2020</xref>). Chinese named entity recognition in the field of agriculture aims to recognize the boundaries and categories of agriculture-related entities from unstructured agricultural texts, such as diseases, pests, and pesticides (<xref ref-type="bibr" rid="B14">Guo et&#xa0;al., 2020</xref>). This is a key technology in the automatic mining of knowledge from very large Chinese agricultural texts and is the basis for downstream tasks such as building agricultural knowledge graphs and constructing agricultural intelligent question-and-answer (Q&amp;A) systems (<xref ref-type="bibr" rid="B8">Drury and Roche, 2019</xref>; <xref ref-type="bibr" rid="B14">Guo et&#xa0;al., 2020</xref>). Therefore, accurate recognition of named entities in the field of kiwifruit plays an important role in ensuring the healthy development of the industry, plant protection, and convenience for agricultural workers.</p>
<p>Traditional NER methods can be divided into rule-based, dictionary-matching-based, and machine-learning-based approaches (<xref ref-type="bibr" rid="B14">Guo et&#xa0;al., 2020</xref>). Although each approach can achieve good results, they rely heavily on time- and energy-consuming pattern matching and feature engineering and have poor generalization ability. Through the application of deep learning in the field of NER and other fields (<xref ref-type="bibr" rid="B7">Chiu and Nichols, 2016</xref>; <xref ref-type="bibr" rid="B5">Bhatti et&#xa0;al., 2020b</xref>), researchers have developed various techniques for medical science (<xref ref-type="bibr" rid="B39">Zhao et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B3">Bhatti et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B31">Nawaz et&#xa0;al., 2021</xref>), cyber security (<xref ref-type="bibr" rid="B25">Li T et al., 2020</xref>), agriculture (<xref ref-type="bibr" rid="B6">Biswas and Sharan, 2021</xref>), social media (<xref ref-type="bibr" rid="B2">Aguilar et&#xa0;al., 2017</xref>) and environmental science (<xref ref-type="bibr" rid="B4">Bhatti et&#xa0;al., 2020a</xref>; <xref ref-type="bibr" rid="B1">Aamir et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B10">Galvan et&#xa0;al., 2022</xref>). In the field of Chinese NER (CNER), because sentences in Chinese texts are not naturally separated, unlike sentences in English, there is no obvious border symbol. Therefore, the first step in many original deep-learning-based CNER methods is to segment the text using word segmentation tools (<xref ref-type="bibr" rid="B35">Yang et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B15">He and Sun, 2017</xref>). With the development of research on CNER, many studies have shown that the character-based CNER model avoids segmentation errors, making it more suitable than the word-based model. 
(<xref ref-type="bibr" rid="B21">Jingzhou and Houfeng, 2008</xref>; <xref ref-type="bibr" rid="B28">Liu et&#xa0;al., 2010</xref>). However, in order to avoid the problem of segmentation errors, the character based CNER model cannot use Chinese word information. Recently many researchers have realized that word information will play a positive role in the correct recognition of Chinese entity boundaries. Therefore, lexicon-based CNER models have been widely used in recent years. For example, <xref ref-type="bibr" rid="B38">Zhang and Yang (2018)</xref> introduced the lattice long short-term memory model (Lattice-LSTM) based on a lexicon, allowing character-level and word-level information corresponding to the characters to be encoded simultaneously. <xref ref-type="bibr" rid="B32">Peng et&#xa0;al. (2020)</xref> proposed the Softlexicon method to integrate word information into the NER model by simply adjusting the character representation layer. The lexicon based model, with the help of the public lexicon, achieves better results than the purely character based model (<xref ref-type="bibr" rid="B32">Peng et&#xa0;al., 2020</xref>). For example, when the lexicon based model recognizes the Chinese entity &#x201c;&#x957f;&#x6c5f;&#x5927;&#x6865;&#x201d; (Yangtze River Bridge), words such as &#x201c;&#x957f;&#x6c5f;&#x201d; (Yangtze River), &#x201c;&#x5927;&#x6865;&#x201d; (Bridge), and &#x201c;&#x957f;&#x6c5f;&#x5927;&#x6865;&#x201d; (Yangtze River Bridge) in the lexicon can help eliminate the ambiguity of potentially related named entities in the context, such as the person name &#x201c;&#x6c5f;&#x5927;&#x6865;&#x201d; (Daqiao Jiang) (<xref ref-type="bibr" rid="B38">Zhang and Yang, 2018</xref>).</p>
<p>For CNER in the field of agriculture (CANER), the lexicon-based method makes good use of character information and word information, so using them to solve the CANER problem may be a theoretically feasible solution too. However, there is currently no open-source lexicon in the field of agriculture, and manual lexicon construction is labor-intensive. If the lexicon is built through automatic word segmentation, the existing word segmentation tools face the problem of word segmentation errors caused by insensitive word segmentation. For example, farm chemicals entities such as &#x201c;&#x901f;&#x4e50;&#x787c;&#x3001;&#x8f9b;&#x786b;&#x78f7;&#x4e73;&#x6cb9;&#x201d; (solubor, phoxim) and kiwifruit variety entities &#x201c;&#x4e2d;&#x534e;&#x7315;&#x7334;&#x6843;&#x3001;&#x7ea2;&#x5fc3;&#x7315;&#x7334;&#x6843;&#x201d; (Actinidia chinensis Planch., red-fleshed kiwi), which exist in kiwifruit-related texts, have strong domain characteristics, and these will make the word segmentation tool insensitive in the form of out-of-vocabulary (OOV) words. Therefore, many CANER methods are still character-based models (<xref ref-type="bibr" rid="B14">Guo et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B40">Zhao et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B13">Guo et&#xa0;al., 2022</xref>), and the use of word information is hindered by word segmentation errors. As for the sequence coding layer of recent CANER models, bidirectional long short-term memory (BiLSTM) is still the mainstream deep learning method, which can memorize long-text sequence features in theory (<xref ref-type="bibr" rid="B26">Liu et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B40">Zhao et&#xa0;al., 2021</xref>). However, the contextual feature extraction ability of BiLSTM has the following limitations. First, with an increase in sentence length, the feature extraction ability of BiLSTM will decline (<xref ref-type="bibr" rid="B24">Li Y et al., 2020</xref>). 
Second, BiLSTM makes each character contribute equally to the task (<xref ref-type="bibr" rid="B14">Guo et&#xa0;al., 2020</xref>), but the contribution of different types of characters in agricultural texts to the task is certainly different. Third, the strong domain features of kiwifruit-related text, particularly farm chemical-related entities, disease-related entities, and pest-related entities, pose a challenge to the feature extraction ability of BiLSTM. In summary, deep learning-based methods for CANER in the field of kiwifruit diseases and pests face the following problems: The use of word information is hampered by OOV problem in the process of lexicon construction. And the contextual information capture capability of the sequence encoding layer needs to be further improved.</p>
<p>This research proposes a lexicon-based CANER model KIWINER on the basis of bidirectional long short term memory and conditional random field model (BiLSTM-CRF). The objectives of KIWINER are to take measures to solve the above problems in the end of the previous paragraph, that is, to integrate the word information containing domain features into the model, improve the model feature extraction ability, and ultimately provide support for the construction of the kiwifruit Q&amp;A system. Specifically, KIWINER improves the recognition quality through statistics-based new word detection, AttSoftlexicon, and PCAT. First, statistics-based new word detection is innovatively used to detect new words in kiwifruit-related text corpora, thereby improving the adaptability of word segmentation tools to kiwifruit-related texts and reducing the impact of word segmentation errors on the lexicon construction process; Second, through the AttSoftlexicon method proposed in this paper, based on Softlexicon (<xref ref-type="bibr" rid="B32">Peng et&#xa0;al., 2020</xref>) and CCNet (<xref ref-type="bibr" rid="B17">Huang et&#xa0;al., 2019</xref>), the character and word information in the lexicon are integrated into the model, and the position information of the character in the corresponding words can be fully utilized with the help of CCNet (<xref ref-type="bibr" rid="B17">Huang et&#xa0;al., 2019</xref>); Third, a novel module parallel connection criss-cross attention network (PCAT) is proposed to improve the contextual feature extraction ability of BiLSTM. PCAT assigns different weights to different characters according to their correlation and constructs a parallel structure through convolutional layers with different filter sizes to obtain richer semantic information. Additionally, this study collected publicly available textual information and constructed a kiwifruit NER dataset consisting of 17809 entities across six categories. 
Previous CANER methods based on machine learning, such as CRF (<xref ref-type="bibr" rid="B29">Li et&#xa0;al., 2017</xref>), rely on manual features or rules, which are time-consuming and unable to process a large number of complex agricultural texts (<xref ref-type="bibr" rid="B14">Guo et&#xa0;al., 2020</xref>). The CANER methods such as Att-BiLSTM-CRF (<xref ref-type="bibr" rid="B40">Zhao et&#xa0;al., 2021</xref>) use the deep learning method to reduce the work of designing feature extractors for each problem and solve the above problems. Compared with the popular CANER methods based on deep learning, our proposed KIWINER alleviates the OOV problem through new word detection, and makes full use of lexical information and agricultural features in addition to character information through AttSoftlexicon and PCAT, so the feature extraction ability of deep learning model is effectively improved. We also use KIWINER and five typical CNER models and two popular CANER models for comparative experiments, and the KIWINER model yields better performance.</p>
<p>The remainder of this paper is organized as follows. The materials used in this study and the methods proposed in this paper are discussed in detail in Section 2. Section 2 also introduces the experimental parameters, dataset division, evaluation metrics, and the experimental environment. The experimental details and results are presented in Section 3. The discussion of this study is presented in Section 4. Finally, the conclusions are presented in Section 5.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<title>2 Materials and methods</title>
<p>The overall architecture of KIWINER, shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>, indicates that the model contains six layers and uses BiLSTM-CRF as the basic framework. This section first introduces the experimental materials. Then this section focuses on the implementation details of the new word detection layer, embedding layer, CCNet, and AttSoftlexicon and PCAT proposed in this paper. Details of the BiLSTM and CRF layers can be found in (<xref ref-type="bibr" rid="B18">Huang et&#xa0;al., 2015</xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>The architecture of KIWINER.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1053449-g001.tif"/>
</fig>
<sec id="s2_1">
<title>2.1 Materials</title>
<p>To solve the problem of the limited public NER dataset for CANER, a new kiwifruit-related annotated corpus, named KIWID, was collected and annotated under the guidance of plant protection experts from Northwest A&amp;F University.</p>
<sec id="s2_1_1">
<title>2.1.1 Corpus collection</title>
<p>To ensure the quality of data, this study collected public information on kiwifruit diseases and pests from the official websites of trusted research institutions and Baidu Encyclopedia. Preprocessing was applied to remove non-useful content, such as webpage tags, links, and special characters contained in the corpus. Finally, a corpus (Corpus A of kiwifruit) containing 61103 sentences for training character vectors and detecting new words was obtained.</p>
</sec>
<sec id="s2_1_2">
<title>2.1.2 Corpus tagging</title>
<p>We selected 12477 sentences from Corpus A to form Corpus B. Under the guidance of plant experts from Northwest A&amp;F University, six types of kiwifruit-related entities were labeled, as shown in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>. Therefore, this study used the BMES (<xref ref-type="bibr" rid="B33">Ratinov and Roth, 2009</xref>) tagging scheme to tag Corpus B, where B, M, E, and S represent the beginning, middle, and end of an entity, and a single-word entity, respectively. To ensure annotation quality, the manual annotation method was adopted. Finally, the kiwifruit-related dataset KIWID containing 17809 entities was obtained, and the statistical information of KIWID is presented in the last column of <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Statistics of KIWID.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Category (Symbol)</th>
<th valign="top" align="center">Category definition</th>
<th valign="top" align="center">Examples</th>
<th valign="top" align="center">Numbers</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Varieties (KIWI)</td>
<td valign="top" align="center">Names of different varieties of kiwifruit.</td>
<td valign="top" align="center">&#x9647;&#x5357;&#x7315;&#x7334;&#x6843; (Longnan kiwi)</td>
<td valign="top" align="center">3763</td>
</tr>
<tr>
<td valign="top" align="left">Disease (DIS)</td>
<td valign="top" align="center">Diseases of kiwifruit.</td>
<td valign="top" align="center">&#x53f6;&#x67af;&#x75c5; (Leaf blight)</td>
<td valign="top" align="center">561</td>
</tr>
<tr>
<td valign="top" align="left">Pest (PEST)</td>
<td valign="top" align="center">Pests of kiwifruit.</td>
<td valign="top" align="center">&#x53f6;&#x8749; (Leaf cicada)</td>
<td valign="top" align="center">1247</td>
</tr>
<tr>
<td valign="top" align="left">Part (PART)</td>
<td valign="top" align="center">Diseases harming kiwifruit parts.</td>
<td valign="top" align="center">&#x53f6;&#x7247; (Leaves)&#x3001;&#x679d;&#x5e72;(branches)</td>
<td valign="top" align="center">5521</td>
</tr>
<tr>
<td valign="top" align="left">Farm chemical (MED)</td>
<td valign="top" align="center">Farm chemicals.</td>
<td valign="top" align="center">&#x591a;&#x83cc;&#x7075; (Carbendazim)</td>
<td valign="top" align="center">907</td>
</tr>
<tr>
<td valign="top" align="left">Place (LOC)</td>
<td valign="top" align="center">Distribution area of kiwifruit</td>
<td valign="top" align="center">&#x9655;&#x897f; (Shaanxi)</td>
<td valign="top" align="center">5090</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2_1_3">
<title>2.1.3 Analysis of Corpus features</title>
<p>(1) Contains several specialized vocabulary terms.</p>
<p>Entities involved in agricultural diseases and pests such as farm chemicals entities, pest entities, plant disease entities, and varieties entities are annotated in the corpus, such as &#x201c;&#x4e8c;&#x7532;&#x5417;&#x5549;&#x201d; (dimethomorph), &#x201c;&#x8054;&#x82ef;&#x83ca;&#x916f;&#x201d; (bifenthrin), &#x201c;&#x4ecb;&#x58f3;&#x866b;&#x201d; (scale insect), and &#x201c;&#x6591;&#x70b9;&#x75c5;&#x201d; (scab). Such words usually do not appear in the built-in dictionaries of common word-segmentation tools and have strong domain characteristics. Therefore, most word segmentation tools have poor adaptability to these specialized terms, leading to a greater likelihood of word segmentation errors. If the word information in the lexicon constructed by automatic word segmentation is introduced into the CANER model, the accuracy of the model may be significantly affected by word segmentation errors.</p>
<p>(2) Number of entities is unevenly distributed.</p>
<p>As shown in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>, there are differences in the number of different types of entities. The same problem exists not only in agriculture (<xref ref-type="bibr" rid="B13">Guo et&#xa0;al., 2022</xref>) but also in clinical medicine (<xref ref-type="bibr" rid="B23">Kong et&#xa0;al., 2021</xref>). The uneven distribution of the number of entities introduces challenges to the feature extraction ability of the CANER models.</p>
<p>(3) Entities nested within each other</p>
<p>Nested named entities are a common problem in the field of NER in the task of identifying kiwifruit-related entities. For example, there are two entities nested in &#x201c;&#x4e2d;&#x534e;&#x7315;&#x7334;&#x6843;&#x201d; (Actinidia chinensis Planch.), which are the location entity &#x201c;&#x4e2d;&#x534e;&#x201d; (China) and the plant entity &#x201c;&#x7315;&#x7334;&#x6843;&#x201d; (kiwifruit). First, this leads to errors in word segmentation. For example, Jieba&#x2019;s word segmentation result of &#x201c;&#x4e2d;&#x534e;&#x7315;&#x7334;&#x6843;&#x201d; (Actinidia chinensis Planch.) is &#x201c;&#x4e2d;&#x534e; &#x7315;&#x7334;&#x6843;&#x201d; (China kiwifruit). If the lexicon for the NER model contains incorrect word segmentation information, it provides misleading information for the identification of entity boundaries. Moreover, the phenomenon of nested entities also increases the difficulty of entity recognition and introduces challenges to the feature extraction ability of the model.</p>
</sec>
</sec>
<sec id="s2_2">
<title>2.2 New word detection layer</title>
<p>New word detection can identify OOV words and add them to the built-in dictionary of the word segmentation tool, thus improving the effect of common word segmentation tools (<xref ref-type="bibr" rid="B9">Du et&#xa0;al., 2016</xref>). Currently, new word detection is either rule-based (<xref ref-type="bibr" rid="B19">Huiming et&#xa0;al., 2003</xref>), statistics-based (<xref ref-type="bibr" rid="B22">Jin and Tanaka-Ishii, 2006</xref>), or based on both rules and statistics (<xref ref-type="bibr" rid="B41">Zheng and Wen-Hua, 2002</xref>). Methods that rely entirely or partly on rules rely on a manually built rule base. Although the rule base is helpful in improving the effectiveness of new word detection, the construction process is complex and time-consuming, and domain transferability is poor. As a result, this study adopts a statistics-based new word detection method. Corpus A was first segmented into strings using the <italic>N-gram</italic> method, and the garbage strings were then filtered in turn according to the three statistics of word frequency (<italic>WF</italic>), mutual information (<italic>MI</italic>), and contextual entropy (<italic>CE</italic>) of the strings. Subsequently, a new word set was obtained. This new word set was then added to the built-in dictionary of Jieba to improve its applicability to kiwifruit-related texts. Finally, the kiwifruit lexicon was constructed through the word segmentation of Corpus B by Jieba. This section first introduces the methods related to new word detection, and then introduces the lexicon construction process.</p>
<sec id="s2_2_1">
<title>2.2.1 <italic>N-gram</italic> Word segmentation</title>
<p>The basic idea of <italic>N-gram</italic> word segmentation is to use a fixed window of length <italic>n</italic> to segment the sentence. After segmentation, each string of size <italic>N</italic> is called a &#x201c;gram.&#x201d; For example, the 2-gram segmentation result of the sentence &#x201c;&#x519c;&#x4e1a;&#x75c5;&#x5bb3;&#x8bc6;&#x522b;&#x201d; (agricultural disease identification) is &#x201c;&#x519c;&#x4e1a;/&#x4e1a;&#x75c5;/&#x75c5;&#x5bb3;/&#x5bb3;&#x8bc6;/&#x8bc6;&#x522b;&#x201d; (nong ye/ye bing/bing hai/hai shi/shi bie). Other examples are shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2A</bold>
</xref>.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>
<bold>(A, B)</bold> Lexicon construction process.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1053449-g002.tif"/>
</fig>
</sec>
<sec id="s2_2_2">
<title>2.2.2 Mutual information</title>
<p>The concept of <italic>MI</italic> originates from information theory and is commonly used to measure how consistently two patterns occur together in a corpus (<xref ref-type="bibr" rid="B36">Ye et&#xa0;al., 2013</xref>). The <italic>MI</italic> value is derived from the log-likelihood ratio of the joint probability of patterns <italic>A</italic> and <italic>B</italic> over the individual probabilities of patterns <italic>A</italic> and <italic>B</italic>, as shown in Equation (1). If Chinese strings <italic>w<sub>1</sub>
</italic> and <italic>w<sub>2</sub>
</italic> in the same dataset appear as a whole string <italic>w<sub>12</sub>
</italic>, the probability is <italic>p(w<sub>12</sub>)</italic>, and the probabilities of the two strings appearing alone are <italic>p(w<sub>1</sub>)</italic> and <italic>p(w<sub>2</sub>)</italic>, respectively. The <italic>MI</italic> value was calculated using Equation (2). The higher the <italic>MI</italic> value of the two strings, the more likely they are to be combined into meaningful words.</p>
<disp-formula>
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>I</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>log</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mfrac>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mi>p</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>I</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mtext>w</mml:mtext>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>log</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mfrac>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mn>12</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mi>p</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</sec>
<sec id="s2_2_3">
<title>2.2.3 Contextual entropy</title>
<p>
<italic>CE</italic> is an external statistic proposed by (<xref ref-type="bibr" rid="B16">Huang and Powers, 2003</xref>) that can be used to measure the probability of whether a string is a meaningful word. It measures the randomness of the left and right adjacent characters of a string, that is, the left and right contextual entropies. Compared with a Chinese string with no practical meaning, a Chinese word with a practical meaning has a wider application scenario. Thus, the randomness of the set of left and right adjacent characters will be higher. Therefore, a higher <italic>CE</italic> value for a Chinese string indicates a greater probability that the string has a practical meaning. In the Chinese new word detection task, the <italic>CE</italic> accurately reflects the probability that a string is a meaningful word. The <italic>CE</italic> value was calculated using Equations (3) and (4):</p>
<disp-formula>
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:msub>
<mml:mtext>E</mml:mtext>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>w</mml:mtext>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:munder>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mo>|</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>log</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mo>|</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:msub>
<mml:mtext>E</mml:mtext>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:munder>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>|</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>log</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>|</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mstyle>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>p(w<sub>l</sub>|w)</italic> represents the probability that the left-adjacent character of <italic>w</italic> is character <italic>w<sub>l</sub>
</italic>, <italic>p(w<sub>r</sub>|w)</italic> represents the probability that the right-adjacent character of <italic>w</italic> is character <italic>w<sub>r</sub>
</italic>, <italic>S<sub>l</sub>
</italic> represents all left-adjacent characters of <italic>w</italic>, and <italic>S<sub>r</sub>
</italic> represents all right-adjacent characters of <italic>w</italic>.</p>
</sec>
<sec id="s2_2_4">
<title>2.2.4 Lexicon construction</title>
<p>The lexicon construction process occurs in four steps, as illustrated in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2B</bold>
</xref>.</p>
<p>Step 1: Apply the <italic>N-gram</italic> word segmentation method to segment Corpus A and obtain candidate strings with <italic>N</italic> = 2, 3, and 4.</p>
<p>Step 2: Calculate the statistics for each string. Compute the <italic>WF</italic>, <italic>MI</italic>, and <italic>CE</italic> values for each candidate string.</p>
<p>Step 3: Set the corresponding thresholds for <italic>WF</italic>, <italic>MI</italic>, and <italic>CE</italic>, named <italic>Threshold1</italic>, <italic>Threshold2</italic>, and <italic>Threshold3</italic>, respectively, and filter the candidate strings to obtain a new set of words. To avoid the omission of low-frequency new words, we set the <italic>WF</italic> threshold to 5, <italic>MI</italic> threshold to 3.9, and <italic>CE</italic> threshold to 2.7.</p>
<p>Step 4: Add the new word set obtained in Step 3 to the built-in dictionary of Jieba and perform word segmentation on Corpus B to obtain the kiwifruit lexicon for NER.</p>
</sec>
</sec>
<sec id="s2_3">
<title>2.3 Embedding layer</title>
<p>For a character-based CNER model, discrete text sequences are converted into low-dimensional densely distributed embedded representations, allowing the model to learn more semantic knowledge and improve its performance (<xref ref-type="bibr" rid="B14">Guo et&#xa0;al., 2020</xref>). As shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>, to obtain a high-quality embedded representation and make good use of the information in the corpus, Word2vec-CBOW (<xref ref-type="bibr" rid="B30">Mikolov et&#xa0;al., 2013</xref>) was used to train Corpus A in character form and transform the resulting agricultural lexicon into vectors. The input sequence of length <italic>n</italic> is <italic>s=(c<sub>1</sub>, c<sub>2</sub>, c<sub>3</sub>,&#x2026;&#x2026;,c<sub>n</sub>)&#x2208;V<sub>c</sub>
</italic>, where <italic>V<sub>c</sub>
</italic> is the word set (including characters), and each word is represented by a trained dense vector <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, where <italic>e<sup>c</sup>
</italic> denotes the word embedding lookup table.</p>
</sec>
<sec id="s2_4">
<title>2.4 CCNet</title>
<p>CCNet (<xref ref-type="bibr" rid="B17">Huang et&#xa0;al., 2019</xref>) is often used in semantic segmentation to aggregate contextual information from all pixels to obtain dense contextual information. This study considered the use of CCNet for text feature extraction. The overall structure of the CCNet is shown in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>
<bold>(A, B)</bold> Structure of CCNet and PCAT.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1053449-g003.tif"/>
</fig>
<p>Given a feature map <italic>M&#x2208;R<sup>C&#xd7;W&#xd7;H</sup>
</italic>, CCNet first generates two feature maps <italic>Q</italic> and <italic>K</italic> by applying two convolutional layers with a filter size of 1&#xd7;1 on the feature map <italic>M</italic>. <italic>{Q, K}&#x2208;R<sup>C&#x2019;&#xd7;W&#xd7;H</sup>
</italic>, where <italic>C&#x2019;</italic> is the number of channels of <italic>Q</italic> and <italic>K</italic>, which is less than <italic>C</italic> for dimension reduction. Another convolutional layer with filters of size 1&#xd7;1 is applied on <italic>M</italic> to generate <italic>V&#x2208;R<sup>C&#xd7;W&#xd7;H</sup>
</italic>. <italic>Q<sub>u</sub>&#x2208;R<sup>C&#x2019;</sup>
</italic> is the vector for each position <italic>u</italic> in the spatial dimension of the feature map <italic>Q</italic>. The vector set <italic>&#x3a9;<sub>u</sub>&#x2208;R<sup>(H+W-1)&#xd7;C&#x2019;</sup></italic> is obtained by extracting feature vectors from <italic>K</italic> which are in the same row as position <italic>u</italic>. Then, CCNet can obtain <italic>D&#x2208;R<sup>(H+W-1)&#xd7;W&#xd7;H</sup>
</italic>, which represents the degree of correlation between features <italic>Q<sub>u</sub>
</italic> and <italic>&#x3a9;<sub>i,u</sub>
</italic> (i=[1,&#x2026;,|&#x3a9;<sub>u</sub>|]) by the affinity operation, which is defined as follows:</p>
<disp-formula>
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi>&#x3a9;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>d<sub>i,u</sub>&#x2208;D</italic>. Feature map <italic>A</italic> is then obtained by applying a softmax layer on <italic>D</italic> over the channel dimension. CCNet can also obtain vector <italic>V<sub>u</sub>&#x2208;R<sup>C</sup>
</italic> and set <italic>&#x3b8;<sub>u</sub>&#x2208;R<sup>(H+W-1)&#xd7;C</sup>
</italic>. The set <italic>&#x3b8;<sub>u</sub>
</italic> is a collection of feature vectors in <italic>V</italic> that are in the same row as position <italic>u</italic>. Finally, the contextual information is collected by the aggregation operation:</p>
<disp-formula>
<label>(6)</label>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
<mml:mo>'</mml:mo>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:munder>
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>M<sub>u&#x2019;</sub>
</italic> is a feature vector in the output feature maps <italic>M&#x2019;&#x2208;R<sup>C&#xd7;W&#xd7;H</sup>
</italic> at position <italic>u</italic>, and <italic>A<sub>i,u</sub>
</italic> is a scalar value at channel <italic>i</italic> and position <italic>u</italic> in <italic>A</italic>. Contextual information is added to local feature <italic>M</italic> to enhance the local features and augment the pixel-wise representation.</p>
</sec>
<sec id="s2_5">
<title>2.5 Criss-cross attention based Softlexicon layer</title>
<p>One of the tasks of CANER is to recognize the boundaries of agricultural entities, and word segmentation information provides good guidance for identifying entity boundaries. However, CANER is affected by the strong domain characteristics of agricultural texts and the uneven distribution of entity categories (<xref ref-type="bibr" rid="B14">Guo et&#xa0;al., 2020</xref>). Adding more pre-training information will help the model learn more agricultural characteristics, thus reducing the impact of the aforementioned problems. Therefore, this paper proposes an AttSoftlexicon based on Softlexicon (<xref ref-type="bibr" rid="B32">Peng et&#xa0;al., 2020</xref>) and CCNet (<xref ref-type="bibr" rid="B17">Huang et&#xa0;al., 2019</xref>), and integrates the word information in the lexicon into character representation, which helps the model to learn more kiwifruit text features.</p>
<p>Assume that the input sequence is <italic>s={c<sub>1</sub>, c<sub>2</sub>,&#x2026;, c<sub>n</sub>}</italic>, and <italic>w<sub>i,j</sub>
</italic> denotes its subsequence <italic>{c<sub>i</sub>, c<sub>i+1</sub>,&#x2026;, c<sub>j</sub>}</italic>. The first step is lexicon matching. Each character is matched from a lexicon to all words containing the character. According to the position of each character <italic>c<sub>i</sub>
</italic> in the different matched words (beginning, middle, end, or one-character word), the words matched by a character were divided into four-word sets <italic>B(c<sub>i</sub>)</italic>, <italic>M(c<sub>i</sub>)</italic>, <italic>E(c<sub>i</sub>)</italic>, and <italic>S(c<sub>i</sub>)</italic>. The set construction method is shown in formula (7)-(10).</p>
<disp-formula>
<label>(7)</label>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo>{</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2200;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&lt;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>}</mml:mo>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(8)</label>
<mml:math display="block" id="M8">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo>{</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2200;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&lt;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&lt;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>}</mml:mo>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(9)</label>
<mml:math display="block" id="M9">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo>{</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2200;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&lt;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>}</mml:mo>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(10)</label>
<mml:math display="block" id="M10">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo>{</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2203;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>L</mml:mi>
<mml:mo>}</mml:mo>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>As shown in formula (7)-(10), <italic>L</italic> denotes the lexicon, and <italic>w</italic> represents the words matched in the lexicon. If a word set of characters is empty, it is represented as <italic>{None}</italic>. Taking the input sequence &#x201c;&#x690d;&#x7269;&#x75c5;&#x5bb3;&#x201d; (plant disease) as an example, the character &#x201c;&#x7269;&#x201d; (matter) is matched with the pre-constructed lexicon, and the two words &#x201c;&#x690d;&#x7269;&#x75c5;&#x5bb3;&#x201d; (plant disease) and &#x201c;&#x690d;&#x7269;&#x201d; (plant) are matched, and the four word sets corresponding to the character &#x201c;&#x7269;&#x201d; (matter) are formed: <italic>B={&#x201c;None&#x201d;}</italic>, <italic>M={&#x201c;&#x690d;&#x7269;&#x75c5;&#x5bb3;&#x201d;}</italic>, <italic>E={&#x201c;&#x690d;&#x7269;&#x201d;}</italic>, <italic>S={&#x201c;None&#x201d;}</italic>. The character &#x201c;&#x75c5;&#x201d; (disease) is matched with the pre-constructed lexicon, and the two words &#x201c;&#x75c5;&#x5bb3;&#x201d; (disease and pest) and &#x201c;&#x75c5;&#x201d; (disease) are matched, and the four-word sets corresponding to the character &#x201c;&#x75c5;&#x201d; (disease) are formed: <italic>B={&#x201c;&#x75c5;&#x5bb3;&#x201d;}</italic>, <italic>M={&#x201c;None&#x201d;}</italic>, <italic>E={&#x201c;None&#x201d;}</italic>, <italic>S={&#x201c;&#x75c5;&#x201d;}</italic>, as shown in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>. To integrate the word set information matched to each character into the corresponding character representation, the statistics-based static weighting method in Softlexicon (<xref ref-type="bibr" rid="B32">Peng et&#xa0;al., 2020</xref>) was used, where the frequency reflects the importance of the word.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Example of lexicon matching.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1053449-g004.tif"/>
</fig>
<p>The weighting method is given by formulae (11) and (12), where <italic>z(w)</italic> is the frequency with which a lexicon word w occurs in the statistical data and <italic>e<sup>w</sup>
</italic> is the word embedding lookup table. The weighted representation of word set <italic>S</italic> is obtained as follows:</p>
<disp-formula>
<label>(11)</label>
<mml:math display="block" id="M11">
<mml:mrow>
<mml:msup>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>S</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>4</mml:mn>
<mml:mi>Z</mml:mi>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mi>w</mml:mi>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Where:</p>
<disp-formula>
<label>(12)</label>
<mml:math display="block" id="M12">
<mml:mrow>
<mml:mi>Z</mml:mi>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo>&#x222a;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mo>&#x222a;</mml:mo>
<mml:mi>E</mml:mi>
<mml:mo>&#x222a;</mml:mo>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In the last step, the original Softlexicon (<xref ref-type="bibr" rid="B32">Peng et&#xa0;al., 2020</xref>) combines the representations of four-word sets into the fixed-dimensional feature and adds it to the representation of each character, as shown in formulae (13) and (14).</p>
<disp-formula>
<label>(13)</label>
<mml:math display="block" id="M13">
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo stretchy="false">[</mml:mo>
<mml:msup>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>;</mml:mo>
<mml:msup>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>M</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>;</mml:mo>
<mml:msup>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>E</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>;</mml:mo>
<mml:msup>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>S</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo stretchy="false">]</mml:mo>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(14)</label>
<mml:math display="block" id="M14">
<mml:mrow>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
<mml:mo>&#x2190;</mml:mo>
<mml:mo stretchy="false">[</mml:mo>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
<mml:mo>;</mml:mo>
<mml:mi>V</mml:mi>
<mml:mo stretchy="false">]</mml:mo>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The original Softlexicon (<xref ref-type="bibr" rid="B32">Peng et&#xa0;al., 2020</xref>) designed four-word sets to take advantage of these four types of positional information. However, it only weighs the words in each word set according to the word frequency and does not distinguish the importance of different word sets. This does not allow the model to distinguish the four positions of the characters in the matched words.</p>
<p>CCNet (<xref ref-type="bibr" rid="B17">Huang et&#xa0;al., 2019</xref>) showed a strong contextual relationship extraction ability in the semantic segmentation task. Therefore, to make full use of these four types of position information, this study uses CCNet to learn the weights for different word sets, as shown in the formula (15). First, CCNet processes the representation of these four sets and automatically assigns weights to them based on the relationship between them. It is then transformed into a vector of 1&#xd7;4 through <italic>q</italic>. Finally, the weight vector <italic>a<sub>i</sub>
</italic> (i&#x2208;[1,4]) with a value range of (0, 1) is obtained through the sigmoid function. <italic>a<sub>i</sub>
</italic> is a weight matrix of dimensions 1&#xd7;4, where the four values represent the importance of the four word sets. As shown in formula (16), the four-word set representations are weighted and merged into the character representation.</p>
<disp-formula>
<label>(15)</label>
<mml:math display="block" id="M15">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>q</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>N</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>V</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(16)</label>
<mml:math display="block" id="M16">
<mml:mrow>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mo stretchy="false">[</mml:mo>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
<mml:mo>;</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:msup>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>;</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:msup>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>M</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>;</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:msup>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>E</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>;</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>4</mml:mn>
</mml:msub>
<mml:msup>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>S</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo stretchy="false">]</mml:mo>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</sec>
<sec id="s2_6">
<title>2.6 Parallel connection Criss-cross attention network</title>
<p>The sequence features extracted by BiLSTM may have a few limitations. First, with an increase in sentence length, the feature extraction ability of BiLSTM declines (<xref ref-type="bibr" rid="B17">Huang et&#xa0;al., 2019</xref>). In addition, LSTM has been shown to have weaker feature extraction ability than attention mechanism models, such as transformers, when dealing with longer sequence texts (Li et&#xa0;al., 2020b). Second, BiLSTM makes each character contribute equally to the task. In other words, BiLSTM is not good at assigning more weight to some important characters in the text sequence, which is very important for NER. In addition, the strong domain features of kiwifruit-related texts mentioned in Section 2.1.3 also pose challenges to the feature extraction ability of the BiLSTM. In short, the feature extraction ability of BiLSTM must be further improved when solving the problem of kiwifruit-named entity recognition. Therefore, a novel module, parallel connection criss-cross attention network (PCAT), is proposed to mitigate the impact of the above limitations with the help of CCNet (<xref ref-type="bibr" rid="B17">Huang et&#xa0;al., 2019</xref>). The overall structure of the PCAT is shown in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3B</bold>
</xref>.</p>
<p>After the agricultural sentence is processed by BiLSTM, a feature map <italic>X&#x2208;R<sup>C&#xd7;W</sup>
</italic> is obtained (<italic>C</italic> represents the dimension of BiLSTM and <italic>W</italic> represents the length of the sentence). In this work, agricultural sentences are regarded as pictures with a channel number of <italic>C</italic> and a size of <italic>W&#xd7;1</italic>. Therefore, PCAT first transforms <italic>X</italic> into a feature map <italic>M&#x2208;R<sup>C&#xd7;W&#xd7;H</sup>
</italic> (the value of <italic>H</italic> is 1) through an unsqueeze operation. Each pixel in the feature map <italic>M</italic> represents a character in the agriculture text.</p>
<p>To obtain richer semantic information, the PCAT uses two different convolutional layers with filter sizes of 1 &#xd7; 1 and 1 &#xd7; 3 on <italic>M</italic> to generate two feature maps, <italic>M<sub>1</sub>
</italic> and <italic>M<sub>2</sub>
</italic>. <italic>M<sub>1</sub>
</italic> and <italic>M<sub>2</sub>
</italic> are put into the CCNet for processing. To learn more complex features, PCAT applies two convolutional layers with filter sizes of 1 &#xd7; 3 to <italic>M<sub>1</sub>
</italic> and <italic>M<sub>2</sub>
</italic>. Finally, <italic>M<sub>1</sub>
</italic> and <italic>M<sub>2</sub>
</italic> are added, and the output vector of the PCAT <italic>X&#x2019;&#x2208;R<sup>C&#xd7;W</sup>
</italic> is obtained through a squeeze operation.</p>
<p>Using CCNet to calculate the connection between each character, PCAT can assign different weights to different characters to give more attention to key characters. In addition, PCAT can solve the problem of long-distance dependency because it can calculate the degree of association between words in each position and other words that are not affected by distance. Through a parallel structure and convolutional layer, PCAT can obtain richer features from agricultural texts.</p>
</sec>
<sec id="s2_7">
<title>2.7 Evaluation indicators and experimental environment</title>
<sec id="s2_7_1">
<title>2.7.1 Parameter setting</title>
<p>In our proposed model, both the character vector dimension and word vector dimension were set to 50. In the feature encoding layer, the hidden size of both the forward and backward LSTM was set to 300, and to mitigate overfitting, the dropout rate was set to 0.5. For the model training, the batch size was set to 16. Furthermore, the model was trained using stochastic gradient descent with an initial learning rate of 0.0015, and the learning rate decay was set to 0.05. The hyper-parameter configuration of the model is listed in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>. All experiments were conducted under the conditions listed in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Hyper-parameter value.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Parameters</th>
<th valign="top" align="center">Value</th>
<th valign="top" align="center">Parameters</th>
<th valign="top" align="center">Value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">character embedding dim</td>
<td valign="top" align="center">50</td>
<td valign="top" align="left">learning rate decay</td>
<td valign="top" align="center">0.05</td>
</tr>
<tr>
<td valign="top" align="left">batchsize</td>
<td valign="top" align="center">16</td>
<td valign="top" align="left">LSTM hidden</td>
<td valign="top" align="center">300</td>
</tr>
<tr>
<td valign="top" align="left">learning rate</td>
<td valign="top" align="center">0.0015</td>
<td valign="top" align="left">dropout rate</td>
<td valign="top" align="center">0.5</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Experimental environment.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Project</th>
<th valign="top" align="center">Environment</th>
<th valign="top" align="center">Project</th>
<th valign="top" align="center">Environment</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Operating system</td>
<td valign="top" align="left">Windows 10(x64)</td>
<td valign="top" align="left">Hard disk</td>
<td valign="top" align="center">1T</td>
</tr>
<tr>
<td valign="top" align="left">CPU</td>
<td valign="top" align="left">i7-10700F@2.90GHz</td>
<td valign="top" align="left">Python version</td>
<td valign="top" align="center">3.6.5</td>
</tr>
<tr>
<td valign="top" align="left">GPU</td>
<td valign="top" align="left">NVIDIA TITANRTX (24GB)</td>
<td valign="top" align="left">Pytorch version</td>
<td valign="top" align="center">1.8.1</td>
</tr>
<tr>
<td valign="top" align="left">Memory</td>
<td valign="top" align="left">64GB</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2_7_2">
<title>2.7.2 Dataset division</title>
<p>For dataset division, four datasets were involved in the experiment, namely KIWID, BOSON, ClueNER, and People&#x2019;s Daily. We obtained the public data according to <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref> in the study (<xref ref-type="bibr" rid="B27">Liu et&#xa0;al., 2022</xref>). This study randomly divided KIWID, BOSON, and ClueNER into training, validation, and test sets according to a ratio of 8:1:1, respectively [refer to <xref ref-type="bibr" rid="B37">Zhang et&#xa0;al. (2021)</xref>]. The division of People&#x2019;s Daily followed <uri xlink:href="https://github.com/zjy-ucas/ChineseNER">https://github.com/zjy-ucas/ChineseNER</uri>. The pre-training corpus used in the KIWID-related experiments was the kiwifruit pre-training corpus constructed in this study. The pre-training corpus used in public dataset-related experiments is derived from Lattice-LSTM (<xref ref-type="bibr" rid="B38">Zhang and Yang, 2018</xref>), which is pre-trained using Word2vec (<xref ref-type="bibr" rid="B32">Peng et&#xa0;al., 2020</xref>) over automatically segmented Chinese Giga-Word. The number of character vectors in the public pre-training corpus is 5.7k, and the number of words in the lexicon is 704.4k.</p>
</sec>
<sec id="s2_7_3">
<title>2.7.3 Evaluation indicators</title>
<p>Precision (<italic>P</italic>), recall (<italic>R</italic>) and F<sub>1</sub>-score (<italic>F<sub>1</sub>
</italic>) were used to evaluate the performances of the different models, as shown in Equations (17)-(19).</p>
<disp-formula>
<label>(17)</label>
<mml:math display="block" id="M17">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>True&#xa0;positives</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>Predicted&#xa0;as&#xa0;positives</mml:mtext>
</mml:mrow>
</mml:mfrac>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(18)</label>
<mml:math display="block" id="M18">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>True&#xa0;positives</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>Actual&#xa0;positives</mml:mtext>
</mml:mrow>
</mml:mfrac>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(19)</label>
<mml:math display="block" id="M19">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>True positives (<italic>T<sub>P</sub>
</italic>) refer to the number of correctly recognized positive samples among all positive samples, whereas false positives (<italic>F<sub>P</sub>
</italic>) denote the number of negative samples incorrectly recognized as positive samples. False negatives (<italic>F<sub>N</sub>
</italic>) are positive samples incorrectly recognized as negative samples. Among all the positive samples, the more that are predicted correctly, the higher the <italic>P</italic> value. A higher number of positive samples predicted in the testing set yielded a higher <italic>R</italic> value. <italic>F<sub>1</sub>
</italic> is the harmonic average of <italic>P</italic> and <italic>R</italic>, providing an evaluation of the comprehensive ability of the model.</p>
</sec>
</sec>
</sec>
<sec id="s3" sec-type="results">
<title>3 Results</title>
<sec id="s3_1">
<title>3.1 Experiments on KIWID</title>
<p>In this section, some typical NER models such as BiLSTM (<xref ref-type="bibr" rid="B18">Huang et&#xa0;al., 2015</xref>), TENER (<xref ref-type="bibr" rid="B34">Yan et&#xa0;al., 2019</xref>), LR-CNN (<xref ref-type="bibr" rid="B11">Gui et&#xa0;al., 2019a</xref>), LGN (<xref ref-type="bibr" rid="B12">Gui et&#xa0;al., 2019b</xref>) and Softlexicon-LSTM (<xref ref-type="bibr" rid="B32">Peng et&#xa0;al., 2020</xref>) are considered comparable models. In addition, this section also uses the previous CANER findings JMCA-ADP (<xref ref-type="bibr" rid="B14">Guo et&#xa0;al., 2020</xref>) and Att-BiLSTM-CRF (<xref ref-type="bibr" rid="B40">Zhao et&#xa0;al., 2021</xref>) as comparison models. Like KIWINER, LR-CNN, LGN and Softlexicon-LSTM are also lexicon-based models. The lexicon used in the experiments in this section is the kiwifruit lexicon constructed in this study.</p>
<p>The experimental results for KIWID are shown in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>. It could be observed that the model proposed in this study outperformed other models, and the <italic>F<sub>1</sub>
</italic> of this model is at least 0.47 higher than other models, which illustrates its effectiveness in recognizing kiwifruit-related entities. The performance of our model is significantly improved compared to the baseline model BiLSTM-CRF. This is due to the fact that KIWINER makes full use of kiwifruit lexical information with the help of AttSoftlexicon, and obtains deeper semantic features with the help of PCAT. Compared with CANER models Att-BiLSTM-CRF and JMCA-ADP, KIWINER has achieved obvious improvement, which further verifies the effectiveness of KIWINER. The lexicon-based models LR-CNN, LGN, Softlexicon-LSTM and KIWINER have clear advantages over the rest of the character-based models, illustrating the effectiveness of constructing a kiwifruit-related lexicon and incorporating lexical information into the model.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Results of each model on KIWID.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">P</th>
<th valign="top" align="center">R</th>
<th valign="top" align="center">F1</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">BiLSTM-CRF</td>
<td valign="top" align="center">84.42</td>
<td valign="top" align="center">84.54</td>
<td valign="top" align="center">84.48</td>
</tr>
<tr>
<td valign="top" align="left">Att-BiLSTM-CRF</td>
<td valign="top" align="center">82.85</td>
<td valign="top" align="center">88.99</td>
<td valign="top" align="center">85.81</td>
</tr>
<tr>
<td valign="top" align="left">JMCA-ADP</td>
<td valign="top" align="center">84.90</td>
<td valign="top" align="center">90.47</td>
<td valign="top" align="center">87.59</td>
</tr>
<tr>
<td valign="top" align="left">TENER</td>
<td valign="top" align="center">86.40</td>
<td valign="top" align="center">90.19</td>
<td valign="top" align="center">88.25</td>
</tr>
<tr>
<td valign="top" align="left">LR-CNN</td>
<td valign="top" align="center">87.08</td>
<td valign="top" align="center">89.90</td>
<td valign="top" align="center">88.47</td>
</tr>
<tr>
<td valign="top" align="left">LGN</td>
<td valign="top" align="center">86.81</td>
<td valign="top" align="center">89.63</td>
<td valign="top" align="center">88.19</td>
</tr>
<tr>
<td valign="top" align="left">Softlexicon-LSTM</td>
<td valign="top" align="center">87.18</td>
<td valign="top" align="center">89.27</td>
<td valign="top" align="center">88.21</td>
</tr>
<tr>
<td valign="top" align="left">KIWINER (our)</td>
<td valign="top" align="center">88.21</td>
<td valign="top" align="center">90.31</td>
<td valign="top" align="center">88.94</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2">
<title>3.2 Experiments on public datasets</title>
<p>To verify the generalization of KIWINER, three public datasets were selected: Boson, ClueNER, and People&#x2019;s Daily. The experimental results are listed in <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Results for each model on public datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="left">Model</th>
<th valign="top" colspan="3" align="center">Boson</th>
<th valign="top" colspan="3" align="center">ClueNER</th>
<th valign="top" colspan="3" align="center">People&#x2019;s Daily</th>
</tr>
<tr>
<th valign="top" align="center">P</th>
<th valign="top" align="center">R</th>
<th valign="top" align="center">F<sub>1</sub>
</th>
<th valign="top" align="center">P</th>
<th valign="top" align="center">R</th>
<th valign="top" align="center">F<sub>1</sub>
</th>
<th valign="top" align="center">P</th>
<th valign="top" align="center">R</th>
<th valign="top" align="center">F<sub>1</sub>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">LSTM</td>
<td valign="top" align="center">81.78</td>
<td valign="top" align="center">72.50</td>
<td valign="top" align="center">76.86</td>
<td valign="top" align="center">76.80</td>
<td valign="top" align="center">71.28</td>
<td valign="top" align="center">73.94</td>
<td valign="top" align="center">85.96</td>
<td valign="top" align="center">82.09</td>
<td valign="top" align="center">83.98</td>
</tr>
<tr>
<td valign="top" align="left">Att-BiLSTM-CRF</td>
<td valign="top" align="center">79.93</td>
<td valign="top" align="center">76.67</td>
<td valign="top" align="center">78.27</td>
<td valign="top" align="center">74.73</td>
<td valign="top" align="center">73.62</td>
<td valign="top" align="center">74.17</td>
<td valign="top" align="center">86.28</td>
<td valign="top" align="center">85.05</td>
<td valign="top" align="center">85.66</td>
</tr>
<tr>
<td valign="top" align="left">JMCA-ADP</td>
<td valign="top" align="center">80.10</td>
<td valign="top" align="center">77.66</td>
<td valign="top" align="center">78.86</td>
<td valign="top" align="center">75.82</td>
<td valign="top" align="center">76.58</td>
<td valign="top" align="center">76.20</td>
<td valign="top" align="center">87.96</td>
<td valign="top" align="center">86.93</td>
<td valign="top" align="center">87.44</td>
</tr>
<tr>
<td valign="top" align="left">TENER</td>
<td valign="top" align="center">79.45</td>
<td valign="top" align="center">81.51</td>
<td valign="top" align="center">80.47</td>
<td valign="top" align="center">74.34</td>
<td valign="top" align="center">77.08</td>
<td valign="top" align="center">75.68</td>
<td valign="top" align="center">90.36</td>
<td valign="top" align="center">90.07</td>
<td valign="top" align="center">90.22</td>
</tr>
<tr>
<td valign="top" align="left">LR-CNN</td>
<td valign="top" align="center">84.40</td>
<td valign="top" align="center">82.04</td>
<td valign="top" align="center">83.20</td>
<td valign="top" align="center">80.09</td>
<td valign="top" align="center">78.47</td>
<td valign="top" align="center">79.27</td>
<td valign="top" align="center">91.13</td>
<td valign="top" align="center">90.74</td>
<td valign="top" align="center">90.93</td>
</tr>
<tr>
<td valign="top" align="left">LGN</td>
<td valign="top" align="center">82.16</td>
<td valign="top" align="center">79.16</td>
<td valign="top" align="center">80.63</td>
<td valign="top" align="center">77.01</td>
<td valign="top" align="center">73.95</td>
<td valign="top" align="center">75.45</td>
<td valign="top" align="center">90.75</td>
<td valign="top" align="center">89.52</td>
<td valign="top" align="center">90.13</td>
</tr>
<tr>
<td valign="top" align="left">Softlexicon-LSTM</td>
<td valign="top" align="center">85.75</td>
<td valign="top" align="center">80.67</td>
<td valign="top" align="center">83.13</td>
<td valign="top" align="center">80.50</td>
<td valign="top" align="center">79.11</td>
<td valign="top" align="center">79.80</td>
<td valign="top" align="center">92.31</td>
<td valign="top" align="center">90.43</td>
<td valign="top" align="center">91.36</td>
</tr>
<tr>
<td valign="top" align="left">KIWINER</td>
<td valign="top" align="center">86.96</td>
<td valign="top" align="center">83.37</td>
<td valign="top" align="center">85.13</td>
<td valign="top" align="center">81.05</td>
<td valign="top" align="center">80.01</td>
<td valign="top" align="center">80.52</td>
<td valign="top" align="center">93.23</td>
<td valign="top" align="center">92.42</td>
<td valign="top" align="center">92.82</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The KIWINER model achieved the best <italic>F<sub>1</sub>
</italic> of the three datasets, which were 85.13%, 80.52%, and 92.82% for Boson, ClueNER, and People&#x2019;s Daily, respectively. The experimental results show that KIWINER not only has performance advantages on the KIWID corpus, but also has a certain generalization in other fields.</p>
</sec>
<sec id="s3_3">
<title>3.3 Ablation experiments</title>
<sec id="s3_3_1">
<title>3.3.1 Effectiveness of new word detection layer</title>
<p>In the new word detection layer of KIWINER, the adaptability of Jieba to kiwifruit-related texts was enhanced by new word detection and then a lexicon was constructed by word segmentation of kiwifruit-related texts. To verify the effectiveness of this lexicon construction method, this section used several commonly used Chinese automatic word segmentation tools (Pkuseg, Thulac, HanLP, Jieba, and Snownlp) to automatically separate the kiwifruit-related texts collected in this study to construct lexicons and apply them to KIWINER for experiments. Experiments were performed using KIWID. The experimental results are shown in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5A</bold>
</xref>.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>
<bold>(A, B)</bold> Results for each lexicon construction method &amp; Effectiveness of AttSoftlexicon.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1053449-g005.tif"/>
</fig>
<p>The method of constructing the lexicon with the aid of new word detection and Jieba achieves the highest <italic>P</italic>, <italic>R</italic>, and <italic>F<sub>1</sub>
</italic>, and improves over other methods. This shows that new word detection effectively reduces the negative impact of word segmentation errors on CANER during lexicon construction.</p>
</sec>
<sec id="s3_3_2">
<title>3.3.2 Effectiveness of AttSoftlexicon layer</title>
<p>To verify the effectiveness of the AttSoftlexicon, it was replaced in KIWINER by Softlexicon (<xref ref-type="bibr" rid="B32">Peng et&#xa0;al., 2020</xref>), and a comparative experiment was conducted. The experiment used the <italic>F<sub>1</sub>
</italic> as the evaluation metric, and the experimental results are shown in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5B</bold>
</xref>. The KIWINER model achieved the best <italic>F<sub>1</sub>
</italic> for the four datasets. This shows that by assigning different weights to different word set representations, the AttSoftlexicon can help the model to make full use of the position information of characters in its matched words, thus making more full use of lexicon information than Softlexicon.</p>
</sec>
<sec id="s3_3_3">
<title>3.3.3 Effectiveness of PCAT layer</title>
<p>To verify the applicability of the PCAT module for different sequence encoding models, experiments were performed using transformer and GRU instead of BiLSTM. And comparative experiments were carried out with or without the PCAT module in the model. The experiment was divided into three groups, and the results are presented in <xref ref-type="table" rid="T6">
<bold>Table&#xa0;6</bold>
</xref>.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Application effect of PCAT.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Group</th>
<th valign="top" align="center">Model</th>
<th valign="top" align="center">F<sub>1</sub>
</th>
<th valign="top" align="center">Model</th>
<th valign="top" align="center">F<sub>1</sub>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="center">AttSoftlexicon-Transformer-CRF</td>
<td valign="top" align="center">84.01</td>
<td valign="top" align="center">AttSoftlexicon-Transformer-PCAT-CRF</td>
<td valign="top" align="center">85.11</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="center">AttSoftlexicon-BiGRU-CRF</td>
<td valign="top" align="center">87.68</td>
<td valign="top" align="center">AttSoftlexicon-BiGRU-PCAT-CRF</td>
<td valign="top" align="center">88.85</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="center">AttSoftlexicon-BiLSTM-CRF</td>
<td valign="top" align="center">87.17</td>
<td valign="top" align="center">AttSoftlexicon-BiLSTM-PCAT-CRF(KIWINER)</td>
<td valign="top" align="center">88.94</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The effect of each sequence coding model in the table improved after the introduction of PCAT, indicating the effectiveness and universality of PCAT. The model based on BiLSTM achieved the best effect, which shows the rationality of KIWINER using BiLSTM to encode character sequences.</p>
</sec>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<title>4 Discussion</title>
<sec id="s4_1">
<title>4.1 Comparison of experiments with different variants</title>
<p>To verify the rationality of the PCAT module, several variants of it were designed, and the variant was used to replace the PCAT in KIWINER for experiments on Boson, ClueNER, KIWID, and People&#x2019;s Daily. Variants A and B increased and decreased the depth of the PCAT, respectively. Variants C and D break the parallel connection structure of PCAT. The different variant structures of the PCAT are shown in <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>. In addition, many researchers use the self-attention mechanism (Self-Att) to improve the feature extraction ability of the sequence encoding layer. In the field of CANER, <xref ref-type="bibr" rid="B14">Guo et&#xa0;al. (2020)</xref> introduced a self-attention module after the BiLSTM model to improve the feature extraction ability of the sequence coding layer. Therefore, this section refers to the study by <xref ref-type="bibr" rid="B14">Guo et&#xa0;al. (2020)</xref> and uses Self-Att instead of PCAT for experiments. The attention unit and head number of Self-Att are 600 and 8, respectively. The experimental results are listed in <xref ref-type="table" rid="T7">
<bold>Table&#xa0;7</bold>
</xref>.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Variants of PCAT.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1053449-g006.tif"/>
</fig>
<table-wrap id="T7" position="float">
<label>Table&#xa0;7</label>
<caption>
<p>Results for several variants of PCAT.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Module</th>
<th valign="top" align="center">Boson</th>
<th valign="top" align="center">ClueNER</th>
<th valign="top" align="center">KIWID</th>
<th valign="top" align="center">People&#x2019;s Daily</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Self-Att</td>
<td valign="top" align="center">83.35</td>
<td valign="top" align="center">79.73</td>
<td valign="top" align="center">88.12</td>
<td valign="top" align="center">91.71</td>
</tr>
<tr>
<td valign="top" align="left">Variant A</td>
<td valign="top" align="center">84.03</td>
<td valign="top" align="center">80.26</td>
<td valign="top" align="center">88.41</td>
<td valign="top" align="center">92.34</td>
</tr>
<tr>
<td valign="top" align="left">Variant B</td>
<td valign="top" align="center">84.24</td>
<td valign="top" align="center">80.41</td>
<td valign="top" align="center">88.62</td>
<td valign="top" align="center">92.58</td>
</tr>
<tr>
<td valign="top" align="left">Variant C</td>
<td valign="top" align="center">84.07</td>
<td valign="top" align="center">80.32</td>
<td valign="top" align="center">88.22</td>
<td valign="top" align="center">92.69</td>
</tr>
<tr>
<td valign="top" align="left">Variant D</td>
<td valign="top" align="center">84.61</td>
<td valign="top" align="center">80.34</td>
<td valign="top" align="center">88.83</td>
<td valign="top" align="center">92.56</td>
</tr>
<tr>
<td valign="top" align="left">PCAT</td>
<td valign="top" align="center">
<bold>84.95</bold>
</td>
<td valign="top" align="center">
<bold>80.95</bold>
</td>
<td valign="top" align="center">
<bold>88.94</bold>
</td>
<td valign="top" align="center">
<bold>92.82</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>The values in bold represent the maximum value in the same column.</p>
</table-wrap-foot>
</table-wrap>
<p>Compared with Variants A and B, the PCAT achieved better results, indicating that the depth design of the PCAT is reasonable. Compared with Variants C and D, PCAT achieves better results, which shows that a parallel structure can effectively improve the feature extraction ability of the model and help the model obtain richer semantic information. PCAT achieves better results than Self-Att (<xref ref-type="bibr" rid="B14">Guo et&#xa0;al., 2020</xref>), which indicates that PCAT is more conducive to improving the model feature extraction capability than the commonly used module Self-Att. PCAT constructs a parallel structure with the help of two different convolutional layers, which allows the model to simultaneously process semantic information from two different perspectives. At the same time, with the help of CCNet, which has good long distance context semantic aggregation capability (<xref ref-type="bibr" rid="B17">Huang et&#xa0;al., 2019</xref>), the information can be processed again, and different weights can be given according to different information relationships. Therefore, PCAT can help the model make full use of the feature information input into the model.</p>
</sec>
<sec id="s4_2">
<title>4.2 Comparative analysis with the previous CANER findings</title>
<p>This section discusses the recognition effects of KIWINER and the previous CANER studies Att-BiLSTM-CRF (<xref ref-type="bibr" rid="B40">Zhao et&#xa0;al., 2021</xref>) and JMCA-ADP (<xref ref-type="bibr" rid="B14">Guo et&#xa0;al., 2020</xref>) on each category of the kiwifruit dataset KIWID. BiLSTM-CRF (<xref ref-type="bibr" rid="B18">Huang et&#xa0;al., 2015</xref>), the baseline model of the above models, also participated in the experiments. The experimental results are shown in <xref ref-type="table" rid="T8">
<bold>Table&#xa0;8</bold>
</xref>, where <italic>F<sub>1</sub>
</italic> is chosen as the evaluation metric, and the last column of the table is the running time of each model.</p>
<table-wrap id="T8" position="float">
<label>Table&#xa0;8</label>
<caption>
<p>Entity categories study.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Entity type</th>
<th valign="top" align="center">BiLSTM</th>
<th valign="top" align="center">Att-BiLSTM-CRF</th>
<th valign="top" align="center">JMCA-ADP</th>
<th valign="top" align="center">KIWINER</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">KIWI</td>
<td valign="top" align="center">83.70</td>
<td valign="top" align="center">80.00</td>
<td valign="top" align="center">85.90</td>
<td valign="top" align="center">87.06</td>
</tr>
<tr>
<td valign="top" align="left">DIS</td>
<td valign="top" align="center">79.17</td>
<td valign="top" align="center">78.43</td>
<td valign="top" align="center">81.63</td>
<td valign="top" align="center">87.50</td>
</tr>
<tr>
<td valign="top" align="left">PEST</td>
<td valign="top" align="center">77.00</td>
<td valign="top" align="center">85.71</td>
<td valign="top" align="center">86.96</td>
<td valign="top" align="center">89.76</td>
</tr>
<tr>
<td valign="top" align="left">LOC</td>
<td valign="top" align="center">81.90</td>
<td valign="top" align="center">82.64</td>
<td valign="top" align="center">83.69</td>
<td valign="top" align="center">84.19</td>
</tr>
<tr>
<td valign="top" align="left">PART</td>
<td valign="top" align="center">94.44</td>
<td valign="top" align="center">94.23</td>
<td valign="top" align="center">94.43</td>
<td valign="top" align="center">96.80</td>
</tr>
<tr>
<td valign="top" align="left">MED</td>
<td valign="top" align="center">61.64</td>
<td valign="top" align="center">70.45</td>
<td valign="top" align="center">73.33</td>
<td valign="top" align="center">75.28</td>
</tr>
<tr>
<td valign="top" align="left">All category</td>
<td valign="top" align="center">84.48</td>
<td valign="top" align="center">85.81</td>
<td valign="top" align="center">87.59</td>
<td valign="top" align="center">88.94</td>
</tr>
<tr>
<td valign="top" align="left">Time(s/epoch)</td>
<td valign="top" align="center">139.14</td>
<td valign="top" align="center">149.20</td>
<td valign="top" align="center">144.13</td>
<td valign="top" align="center">163.87</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>It can be clearly seen from the table that KIWINER has achieved the best results in each category, especially in disease, pest, pesticide, which contain strong domain features. Although Att-BiLSTM-CRF and JMCA-ADP have made efforts to integrate agricultural features into the model, KIWINER can obtain more agricultural features by using word information with the help of Attsoftlexicon and new word detection. In addition, PCAT can help the model to further make full use of these agricultural features. The category of location related entities usually contain boundary characters, such as &#x201c;&#x53bf;&#x201d; (county), &#x201c;&#x9547;&#x201d; (town), &#x201c;&#x6751;&#x201d; (village), etc., and the category of part related entities have limited diversity and many repeated words, which leads to the recognition difficulty of the above two categories being relatively low. Therefore, KIWINER did not significantly improve the recognition effect of LOC and PART. From the last row of the table, we can see that KIWINER takes more time than other models, which is a disadvantage of KIWINER. KIWINER incorporates lexical information, so it will spend an extra part of time on processing lexical information compared with the character-based model. Research on faster character and word matching methods and more efficient sequence encoding modules can be helpful to overcome this shortcoming.</p>
<p>In KIWINER, the AttSoftlexicon module and PCAT module both adopt the CCNet model from semantic segmentation, and have achieved good results through experimental verification. With the help of new word detection and AttSoftlexicon, KIWINER incorporates the word information containing domain features into the model. And KIWINER has achieved significant improvement compared with previous models, which are character-based. This shows that when solving problems with strong domain features such as CANER, it is a good solution to find a method to integrate more domain features into the model. In addition, the effectiveness of PCAT also shows the importance of making full use of these features.</p>
</sec>
</sec>
<sec id="s5">
<title>5 Conclusion</title>
<p>To address the lack of an annotation dataset for agricultural named entity recognition in the kiwifruit field, a kiwi-annotated NER corpus KIWID, which contains six categories and 17089 entities was constructed in this study. According to the characteristics of kiwifruit-related texts, a new CANER model, KIWINER, was proposed by statistics-based new word detection and the novel module AttSoftlexicon, PCAT. To alleviate the word segmentation insensitivity caused by the strong specialization of kiwifruit-related texts, statistics-based new word detection was used to enrich the built-in vocabulary of Jieba and improve its applicability to kiwifruit texts to construct the kiwifruit lexicon. Inspired by the CCNet module in the field of semantic segmentation, the AttSoftlexicon was proposed to help the model make efficient use of lexicon information. In addition, this study proposes a PCAT module to improve the feature extraction ability of the sequence coding layer BiLSTM. The experimental results with the comparative models show that our proposed model can effectively improve CANER performance, particularly for difficult-to-recognize categories such as diseases, pests, and farm chemicals.</p>
<p>Moreover, our research can provide a reference for developing new deep learning methods for named entity recognition of international texts. Theoretically, our construction method of Attsoftlexicon is also applicable for the named entity recognition of the texts of other similar languages, such as Japanese, Korean etc., which are unnaturally partitioned just like Chinese. In addition, our proposed PCAT module is used to improve the sequence encoding ability of deep learning model essentially. So, applying our proposed PCAT module for the named entity recognition of other languages is also theoretically feasible. Therefore, KIWINER can also be used to explore CNER tasks in other crops or other fields with domain features. In the future, we will study how to improve the time efficiency of KIWINER and use it in the construction of a kiwifruit Q&amp;A system.</p>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary materials. Further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>Conceptualization, LZ. Methodology, LZ. Validation, LZ. Formal analysis, LZ and XN. Investigation, LZ, XN, HZ, VG, CR and DN. Data curation, LZ, MZ and MG. Writing&#x2014;original draft preparation, LZ. Writing&#x2014;review and editing LZ and DN. Visualization, LZ and XN. Supervision, HZ and DN. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>This work was supported by the National Key R&amp;D Program of China under grant 2020YFD1100601.</p>
</sec>
<sec id="s9" sec-type="acknowledgement">
<title>Acknowledgments</title>
<p>We thank all of the funders and all reviewers.</p>
</sec>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be&#xa0;construed as a potential conflict of interest.</p>
</sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aamir</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Bazai</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wagan</surname> <given-names>R. A.</given-names>
</name>
<name>
<surname>Bhatti</surname> <given-names>U. A.</given-names>
</name>
<name>
<surname>Nizamani</surname> <given-names>M. M.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Spatiotemporal change of air-quality patterns in hubei province&#x2013;a pre-to post-COVID-19 analysis using path analysis and regression</article-title>. <source>Atmosphere</source> <volume>12</volume> (<issue>10</issue>), <elocation-id>1338</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/atmos12101338</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Aguilar</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Maharjan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Monroy</surname> <given-names>A. P. L.</given-names>
</name>
<name>
<surname>Solorio</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>A multi-task approach for named entity recognition in social media data</article-title>,&#x201d; in <conf-name>Proceedings of the 3rd Workshop on Noisy User-generated Text</conf-name>, , <conf-date>Copenhagen, Denmark</conf-date>
<conf-date>7 September</conf-date>. <fpage>148</fpage>&#x2013;<lpage>153</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/W17-4419</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bhatti</surname> <given-names>U. A.</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Nawaz</surname> <given-names>S. A.</given-names>
</name>
<name>
<surname>Mehmood</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Bhatti</surname> <given-names>M. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Predictive data modeling using sp-kNN for risk factor evaluation in urban demographical healthcare data</article-title>. <source>J. Med. Imaging Health Inf.</source> <volume>11</volume> (<issue>1</issue>), <fpage>7</fpage>&#x2013;<lpage>14</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1166/jmihi.2021.3313</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bhatti</surname> <given-names>U. A.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Nawaz</surname> <given-names>S. A.</given-names>
</name>
<name>
<surname>Mehmood</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>K.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>a). <article-title>Hybrid watermarking algorithm using clifford algebra with Arnold scrambling and chaotic encryption</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>76386</fpage>&#x2013;<lpage>76398</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2020.2988298</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bhatti</surname> <given-names>U. A.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zeeshan</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Nawaz</surname> <given-names>S. A.</given-names>
</name>
<name>
<surname>Bhatti</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>b). <article-title>Geometric algebra applications in geospatial artificial intelligence and remote sensing image processing</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>155783</fpage>&#x2013;<lpage>155796</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2020.3018544</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Biswas</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Sharan</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A noble approach for recognition and classification of agricultural named entities using Word2Vec</article-title>. <source>Int. J. Adv. Stud. Comput. Sci. Eng.</source> <volume>9</volume> (<issue>12</issue>), <fpage>1</fpage>&#x2013;<lpage>8</lpage>.</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chiu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Nichols</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Named entity recognition with bidirectional LSTM-CNNs</article-title>. <source>Trans. Assoc. Comput. Linguist.</source> <volume>4</volume>, <fpage>357</fpage>&#x2013;<lpage>370</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1162/tacl_a_00104</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Drury</surname> <given-names>B. M.</given-names>
</name>
<name>
<surname>Roche</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A survey of the applications of text mining for agriculture</article-title>. <source>Comput. Electron. Agric.</source> <volume>163</volume>, <elocation-id>104864</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2019.104864</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Du</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>New word detection based on an improved PMI algorithm for enhancing segmentation system</article-title>. <source>Acta Scientiarum Naturalium Universitatis Pekinensis.</source> <volume>52</volume> (<issue>1</issue>), <fpage>35</fpage>&#x2013;<lpage>40</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.13209/j.0479-8023.2016.024</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Galvan</surname> <given-names>L. C.</given-names>
</name>
<name>
<surname>Bhatti</surname> <given-names>U. A.</given-names>
</name>
<name>
<surname>Campo</surname> <given-names>C. C.</given-names>
</name>
<name>
<surname>Trujillo</surname> <given-names>R. S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>The nexus between CO2 emission, economic growth, trade openness: Evidences from middle-income trap countries</article-title>. <source>Front. Environ. Sci.</source> <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fenvs.2022.938776</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Gui</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>a). &#x201c;<article-title>CNN-Based Chinese NER with lexicon rethinking</article-title>,&#x201d; in <conf-name>Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence</conf-name>, <conf-loc>Macao, China</conf-loc>, <fpage>4982</fpage>&#x2013;<lpage>4988</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.24963/ijcai.2019/692</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Gui</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zou</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Peng</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>b). &#x201c;<article-title>A lexicon-based graph neural network for Chinese NER</article-title>,&#x201d; in <conf-name>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing</conf-name>, <conf-loc>Hong Kong, China</conf-loc>, <fpage>1040</fpage>&#x2013;<lpage>1050</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/D19-1096</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Tang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Bai</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Diao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>CG-ANER: Enhanced contextual embeddings and glyph features-based agricultural named entity recognition</article-title>. <source>Comput. Electron. Agric.</source> <volume>194</volume>, <elocation-id>106776</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2022.106776</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Su</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Hao</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Chinese Agricultural diseases and pests named entity recognition with multi-scale local context features and self-attention mechanism</article-title>. <source>Comput. Electron. Agric.</source> <volume>179</volume>, <elocation-id>105830</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2020.105830</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>A unified model for cross-domain and semi-supervised named entity recognition in Chinese social media</article-title>,&#x201d; in <conf-name>Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence</conf-name>, <conf-loc>San Francisco, USA</conf-loc>, <fpage>3216</fpage>&#x2013;<lpage>3222</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1609/aaai.v31i1.10977</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>J. H.</given-names>
</name>
<name>
<surname>Powers</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2003</year>). &#x201c;<article-title>Chinese Word segmentation based on contextual entropy</article-title>,&#x201d; in <conf-name>Proceedings of the 17th Pacific Asia Conference on Language, Information and Computation</conf-name>, (<publisher-loc>Sentosa, Singapore</publisher-loc>: <publisher-name>COLIPS PUBLICATIONS</publisher-name>) <fpage>152</fpage>&#x2013;<lpage>158</lpage>.</citation>
</ref>
<ref id="B17">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>CCNet: Criss-cross attention for semantic segmentation</article-title>,&#x201d; in <conf-name>Proceedings of the International Conference on Computer Vision</conf-name>, <conf-loc>Seoul, South Korea</conf-loc>, <fpage>603</fpage>&#x2013;<lpage>612</lpage>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Kai</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Bidirectional LSTM-CRF models for sequence tagging</article-title>. <source>arXiv</source> [Preprint]. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1508.01991</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Huiming</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Xiaojing</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Baobao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Shiwen</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2003</year>). &#x201c;<article-title>Chinese Word segmentation at Peking University</article-title>,&#x201d; in <conf-name>Proceedings of the Second Workshop on Chinese Language Processing</conf-name>, <conf-loc>Sapporo, Japan</conf-loc>, <fpage>152</fpage>&#x2013;<lpage>155</lpage>.</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zong</surname> <given-names>C. H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A comprehensive understanding with the importance of popular science knowledge to the kiwifruit quality improvement</article-title>. <source>China Fruits.</source> <volume>01</volume>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.16626/j.cnki.issn1000-8047.2020.01.001</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Jingzhou</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Houfeng</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2008</year>). &#x201c;<article-title>Chinese Named entity recognition and word segmentation based on character</article-title>,&#x201d; in <conf-name>Proceedings of the Third International Joint Conference on Natural Language Processing</conf-name>, <conf-loc>Hyderabad, India</conf-loc>, <fpage>128</fpage>&#x2013;<lpage>132</lpage>.</citation>
</ref>
<ref id="B22">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Jin</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Tanaka-Ishii</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2006</year>). &#x201c;<article-title>Unsupervised segmentation of Chinese text by use of branching entropy</article-title>,&#x201d; in <conf-name>Proceedings of the 21st International Conference on Computational Linguistics</conf-name>, <conf-loc>Sydney, Australia</conf-loc>, <fpage>428</fpage>&#x2013;<lpage>435</lpage>.</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kong</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Incorporating multi-level CNN and attention mechanism for Chinese clinical named entity recognition</article-title>. <source>J. Biomed. Inform.</source> <volume>116</volume>, <elocation-id>103737</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jbi.2021.103737</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Xiang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Towards Chinese clinical named entity recognition by dynamic embedding using domain-specific knowledge</article-title>. <source>J. Biomed. Inform.</source> <volume>106</volume>, <elocation-id>103435</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jbi.2020.103435</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Ju</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Adversarial active learning for named entity recognition in cybersecurity</article-title>. <source>Comput. Mater. Continua.</source> <volume>66</volume>, <fpage>407</fpage>&#x2013;<lpage>420</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.32604/CMC.2020.012023</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Thiruvady</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A hybrid deep-learning approach for complex biochemical named entity recognition</article-title>. <source>Knowledge-based Systems.</source> <volume>221</volume>, <elocation-id>106958</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.knosys.2021.106958</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Chinese Named entity recognition: The state of the art</article-title>. <source>Neurocomputing</source> <volume>473</volume>, <fpage>37</fpage>&#x2013;<lpage>53</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.neucom.2021.10.101</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Chinese Named entity recognition with a sequence labeling approach: based on characters, or based on words</article-title>?,&#x201d; in <conf-name>Proceedings of the 6th International Conference on Intelligent Computing</conf-name>, <conf-loc>Changsha, China</conf-loc>, <fpage>634</fpage>&#x2013;<lpage>640</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-642-14932-0_78</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Recognition of crops, diseases and pesticides named entities in Chinese based on conditional random fields</article-title>. <source>Trans. Chin. Soc. Agric. Machinery</source> <volume>48</volume> (<issue>s1</issue>), <fpage>178</fpage>&#x2013;<lpage>185</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.6041/j.issn.1000-1298.2017.S0.029</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mikolov</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Corrado</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Dean</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Efficient estimation of word representations in vector space</article-title>. <source>arXiv</source> [Preprint]. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1301.3781</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nawaz</surname> <given-names>S. A.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Bhatti</surname> <given-names>U. A.</given-names>
</name>
<name>
<surname>Bazai</surname> <given-names>S. U.</given-names>
</name>
<name>
<surname>Zafar</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Bhatti</surname> <given-names>M. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>A hybrid approach to forecast the COVID-19 epidemic trend</article-title>. <source>PloS One</source> <volume>16</volume> (<issue>10</issue>), <elocation-id>e0256971</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0256971</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Peng</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Simplify the usage of lexicon in Chinese NER</article-title>,&#x201d; in <conf-name>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</conf-name>, <fpage>5951</fpage>&#x2013;<lpage>5960</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1908.05969</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ratinov</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Roth</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2009</year>). &#x201c;<article-title>Design challenges and misconceptions in named entity recognition</article-title>,&#x201d; in <conf-name>Proceedings of the Thirteenth Conference on Computational Natural Language Learning</conf-name>, <conf-loc>Boulder, USA</conf-loc>. <fpage>147</fpage>&#x2013;<lpage>155</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3115/1596374.1596399</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yan</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Qiu</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>TENER: adapting transformer encoder for named entity recognition</article-title>. <source>arXiv</source> [Preprint]. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1911.04474</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Teng</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Combining discrete and neural features for sequence labeling</article-title>,&#x201d; in <conf-name>Proceedings of the 17th International Conference</conf-name>, <conf-loc>Konya, Turkey</conf-loc>, <fpage>140</fpage>&#x2013;<lpage>154</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1708.07279</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ye</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Chow</surname> <given-names>K. P.</given-names>
</name>
<name>
<surname>Hui</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Yiu</surname> <given-names>S. M.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Unknown Chinese word extraction based on variety of overlapping strings</article-title>. <source>Inf. Process. Manag.</source> <volume>49</volume>, <fpage>497</fpage>&#x2013;<lpage>512</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ipm.2012.09.004</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Geng</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Geng</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Chinese Named entity recognition for apple diseases and pests based on character augmentation</article-title>. <source>Comput. Electron. Agric.</source> <volume>190</volume>, <elocation-id>106464</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2021.106464</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Chinese NER using lattice LSTM</article-title>,&#x201d; in <conf-name>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics</conf-name>, <conf-loc>Melbourne, Australia</conf-loc>, <fpage>1554</fpage>&#x2013;<lpage>1564</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1805.02023</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Adversarial training based lattice LSTM for Chinese clinical named entity recognition</article-title>. <source>J. Biomed. Inform.</source> <volume>99</volume>, <elocation-id>103290</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jbi.2019.103290</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Named entity recognition of Chinese agricultural text based on attention mechanism</article-title>. <source>Trans. Chin. Soc. Agric. Machinery</source> <volume>52</volume> (<issue>1</issue>), <fpage>185</fpage>&#x2013;<lpage>192</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.6041/j.issn.1000-1298.2021.01.021</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname> <given-names>J. H.</given-names>
</name>
<name>
<surname>Wen-Hua</surname> <given-names>L. I.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>A study on automatic identification for Internet new words according to word-building rule</article-title>. <source>J. Shanxi Univ. (Natural Sci. Edition)</source> <volume>25</volume>, <fpage>115</fpage>&#x2013;<lpage>119</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.13451/j.cnki.shanxi.univ(nat.sci.).2002.02.007</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>