<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2023.1330527</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Tomato leaf disease recognition based on multi-task distillation learning</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Bo</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2403982"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wei</surname>
<given-names>Shusen</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2560981"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Fan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Guo</surname>
<given-names>Nawei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Fan</surname>
<given-names>Hongyu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2393442"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Yao</surname>
<given-names>Wei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2561541"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>    <aff id="aff1">
<sup>1</sup>
<institution>College of Information Science and Technology, Hebei Agricultural University</institution>, <addr-line>Baoding</addr-line>, <country>China</country>
</aff>    <aff id="aff2">
<sup>2</sup>
<institution>Hebei Key Laboratory of Agricultural Big Data</institution>, <addr-line>Baoding</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Dun Wang, Northwest A&amp;F University, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Olarik Surinta, Mahasarakham University, Thailand</p>
<p>Guoxiong Zhou, Central South University of Forestry and Technology, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Wei Yao, <email xlink:href="mailto:YaoWei-hebau@hotmail.com">YaoWei-hebau@hotmail.com</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>30</day>
<month>01</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1330527</elocation-id>
<history>
<date date-type="received">
<day>31</day>
<month>10</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>12</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Liu, Wei, Zhang, Guo, Fan and Yao</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Liu, Wei, Zhang, Guo, Fan and Yao</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Tomato leaf diseases can cause major yield and quality losses. Computer vision techniques for automated disease recognition show promise but face challenges like symptom variations, limited labeled data, and model complexity.</p>
</sec>
<sec>
<title>Methods</title>
<p>Prior works explored hand-crafted and deep learning features for tomato disease classification and multi-task severity prediction, but did not sufficiently exploit the shared and unique knowledge between these tasks. We present a novel multi-task distillation learning (MTDL) framework for comprehensive diagnosis of tomato leaf diseases. It employs knowledge disentanglement, mutual learning, and knowledge integration through a multi-stage strategy to leverage the complementary nature of classification and severity prediction.</p>
</sec>
<sec>
<title>Results</title>
<p>Experiments show our framework improves performance while reducing model complexity. The MTDL-optimized EfficientNet outperforms single-task ResNet101 in classification accuracy by 0.68% and severity estimation by 1.52%, using only 9.46% of its parameters.</p>
</sec>
<sec>
<title>Discussion</title>
<p>The findings demonstrate the practical potential of our framework for intelligent agriculture applications.</p>
</sec>
</abstract>
<kwd-group>
<kwd>multi-task learning</kwd>
<kwd>knowledge distillation</kwd>
<kwd>tomato leaf diseases</kwd>
<kwd>disease classification</kwd>
<kwd>severity prediction</kwd>
</kwd-group>
<counts>
<fig-count count="10"/>
<table-count count="8"/>
<equation-count count="21"/>
<ref-count count="69"/>
<page-count count="19"/>
<word-count count="11048"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Sustainable and Intelligent Phytoprotection</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>    <p>Tomato is one of the most widely cultivated vegetables in the world, with its versatility extending to various applications such as a culinary ingredient (<xref ref-type="bibr" rid="B34">Kumar et&#xa0;al., 2022</xref>), an industrial raw material (<xref ref-type="bibr" rid="B7">Botine&#x15f;tean et&#xa0;al., 2015</xref>), a component in cosmetics (<xref ref-type="bibr" rid="B47">Septiyanti and Meliana, 2020</xref>), and medicinal uses (<xref ref-type="bibr" rid="B35">Kumar et&#xa0;al., 2012</xref>). However, tomato diseases can rapidly spread through a field if not identified and managed in a timely manner, leading to substantial losses in both yield and quality of the crop (<xref ref-type="bibr" rid="B64">Zhang et&#xa0;al., 2022</xref>). As symptoms of many tomato diseases can appear on the leaves, leveraging computer vision techniques for automated recognition of leaf diseases has attracted widespread attention from researchers (<xref ref-type="bibr" rid="B8">Boulent et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B16">Habib et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B40">Nanehkaran et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B45">Roy and Bhaduri, 2021</xref>; <xref ref-type="bibr" rid="B1">Albahli and Nawaz, 2022</xref>; <xref ref-type="bibr" rid="B17">Harakannanavar et&#xa0;al., 2022</xref>). Although these techniques effectively improve the accuracy and speed of disease diagnosis, they also present challenges. These include variations in disease symptoms and lighting conditions (<xref ref-type="bibr" rid="B65">Zhang et&#xa0;al., 2018a</xref>), difficulty in collecting enough disease samples (<xref ref-type="bibr" rid="B66">Zhang et&#xa0;al., 2021</xref>), varying levels of disease severity (<xref ref-type="bibr" rid="B57">Wang et&#xa0;al., 2021</xref>), and limitations in computing power (<xref ref-type="bibr" rid="B6">Bi et&#xa0;al., 2022</xref>). 
Such factors potentially influence the applicability of the learning models.</p>
<p>Most of the computer vision-based leaf disease recognition methods are mainly divided into two categories: hand-crafted feature-based methods and deep learning-based methods. Traditionally, hand-crafted features refer to the manual extraction of specific features such as textures, colors, shapes, and sizes from leaf images. These features are then used as input for training a classifier to identify the presence of plant diseases. The utilization of classical classifiers, such as support vector machines (SVM) (<xref ref-type="bibr" rid="B11">Cortes and Vapnik, 1995</xref>) and random forests (RF) (<xref ref-type="bibr" rid="B9">Breiman, 2001</xref>), has been instrumental in leaf disease identification, owing to their robust nature in handling high-dimensional, noisy, and missing data (<xref ref-type="bibr" rid="B43">Patil et&#xa0;al., 2017</xref>). Consequently, the research community has significantly focused on developing improved methods for feature extraction to enhance recognition performance. <xref ref-type="bibr" rid="B39">Mokhtar et&#xa0;al. (2015)</xref> employed geometric features and histogram features for classifying two tomato leaf viruses, achieving the highest accuracy of 91.5% using the Quadratic kernel function. <xref ref-type="bibr" rid="B38">Meenakshi et&#xa0;al. (2019)</xref> improved plant leaf disease identification using exact Legendre moments shape descriptors, with a high accuracy of 99.1% on three tomato diseases (early and late blight and mosaic). In <xref ref-type="bibr" rid="B44">Rahman et&#xa0;al. (2022)</xref>, texture features from tomato leaf images were analyzed using a gray level co-occurrence matrix (GLCM). In addition to single-type features, hybrid features have been well-studied. <xref ref-type="bibr" rid="B48">Sharif et&#xa0;al. 
(2018)</xref> proposed a hybrid method for automatic detection and classification of six types of diseases in citrus plants, which used color, texture, and geometric features combined in a codebook and selected by PCA score, entropy, and skewness-based covariance vector before being fed to a multi-class SVM. Similarly, <xref ref-type="bibr" rid="B4">Basavaiah and Arlene Anthony (2020)</xref> recognized four main diseases in tomato plants through the fusion of multiple features, including color histograms, Hu Moments, Haralick, and local binary pattern, resulting in 94% accuracy achieved by a RF classifier. In summary, hand-crafted feature-based methods are highly valued for their simplicity and interpretability, and they have demonstrated satisfactory performance on small to medium-sized datasets. However, they struggle to scale up to large and diverse datasets, and fall short in coping with biases and noise in the data distribution, leading to decreased accuracy and robustness in real-world applications.</p>
<p>Recently, deep learning has revolutionized the field of computer vision, resulting in significant improvements in detecting leaf diseases (<xref ref-type="bibr" rid="B53">Sujatha et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B49">Shoaib et&#xa0;al., 2022</xref>). For instance, a novel tomato leaf disease recognition framework was proposed, which used binary Wavelet transform for image preprocessing to remove noise, and both-channel residual attention network (B-ARNet) for identification (<xref ref-type="bibr" rid="B53">Sujatha et&#xa0;al., 2021</xref>). Other types of attention mechanisms are also incorporated to enhance the model&#x2019;s recognition capability. In <xref ref-type="bibr" rid="B69">Zhao et&#xa0;al. (2021)</xref>, to adaptively recalibrate channel-wise feature responses, a squeeze-and-excitation (SE) module (<xref ref-type="bibr" rid="B22">Hu et&#xa0;al., 2018</xref>) is integrated into a ResNet50 network (<xref ref-type="bibr" rid="B18">He et&#xa0;al., 2016</xref>), with an average identification accuracy of 96.81% on the publicly available PlantVillage dataset (<xref ref-type="bibr" rid="B25">Hughes et&#xa0;al., 2015</xref>).</p>
<p>Additionally, <xref ref-type="bibr" rid="B5">Bhujel et&#xa0;al. (2022)</xref> compared the performance and computational complexity of different attention modules and found that the convolutional block attention module (CBAM) (<xref ref-type="bibr" rid="B60">Woo et&#xa0;al., 2018</xref>) was the most effective in enhancing classification performance, resulting in an average accuracy of 99.69%. Despite the successes of these deep learning-based methods, they face limitations such as the need for large amounts of labeled data and substantial computational resources. To address these challenges, researchers have proposed a series of strategies for constructing lightweight networks, such as depthwise separable convolutions (MobileNet (<xref ref-type="bibr" rid="B21">Howard et&#xa0;al., 2017</xref>)), channel shuffling (ShuffleNet (<xref ref-type="bibr" rid="B65">Zhang et&#xa0;al., 2018a</xref>)), and a combination of network scaling and architecture search (EfficientNet (<xref ref-type="bibr" rid="B54">Tan and Le, 2019</xref>)). For example, <xref ref-type="bibr" rid="B63">Zeng et&#xa0;al. (2022)</xref> developed a lightweight CNN model named LDSNet, which uses an improved dense dilated convolution (IDDC) block and coordinated attention scale fusion (CASF) mechanism to identify corn leaf diseases in complex backgrounds. Similarly, <xref ref-type="bibr" rid="B28">Janarthan et&#xa0;al. (2022)</xref> utilized a simplified MobileNetV2 architecture and an empirical method for creating class prototypes, requiring low processing power and storage space. <xref ref-type="bibr" rid="B36">Li et&#xa0;al. (2023)</xref> explored a hybrid transformer-based architecture by integrating shuffle-convolution and a lightweight transformer encoder. 
While compact models achieve computational efficiency gains by reducing the parameters, these gains may come at the cost of decreased accuracy (<xref ref-type="bibr" rid="B2">Atila et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B55">Thai et&#xa0;al., 2023</xref>).</p>
<p>In addition to identifying the presence of a plant disease, it is also crucial to estimate the severity of the disease, providing a quantitative assessment for disease diagnosis (<xref ref-type="bibr" rid="B27">Ilyas et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B29">Ji and Wu, 2022</xref>). The precise localization, size, and distribution of infected regions in plant leaves can significantly enhance the accuracy of disease classification, especially in field images with complex backgrounds (<xref ref-type="bibr" rid="B3">Barbedo, 2019</xref>). Moreover, these factors are vital for severity grading, disease progression monitoring, and assessment of treatment efficacy. The process of estimating the level of leaf diseases often involves two main steps: segmentation and grading. Segmentation refers to the operation of separating infected regions from healthy areas of the leaf or plant. This can be achieved through various methods such as morphological operations (<xref ref-type="bibr" rid="B15">Gupta, 2022</xref>), k-means clustering and thresholding (<xref ref-type="bibr" rid="B32">Karlekar and Seal, 2020</xref>; <xref ref-type="bibr" rid="B52">Singh et&#xa0;al., 2021</xref>), and deep learning-based semantic segmentation (<xref ref-type="bibr" rid="B57">Wang et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B37">Liu et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B13">Deng et&#xa0;al., 2023</xref>). Grading then assigns a numerical score or rating to the severity of the disease, based on proportional area measurement (<xref ref-type="bibr" rid="B61">Wu et&#xa0;al., 2022</xref>) or ordinal categories (<xref ref-type="bibr" rid="B41">Ozguven and Adem, 2019</xref>; <xref ref-type="bibr" rid="B42">Pal and Kumar, 2023</xref>). Considering the complementary nature of disease classification and severity estimation, there is an emerging trend toward multi-task learning. 
This approach aims to jointly optimize both tasks by leveraging shared representations and correlations between them. For example, <xref ref-type="bibr" rid="B30">Ji et&#xa0;al. (2020)</xref> presented a set of binary relevance-CNNs that can simultaneously recognize 7 crop species, classify 10 crop diseases (including healthy), and estimate 3 disease severity levels, achieving the best test accuracy of 86.70% for recognition and 92.93% for severity estimation. Other techniques, such as alternating training (<xref ref-type="bibr" rid="B31">Jiang et&#xa0;al., 2021</xref>) and weighting adjustment (<xref ref-type="bibr" rid="B59">Wang et&#xa0;al., 2022</xref>), have been explored to enhance the accuracy of the combined task. Although multi-task learning can lead to better performance than individual tasks, it may also introduce increased computational effort and suboptimal solutions due to the difficulty in balancing tasks.</p>
<p>To address these challenges, we propose a novel multi-task distillation learning framework for tomato leaf disease diagnosis (MTDL). Unlike traditional distillation learning (<xref ref-type="bibr" rid="B19">Hinton et&#xa0;al., 2015</xref>), which relies on one-to-one and one-way knowledge transfer from a teacher model to a student model, our framework considers tomato disease category identification and severity prediction as a multi-task model that can be optimized simultaneously, as well as two single-task models that can be mutually informative. Accordingly, we develop a learning process for knowledge decoupling and reorganization, facilitating the efficient transfer of knowledge between the two tasks. Furthermore, this process is designed to be integrated with deep networks of varying complexity and architecture, making it adaptable to different disease identification scenarios with diverse computational power configurations and performance requirements.</p>
<p>Specifically, MTDL uses a multi-task model that contains disease classification and severity estimation as the baseline. It adopts a multi-stage learning strategy, including knowledge disentanglement, single-task mutual learning, and knowledge integration. In this process, the goal of knowledge disentanglement is to transfer the shared knowledge from the original multi-task model to the corresponding single-task models. This enables the specialization of task-specific models and avoids negative transfer of knowledge between tasks. For mutual learning between tasks, the goal is to fully exploit the complementarity between different learning objectives. Finally, through knowledge integration, the disentangled and mutually learned knowledge components are re-combined and unified to produce the refined high-quality multi-task model.</p>
<p>Furthermore, considering that multi-stage distillation learning will lead to a dependency of the current student model on the teacher model from the previous stage, we propose a decoupled teacher-free knowledge distillation (DTF-KD) strategy to simplify the training process. DTF-KD introduces a virtual teacher, replacing the traditional teacher model in the distillation process. This approach allows for increased adaptability by applying different learning intensities to target and non-target knowledge. In the context of the classification problem addressed in this paper, the target knowledge corresponds to the correct classification assignment of the ground truth.</p>    <p>The main contributions of this paper are summarized as follows:</p>
<list list-type="order">
<list-item>
<p>We propose a novel multi-task distillation learning (MTDL) framework for leaf disease identification. This framework progressively decomposes and integrates the inherent knowledge from two tasks: tomato disease classification and severity prediction, through a distillation process, thereby generating a robust multi-task model for comprehensive disease diagnosis.</p>
</list-item>
<list-item>
<p>We propose a decoupled teacher-free knowledge distillation (DTF-KD) method to simplify MTDL by reducing the reliance on teacher models during the learning process. A virtual teacher is introduced to guide the learning process by providing separate instructions for the correct class and non-correct classes.</p>
</list-item>
<list-item>
<p>The experimental results demonstrate that the proposed framework effectively leverages the complementary characteristics of tomato disease category identification and severity prediction, reducing the model size while improving the performance.</p>
</list-item>
</list>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Dataset</title>
<p>The dataset employed in this study is aggregated from three distinct sources. The first source is drawn from the AI Challenger 2018 Crop Leaf Disease Challenge (<xref ref-type="bibr" rid="B12">Dataset AI Challenger, 2018</xref>), encompassing 11 types of plants and 27 types of diseases. Some of these diseases are further categorized into general and severe degrees, resulting in a total of 61 categories. Specifically, the dataset includes instances of leaf diseases for the following plants: apple (2,765), grape (3,144), peach (2,146), potato (3,246), citrus (4,577), pepper (1,929), strawberry (1,263), cherry (939), maize (3,514), pumpkin (1,465), and tomato (11,610). For the purposes of our study, we focus on the tomato subset. However, as the dataset contains only three samples of Canker disease, we decide to exclude this category from our analysis. The second source, the PlantDoc dataset (<xref ref-type="bibr" rid="B51">Singh et&#xa0;al., 2020</xref>), consists of 2,598 data samples that involve 13 types of plants and 27 categories (17 diseases, 11 healthy). These samples were mainly obtained from the internet and manually annotated, with the tomato subset containing 8 categories. The third source is the Taiwan Tomato Disease dataset (<xref ref-type="bibr" rid="B23">Huang and Chang, 2020</xref>), which originally comprises 622 samples and was first employed in the study detailed in <xref ref-type="bibr" rid="B56">Thuseethan et&#xa0;al. (2022)</xref>. In addition, it encompasses six distinct categories, namely Bacterial Spotted (110), Leaf Mold (67), Gray Spot (84), Health (106), Late Blight (98), and Powdery Mildew (157). We choose this dataset for its diverse disease conditions and combine it with larger datasets like AI Challenger 2018 and PlantDoc to further enrich the diversity of our data. <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref> shows examples of different tomato leaf diseases.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Examples of tomato diseases from the datasets.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1330527-g001.tif"/>
</fig>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data preprocessing</title>
<p>For the AI Challenger dataset, given the scarcity of data for the canker disease category (only 3 instances), we excluded this data. The dataset provided severity labels for most of the data, categorized into three levels: healthy, moderate, and severe. In addition, we supplemented the dataset with severity labels for the tomato spotted wilt virus. For the PlantDoc dataset, due to the complexity of the leaf background, we manually cropped the tomato leaf subset to meet the needs of the disease identification task. Each image was cropped to retain the main area of a single leaf while preserving some background information from the plant. For the Taiwan Tomato dataset, we used all the original data. For all three datasets, we applied consistent severity labeling. Specifically, we hired five agricultural experts to manually annotate the severity of the disease. The final severity level was determined by a majority vote. <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref> summarizes the information about the three datasets used in this study.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Summary of main datasets used in the study.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Dataset</th>
<th valign="top" colspan="3" align="center">AIChallenger2018</th>
<th valign="top" colspan="3" align="center">PlantDoc</th>
<th valign="top" colspan="3" align="center">Taiwan</th>
<th valign="top" align="center">Total</th>
</tr>
<tr>
<th valign="top" align="center">Class</th>
<th valign="top" align="center">Healthy</th>
<th valign="top" align="center">Moderate</th>
<th valign="top" align="center">Severe</th>
<th valign="top" align="center">Healthy</th>
<th valign="top" align="center">Moderate</th>
<th valign="top" align="center">Severe</th>
<th valign="top" align="center">Healthy</th>
<th valign="top" align="center">Moderate</th>
<th valign="top" align="center">Severe</th>
<th valign="top" align="center"/>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">Health</td>
<td valign="top" align="center">1381</td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center">120</td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center">106</td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center">1607</td>
</tr>
<tr>
<td valign="top" align="center">Late Blight</td>
<td valign="top" align="center"/>
<td valign="top" align="center">302</td>
<td valign="top" align="center">1267</td>
<td valign="top" align="center"/>
<td valign="top" align="center">10</td>
<td valign="top" align="center">29</td>
<td valign="top" align="center"/>
<td valign="top" align="center">16</td>
<td valign="top" align="center">82</td>
<td valign="top" align="center">1706</td>
</tr>
<tr>
<td valign="top" align="center">Leaf Mold</td>
<td valign="top" align="center"/>
<td valign="top" align="center">371</td>
<td valign="top" align="center">384</td>
<td valign="top" align="center"/>
<td valign="top" align="center">40</td>
<td valign="top" align="center">67</td>
<td valign="top" align="center"/>
<td valign="top" align="center">22</td>
<td valign="top" align="center">45</td>
<td valign="top" align="center">929</td>
</tr>
<tr>
<td valign="top" align="center">Early Blight</td>
<td valign="top" align="center"/>
<td valign="top" align="center">287</td>
<td valign="top" align="center">505</td>
<td valign="top" align="center"/>
<td valign="top" align="center">22</td>
<td valign="top" align="center">86</td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center">900</td>
</tr>
<tr>
<td valign="top" align="center">Septoria Leaf Spot Fungus</td>
<td valign="top" align="center"/>
<td valign="top" align="center">481</td>
<td valign="top" align="center">922</td>
<td valign="top" align="center"/>
<td valign="top" align="center">23</td>
<td valign="top" align="center">141</td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center">1567</td>
</tr>
<tr>
<td valign="top" align="center">Gray Spot</td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center">25</td>
<td valign="top" align="center">59</td>
<td valign="top" align="center">84</td>
</tr>
<tr>
<td valign="top" align="center">Yellowing Varicose Leaf</td>
<td valign="top" align="center"/>
<td valign="top" align="center">1616</td>
<td valign="top" align="center">2790</td>
<td valign="top" align="center"/>
<td valign="top" align="center">35</td>
<td valign="top" align="center">88</td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center">4529</td>
</tr>
<tr>
<td valign="top" align="center">Bacterial Spotted</td>
<td valign="top" align="center"/>
<td valign="top" align="center">47</td>
<td valign="top" align="center">27</td>
<td valign="top" align="center"/>
<td valign="top" align="center">15</td>
<td valign="top" align="center">56</td>
<td valign="top" align="center"/>
<td valign="top" align="center">29</td>
<td valign="top" align="center">81</td>
<td valign="top" align="center">255</td>
</tr>
<tr>
<td valign="top" align="center">Mosaic Virus</td>
<td valign="top" align="center"/>
<td valign="top" align="center">104</td>
<td valign="top" align="center">194</td>
<td valign="top" align="center"/>
<td valign="top" align="center">26</td>
<td valign="top" align="center">43</td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center">367</td>
</tr>
<tr>
<td valign="top" align="center">Spider Mite Damage</td>
<td valign="top" align="center"/>
<td valign="top" align="center">619</td>
<td valign="top" align="center">310</td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center">929</td>
</tr>
<tr>
<td valign="top" align="center">Powdery Mildew</td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center">47</td>
<td valign="top" align="center">110</td>
<td valign="top" align="center">157</td>
</tr>
<tr>
<td valign="top" align="center">Total</td>
<td valign="top" align="center">1381</td>
<td valign="top" align="center">3827</td>
<td valign="top" align="center">6399</td>
<td valign="top" align="center">120</td>
<td valign="top" align="center">171</td>
<td valign="top" align="center">510</td>
<td valign="top" align="center">106</td>
<td valign="top" align="center">139</td>
<td valign="top" align="center">377</td>
<td valign="top" align="center">13030</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>We divide the dataset into training, validation, and test sets in an 8:1:1 ratio, ensuring a balanced and representative distribution for each set. The division is performed randomly to maintain fairness and diversity. Furthermore, we rigorously validate both the results reported in the paper and the determination of hyperparameters through 10-fold cross-validation.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Multi-task distillation framework</title>
<p>The proposed MTDL for tomato leaf disease diagnosis is comprised of three components: two single-task models, one for disease recognition and the other for severity prediction, and a hybrid model that integrates these two tasks. As illustrated in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>, the MTDL pipeline enables mutual knowledge transfer between the two individual tasks, facilitating knowledge disentanglement and integration to enhance performance. In traditional distillation learning processes (<xref ref-type="bibr" rid="B19">Hinton et&#xa0;al., 2015</xref>), a powerful teacher model transfers knowledge to a lightweight student model. However, our MTDL framework emphasizes bidirectional knowledge transfer between teacher and student models, allowing for greater flexibility in their selection.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Architecture of the multi-task distillation learning (MTDL). The MTDL framework uses a three-stage distillation process involving single-task models <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and a multi-task model <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Initially, knowledge from <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is transferred to the single-task models. Then, <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> share knowledge. Finally, their knowledge is integrated back into <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, creating an improved multi-task model <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. For simplicity, sample indices are omitted from the symbols in the figure. Additionally, the temperature parameter <italic>T</italic> in KD is fixed at <italic>t</italic> during the learning process. <bold>(A)</bold> Knowledge Disentanglement, <bold>(B)</bold> Mutual Knowledge Transfer, <bold>(C)</bold> Knowledge Integration.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1330527-g002.tif"/>
</fig>
<sec id="s2_3_1">
<label>2.3.1</label>
<title>Problem formulation</title>
<p>We are given a leaf disease dataset <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mo>=</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>}</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> containing <italic>N</italic> images, where <inline-formula>
<mml:math display="inline" id="im10">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">x</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the <italic>i</italic>-th leaf image with <italic>C</italic>, <italic>H</italic>, and <italic>W</italic> denoting the number of channels, height, and width of the image, respectively. Each image is labeled with two types of annotations: <inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:mo>{</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the disease category label, with <italic>K<sup>c</sup>
</italic> being the number of disease categories, and <inline-formula>
<mml:math display="inline" id="im12">
<mml:mrow>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:mo>{</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the disease severity label, with <italic>K<sup>s</sup>
</italic> being the number of severity levels.</p>
<p>In MTDL, there are three basic tasks denoted as <inline-formula>
<mml:math display="inline" id="im13">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for disease category recognition, <inline-formula>
<mml:math display="inline" id="im14">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for severity estimation, and <inline-formula>
<mml:math display="inline" id="im15">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for the hybrid task that jointly performs <inline-formula>
<mml:math display="inline" id="im16">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im17">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. As shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>, each task uses a standard ResNet50 (<xref ref-type="bibr" rid="B18">He et&#xa0;al., 2016</xref>) as the backbone for feature extraction. In particular, the two single tasks <inline-formula>
<mml:math display="inline" id="im18">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im19">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> each use a multi-layer perceptron (MLP) to output the logits of their corresponding task, denoted as <inline-formula>
<mml:math display="inline" id="im20">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">z</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im21">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">z</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, respectively. For <inline-formula>
<mml:math display="inline" id="im22">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, two separate MLPs are used to perform two tasks simultaneously on a shared backbone, and the output is denoted as <inline-formula>
<mml:math display="inline" id="im23">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">z</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>h</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">z</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>:</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">z</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math display="inline" id="im24">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">z</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im25">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">z</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> correspond to the logits for the disease category and severity, respectively. Usually, a softmax function is applied to the output of each task to produce the predicted probabilities, <inline-formula>
<mml:math display="inline" id="im26">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im27">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im28">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>h</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>:</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, respectively. Guided by these three basic tasks, MTDL employs a designed knowledge routing mechanism to build a tomato disease diagnosis model. The process begins with the distillation of multi-task knowledge from <inline-formula>
<mml:math display="inline" id="im29">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> back to the corresponding task models <inline-formula>
<mml:math display="inline" id="im30">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im31">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> (as shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2A</bold>
</xref>). These two models then engage in mutual learning (as shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2B</bold>
</xref>). Finally, the knowledge from these two models is integrated to output an enhanced multi-task model, namely <inline-formula>
<mml:math display="inline" id="im32">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> (as shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>). The detailed learning process is described in the following sections, including knowledge disentanglement (Section 2.3.2), mutual knowledge transfer (Section 2.3.3), and knowledge integration (Section 2.3.4).</p>
</sec>
<sec id="s2_3_2">
<label>2.3.2</label>
<title>Knowledge disentanglement</title>
<p>Multi-task learning has demonstrated its advantages in leveraging shared information among related tasks to improve performance on individual tasks. However, directly training a multi-task model can be suboptimal, as the tasks may have different levels of difficulty. For instance, the task of severity estimation is more challenging than the leaf disease classification task because it typically necessitates a finer analysis of the leaf and disease spot attributes (<xref ref-type="bibr" rid="B58">Wang et&#xa0;al., 2017</xref>). Therefore, given a multi-task model <inline-formula>
<mml:math display="inline" id="im33">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> pre-trained on dataset <italic>D</italic>, as shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2A</bold>
</xref>, it is reasonable to disentangle the shared knowledge and transfer it back to the single-task models, i.e., <inline-formula>
<mml:math display="inline" id="im34">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im35">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, using knowledge distillation (<xref ref-type="bibr" rid="B19">Hinton et&#xa0;al., 2015</xref>). Specifically, when distilling knowledge from <inline-formula>
<mml:math display="inline" id="im36">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula>
<mml:math display="inline" id="im37">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, we first soften the probability <inline-formula>
<mml:math display="inline" id="im38">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> by:</p>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>exp</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">/</mml:mo>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mtext>exp&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">/</mml:mo>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>T</italic> is the temperature hyperparameter that controls the sharpness of <inline-formula>
<mml:math display="inline" id="im39">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im40">
<mml:mrow>
<mml:msubsup>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the <italic>j</italic>-th element of <inline-formula>
<mml:math display="inline" id="im41">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math display="inline" id="im42">
<mml:mrow>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> denotes the softened probability of the <italic>j</italic>-th class for the <italic>i</italic>-th input sample. The formulation of the knowledge distillation process from <inline-formula>
<mml:math display="inline" id="im43">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula>
<mml:math display="inline" id="im44">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> involves minimizing the loss function <inline-formula>
<mml:math display="inline" id="im45">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, which is defined as follows:</p>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">y</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im46">
<mml:mrow>
<mml:mo>&#xa0;</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the cross-entropy loss, which measures the dissimilarity between the predicted probability distribution <inline-formula>
<mml:math display="inline" id="im47">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and the one-hot ground-truth label vector <inline-formula>
<mml:math display="inline" id="im48">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">y</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> for the single-task model <inline-formula>
<mml:math display="inline" id="im49">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. It can be written as shown in <xref ref-type="disp-formula" rid="eq3">Equation 3</xref>:</p>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">y</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mtext>log&#xa0;</mml:mtext>
<mml:msubsup>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>And <inline-formula>
<mml:math display="inline" id="im50">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the knowledge distillation loss, which quantifies the divergence between <inline-formula>
<mml:math display="inline" id="im51">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im52">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, is defined as shown in <xref ref-type="disp-formula" rid="eq4">Equation 4</xref>:</p>
<disp-formula id="eq4">
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mtext>log&#xa0;</mml:mtext>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Similar to <xref ref-type="disp-formula" rid="eq2">Equation 2</xref>, we can define a loss function from <inline-formula>
<mml:math display="inline" id="im53">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula>
<mml:math display="inline" id="im54">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, denoted as <inline-formula>
<mml:math display="inline" id="im55">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, which is given by:</p>
<disp-formula id="eq5">
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">y</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im56">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the probability distribution obtained by softening the severity prediction output <inline-formula>
<mml:math display="inline" id="im57">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> from <inline-formula>
<mml:math display="inline" id="im58">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> (referred to in <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>), and <inline-formula>
<mml:math display="inline" id="im59">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the output from <inline-formula>
<mml:math display="inline" id="im60">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s2_3_3">
<label>2.3.3</label>
<title>Mutual knowledge transfer</title>
<p>Upon completing the knowledge disentanglement process, the shared knowledge from the hybrid tasks <inline-formula>
<mml:math display="inline" id="im61">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is individually transferred back to the corresponding subtasks, i.e., <inline-formula>
<mml:math display="inline" id="im62">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for disease species classification and <inline-formula>
<mml:math display="inline" id="im63">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for disease severity identification. We then employ mutual distillation to further investigate the complementarity of the two subtasks. Here, we assume that <inline-formula>
<mml:math display="inline" id="im64">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im65">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> use the same backbones, such as ResNet50. Motivated by <xref ref-type="bibr" rid="B33">Komodakis and Zagoruyko (2016)</xref>, as shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2B</bold>
</xref>, the commonality of knowledge between subtasks is reflected in the consistency of attention maps from the middle layer. Specifically, given two feature mappings, <inline-formula>
<mml:math display="inline" id="im66">
<mml:mrow>
<mml:msubsup>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im67">
<mml:mrow>
<mml:msubsup>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, which are the outputs of layer <italic>l</italic> of the models <inline-formula>
<mml:math display="inline" id="im68">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im69">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, respectively, we can calculate the attention maps, <inline-formula>
<mml:math display="inline" id="im70">
<mml:mrow>
<mml:msubsup>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im71">
<mml:mrow>
<mml:msubsup>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, as shown in <xref ref-type="disp-formula" rid="eq6">Equation 6</xref>:</p>
<disp-formula id="eq6">
<label>(6)</label>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:msubsup>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2003;</mml:mtext>
<mml:msubsup>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>C<sub>i</sub>
</italic> is the number of channels in the feature mappings of
<inline-formula>
<mml:math display="inline" id="im72">
<mml:mrow>
<mml:msubsup>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im73">
<mml:mrow>
<mml:msubsup>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, and (<italic>k,x,y</italic>) specifies the location and channel of an activation value within the feature mapping. The attention maps <inline-formula>
<mml:math display="inline" id="im74">
<mml:mrow>
<mml:msubsup>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im75">
<mml:mrow>
<mml:msubsup>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> are computed by averaging the activation values across the channels of the respective feature mappings, <inline-formula>
<mml:math display="inline" id="im76">
<mml:mrow>
<mml:msubsup>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im77">
<mml:mrow>
<mml:msubsup>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. For stability of optimization, we first reshape the <inline-formula>
<mml:math display="inline" id="im78">
<mml:mrow>
<mml:msubsup>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im79">
<mml:mrow>
<mml:msubsup>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> into a vector form as <inline-formula>
<mml:math display="inline" id="im80">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">a</mml:mtext>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mtext mathvariant="bold">vec</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im81">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">a</mml:mtext>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mtext mathvariant="bold">vec</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>A</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where vec(.) is an operation that transforms a matrix into a vector by concatenating its columns. Then, we normalize the vectors using <italic>l</italic>
<sub>2</sub> norm as shown in <xref ref-type="disp-formula" rid="eq7">Equation 7</xref>:</p>
<disp-formula id="eq7">
<label>(7)</label>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mover accent="true">
<mml:mi>a</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:mstyle>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>a</mml:mi>
</mml:mstyle>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="true">&#x2016;</mml:mo>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>a</mml:mi>
</mml:mstyle>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo stretchy="true">&#x2016;</mml:mo>
<mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2003;</mml:mtext>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mover accent="true">
<mml:mi>a</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:mstyle>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>a</mml:mi>
</mml:mstyle>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="true">&#x2016;</mml:mo>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>a</mml:mi>
</mml:mstyle>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo stretchy="true">&#x2016;</mml:mo>
<mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The attention transfer loss for layer <italic>l</italic> is written as shown in <xref ref-type="disp-formula" rid="eq8">Equation 8</xref>:</p>
<disp-formula id="eq8">
<label>(8)</label>
<mml:math display="block" id="M8">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mover accent="true">
<mml:mi>a</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:mstyle>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mover accent="true">
<mml:mi>a</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:mstyle>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">&#x2016;</mml:mo>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mover accent="true">
<mml:mi>a</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:mstyle>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mover accent="true">
<mml:mi>a</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:mstyle>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo stretchy="true">&#x2016;</mml:mo>
<mml:msubsup>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>And the total loss for mutual learning between subtasks is defined as follows:</p>
<disp-formula id="eq9">
<label>(9)</label>
<mml:math display="block" id="M9">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x2194;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">y</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">y</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>L</mml:mi>
</mml:mfrac>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>L</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">a</mml:mtext>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mi>l</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">a</mml:mtext>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>L</italic> denotes the number of layers considered for attention transfer loss.</p>
</sec>
<sec id="s2_3_4">
<label>2.3.4</label>
<title>Knowledge integration</title>
<p>The primary objective of the proposed MTDL is to enhance multi-task learning capabilities. In the final step of this learning framework, we consider the two sub-tasks after mutual learning, <inline-formula>
<mml:math display="inline" id="im83">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im84">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
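</inline-formula>, and reintegrate them into the original multi-tasking model, denoted as <inline-formula>
<mml:math display="inline" id="im82">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. As shown in <xref ref-type="fig" rid="f2">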
<bold>Figure&#xa0;2C</bold>
</xref>, this reintegration process results in an enhanced multi-task model <inline-formula>
<mml:math display="inline" id="im85">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
<mml:mo>'</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. The knowledge integration loss is formulated as follows:</p>
<disp-formula id="eq10">
<label>(10)</label>
<mml:math display="block" id="M10">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mtable>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:mo stretchy="false">[</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">y</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">y</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im86">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im87">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> represent the output of softened probability distributions of <inline-formula>
<mml:math display="inline" id="im88">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im89">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, respectively, which are obtained by applying the process described in <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>. The whole process of MTDL is summarized in <xref ref-type="fig" rid="f10">
<bold>Algorithm 1</bold>
</xref>.</p>
<fig id="f10" position="float">
<label>Algorithm 1</label>
<caption>
<p>MTDL process.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1330527-g010.tif"/>
</fig>
</sec>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Teacher-free based MTDL</title>
<p>In the staged learning process of MTDL, the current stage can be considered the teacher model for subsequent stages. While this approach fully utilizes the process of knowledge transfer, it also leads to a dependency on the teacher model, thereby reducing the flexibility of the framework. To overcome this limitation, inspired by the work of <xref ref-type="bibr" rid="B62">Yuan et&#xa0;al. (2020)</xref> and <xref ref-type="bibr" rid="B68">Zhao et&#xa0;al. (2022)</xref>, we propose a decoupled teacher-free KD (DTF-KD) method. In the following sections, we first present the general form of the DTF-KD, and then demonstrate how it can be applied to MTDL.</p>
<p>In the absence of a teacher model, we introduce a virtual teacher. We define the output of this virtual teacher as a categorical distribution, <italic>v<sub>i,j</sub>
</italic>, given by:</p>
<disp-formula id="eq11">
<label>(11)</label>
<mml:math display="block" id="M11">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mi>&#x3b1;</mml:mi>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mtext>&#x2009;if&#x2009;</mml:mtext>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">/</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mtext>&#x2009;if&#x2009;</mml:mtext>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mo>\</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>&#x3b1;</italic> is a predefined constant, typically &#x2265; 0.95, <italic>t</italic> is the correct class or target class for the <italic>i</italic>-th sample, <italic>K</italic> is the total number of classes, <italic>j</italic> represents the class index, and \<italic>t</italic> denotes all classes except the correct class <italic>t</italic>. This definition ensures that the virtual teacher assigns the highest probability to the correct class, while distributing the remaining probability equally among the incorrect classes.</p>
<p>In our proposed DTF-KD method, we divide the information distillation process into two parts: teacher-free based correct class KD (CC-KD) and teacher-free based non-correct class KD (NCC-KD). CC-KD focuses on the learning of target knowledge. It aims to transfer knowledge that is particularly important or challenging for the student model. In CC-KD, according to <xref ref-type="disp-formula" rid="eq11">Equation 11</xref>, the binary probability outputs of the virtual teacher for the correct class <italic>t</italic> and the <italic>K</italic>&#x2212;1 non-correct classes are denoted as <inline-formula>
<mml:math display="inline" id="im103">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>\</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. These outputs are calculated using:</p>
<disp-formula id="eq12">
<label>(12)</label>
<mml:math display="block" id="M12">
<mml:mrow>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>exp</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mtext>exp</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:msubsup>
<mml:mtext>exp</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2003;</mml:mtext>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>\</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:msubsup>
<mml:mtext>exp</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mtext>exp</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:msubsup>
<mml:mtext>exp</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Correspondingly, for the student model, we can obtain <inline-formula>
<mml:math display="inline" id="im104">
<mml:mrow>
<mml:msub>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>b</mml:mi>
</mml:mstyle>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>\</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, defined as: </p>
<disp-formula id="eq13">
<label>(13)</label>
<mml:math display="block" id="M13">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>exp</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:msubsup>
<mml:mi>exp</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2003;</mml:mtext>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2216;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:msubsup>
<mml:mtext>exp&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:msubsup>
<mml:mi>exp</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>z<sub>i,j</sub>
</italic> represents the logit for the <italic>j</italic>-th class of the <italic>i</italic>-th instance of the student model. Therefore, combining <xref ref-type="disp-formula" rid="eq12">Equations 12</xref> and <xref ref-type="disp-formula" rid="eq13">13</xref>, the loss function of CC-KD can be written as:</p>
<disp-formula id="eq14">
<label>(14)</label>
<mml:math display="block" id="M14">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>b</mml:mi>
</mml:mstyle>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>q</mml:mi>
</mml:mstyle>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
<mml:mi>log</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2216;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
<mml:mi>log</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2216;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2216;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In NCC-KD, we consider the probability outputs for the <italic>K</italic>&#x2212;1 non-correct classes, denoted as <inline-formula>
<mml:math display="inline" id="im106">
<mml:mrow>
<mml:msubsup>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for the virtual teacher and <inline-formula>
<mml:math display="inline" id="im107">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for the student model. For each <italic>m</italic> &#x2208; {1, 2,&#x2026;,<italic>K</italic>}\{<italic>t</italic>}, we calculate these outputs as follows:</p>
<disp-formula id="eq15">
<label>(15)</label>
<mml:math display="block" id="M15">
<mml:mrow>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>q</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>exp&#xa0;</mml:mtext>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:msubsup>
<mml:mi>exp</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2003;</mml:mtext>
<mml:msub>
<mml:mover accent="true">
<mml:mi>p</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>exp</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:msubsup>
<mml:mi>exp</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>v<sub>i,m</sub>
</italic> is defined in <xref ref-type="disp-formula" rid="eq11">Equation 11</xref>, and <italic>z<sub>i,m</sub>
</italic> represents the logit for the <italic>m</italic>-th class of the <italic>i</italic>-th instance from the student model. According to <xref ref-type="disp-formula" rid="eq15">Equation 15</xref>, the NCC-KD loss function is then defined as:</p>
<disp-formula id="eq16">
<label>(16)</label>
<mml:math display="block" id="M16">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:munderover>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>q</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
<mml:mi>log</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>q</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>p</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Combining <xref ref-type="disp-formula" rid="eq14">Equations 14</xref> and <xref ref-type="disp-formula" rid="eq16">16</xref>, the total loss of DTF-KD is given by:</p>
<disp-formula id="eq17">
<label>(17)</label>
<mml:math display="block" id="M17">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>K</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>b</mml:mi>
</mml:mstyle>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>q</mml:mi>
</mml:mstyle>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mover accent="true">
<mml:mi>p</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
</mml:mstyle>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mover accent="true">
<mml:mi>q</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
</mml:mstyle>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>b</mml:mi>
</mml:mstyle>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>q</mml:mi>
</mml:mstyle>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mover accent="true">
<mml:mi>p</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
</mml:mstyle>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mover accent="true">
<mml:mi>q</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
</mml:mstyle>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>According to DTF-KD, we propose two variants of the MTDL framework. The first variant, shown in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref> and referred to as partially teacher-free MTDL (MTDL-PTF), eliminates the knowledge disentanglement stage from the MTDL process, thereby removing the dependency on the initial multi-task teacher model, denoted as <inline-formula>
<mml:math display="inline" id="im108">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. To compensate for the absence of <inline-formula>
<mml:math display="inline" id="im109">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, we introduce two virtual teacher models corresponding to the two learning tasks of disease category recognition and severity estimation, denoted as <inline-formula>
<mml:math display="inline" id="im110">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im111">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, respectively. For <inline-formula>
<mml:math display="inline" id="im112">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, as described in <xref ref-type="disp-formula" rid="eq12">Equations 12</xref>, <xref ref-type="disp-formula" rid="eq13">13</xref> and <xref ref-type="disp-formula" rid="eq15">15</xref>, we obtain <inline-formula>
<mml:math display="inline" id="im113">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im114">
<mml:mrow>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>b</mml:mi>
</mml:mstyle>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for the distillation outputs for the correct class, as well as <inline-formula>
<mml:math display="inline" id="im115">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mo stretchy="true">&#x2dc;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im116">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mo stretchy="true">&#x2dc;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>c</mml:mi>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for the non-correct classes. Similarly, for <inline-formula>
<mml:math display="inline" id="im117">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, we can obtain <inline-formula>
<mml:math display="inline" id="im118">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="bold">q</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im119">
<mml:mrow>
<mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>b</mml:mi>
</mml:mstyle>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for the correct severity level. For the non-correct severity levels, we can also obtain <inline-formula>
<mml:math display="inline" id="im120">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mo stretchy="true">&#x2dc;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im121">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mi mathvariant="bold">p</mml:mi>
<mml:mo stretchy="true">&#x2dc;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>s</mml:mi>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. Therefore, the loss for the mutual knowledge transfer process in MTDL-PTF is given in <xref ref-type="disp-formula" rid="eq18">Equation 18</xref>:</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Overview of the decoupled teacher-free (DTF) based MTDL. <bold>(A)</bold> Partially teacher-free MTDL (MTDL-PTF): Eliminating dependency on the multi-task teacher model in the knowledge disentanglement stage. <bold>(B)</bold> Teacher-Free MTDL (MTDL-TF): Simplifying MTDL to only retain the final knowledge integration stage with virtual teachers.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1330527-g003.tif"/>
</fig>
<disp-formula id="eq18">
<label>(18)</label>
<mml:math display="block" id="M18">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x2194;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x2194;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:mo>[</mml:mo>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>K</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">b</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>K</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">b</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im122">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x2194;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im123">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>K</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are defined in <xref ref-type="disp-formula" rid="eq9">Equations 9</xref> and <xref ref-type="disp-formula" rid="eq17">17</xref>, respectively.</p>
<p>In the second variant of MTDL, named teacher-free MTDL (MTDL-TF), we completely abandon the teacher model. The process of MTDL-TF is illustrated in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3B</bold>
</xref>. Instead, we directly introduce the distillation information from the virtual teacher models <inline-formula>
<mml:math display="inline" id="im124">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im125">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> into <inline-formula>
<mml:math display="inline" id="im126">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>; the corresponding loss is defined in <xref ref-type="disp-formula" rid="eq19">Equation 19</xref>:</p>
<disp-formula id="eq19">
<label>(19)</label>
<mml:math display="block" id="M19">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x2112;</mml:mi>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
<mml:mi>v</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mtable>
<mml:mtr>
<mml:mtd>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">y</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>K</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">b</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">y</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>K</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">b</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">q</mml:mtext>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im127">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">b</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im128">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">b</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> are two binary probability outputs corresponding to the correct class and non-correct classes for the disease category recognition and severity estimation tasks, respectively, in the hybrid model <inline-formula>
<mml:math display="inline" id="im129">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. They can be obtained via <inline-formula>
<mml:math display="inline" id="im130">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">z</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im131">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold">z</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> using <xref ref-type="disp-formula" rid="eq13">Equation 13</xref>. Accordingly, the outputs for the non-correct classes in <inline-formula>
<mml:math display="inline" id="im132">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im133">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mo stretchy="true">&#x2dc;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im134">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mtext mathvariant="bold">p</mml:mtext>
<mml:mo stretchy="true">&#x2dc;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, can be calculated by <xref ref-type="disp-formula" rid="eq15">Equation 15</xref>.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Experimental results and discussion</title>
<sec id="s3_1">
<label>3.1</label>
<title>Experimental setup</title>
<sec id="s3_1_1">
<label>3.1.1</label>
<title>Model training</title>
<p>The MTDL framework consists of three main components: knowledge disentanglement, subtask mutual learning, and knowledge integration. To ensure simplicity and generality of the framework, we employ a consistent training strategy for different learning components. Specifically, the framework is trained using the SGD optimizer with a batch size of 32 and a momentum of 0.9. The initial learning rate is set to 0.001, and it is reduced by a factor of 0.1 every 20 epochs. The weight decay is set to 1e-4. The maximum number of training epochs is set to 100, and an early stopping strategy is used based on the validation performance. If the validation loss does not improve for 5 consecutive epochs, the training process is stopped.</p>
</sec>
<sec id="s3_1_2">
<label>3.1.2</label>
<title>Hyperparameter settings</title>
<p>The MTDL framework involves three main stages of knowledge distillation, which correspond to the objective functions in <xref ref-type="disp-formula" rid="eq2">Equations 2</xref>, <xref ref-type="disp-formula" rid="eq9">9</xref>, and <xref ref-type="disp-formula" rid="eq10">10</xref>. During the process, we use a temperature parameter <italic>T</italic> to smooth the output of the teacher model. This hyperparameter is determined through cross-validation using the validation set. A comprehensive analysis of hyperparameter selection can be found in Section 3.3.4.</p>
</sec>
<sec id="s3_1_3">
<label>3.1.3</label>
<title>Evaluation metrics</title>
<p>To evaluate the performance of the proposed MTDL method, we employ four commonly used evaluation metrics, namely Accuracy, Precision, Recall, and F1-score. Given true positives (TP), true negatives (TN), false positives (FP), and false negatives (FN), these metrics are defined in <xref ref-type="disp-formula" rid="eq20">Equations 20</xref> and <xref ref-type="disp-formula" rid="eq21">21</xref>:</p>
<disp-formula id="eq20">
<label>(20)</label>
<mml:math display="block" id="M20">
<mml:mrow>
<mml:mtext>Accuracy</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mtext>TN</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mtext>FP</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mtext>FN</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mtext>TN</mml:mtext>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2003;Precision</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mtext>FP</mml:mtext>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2003;Recall</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mtext>FN</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq21">
<label>(21)</label>
<mml:math display="block" id="M21">
<mml:mrow>
<mml:mtext>F</mml:mtext>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mtext>score</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>Precision</mml:mtext>
<mml:mo>&#xd7;</mml:mo>
<mml:mtext>Recall</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>Precision</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mtext>Recall</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</sec>
<sec id="s3_1_4">
<label>3.1.4</label>
<title>Baseline methods</title>
<p>The MTDL framework is a flexible knowledge distillation approach designed for tomato disease diagnosis. It aims to improve the performance of recognition models while reducing their parameter size and can be combined with various existing neural network architectures. To demonstrate the versatility of the MTDL framework, we incorporate four conventional network models, namely ResNet101 (<xref ref-type="bibr" rid="B18">He et&#xa0;al., 2016</xref>), ResNet50 (<xref ref-type="bibr" rid="B18">He et&#xa0;al., 2016</xref>), DenseNet121 (<xref ref-type="bibr" rid="B24">Huang et&#xa0;al., 2017</xref>), and VGG16 (<xref ref-type="bibr" rid="B50">Simonyan and Zisserman, 2014</xref>), as well as four lightweight network models, namely EfficientNet (<xref ref-type="bibr" rid="B54">Tan and Le, 2019</xref>), ShuffleNetV2 (<xref ref-type="bibr" rid="B67">Zhang et&#xa0;al., 2018b</xref>), MobileNetV3 (<xref ref-type="bibr" rid="B20">Howard et&#xa0;al., 2019</xref>), and SqueezeNet (<xref ref-type="bibr" rid="B26">Iandola et&#xa0;al., 2016</xref>). Detailed information about these models can be found in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>. These backbone models serve as the learning components in different stages of the MTDL framework. We use the original classification results of these models as a baseline and compare the results before and after the multi-task distillation process to validate the effectiveness of the proposed framework.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Baseline results of single and multi-task models.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="center">Methods</th>
<th valign="top" colspan="2" align="center">Single Task<break/>(Accuracy)</th>
<th valign="top" colspan="2" align="center">Multi Task (Accuracy)</th>
<th valign="top" colspan="2" align="center">Single Task<break/>(F1-score)</th>
<th valign="top" colspan="2" align="center">Multi Task<break/>(F1-score)</th>
<th valign="top" align="center">Parameter</th>
<th valign="top" align="center">FLOPs</th>
</tr>
<tr>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im135">
<mml:mrow>
<mml:mo>&#xa0;</mml:mo>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im136">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im137">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im138">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im139">
<mml:mrow>
<mml:mo>&#xa0;</mml:mo>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im140">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im141">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im142">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">(M)</th>
<th valign="top" align="center">(G)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">VGG16</td>
<td valign="top" align="center">96.68</td>
<td valign="top" align="center">93.34</td>
<td valign="top" align="center">96.76 (&#x2191;0.08)</td>
<td valign="top" align="center">93.43 (&#x2191;0.09)</td>
<td valign="top" align="center">96.57</td>
<td valign="top" align="center">94.34</td>
<td valign="top" align="center">96.82 (&#x2191;0.25)</td>
<td valign="top" align="center">94.53 (&#x2191;0.19)</td>
<td valign="top" align="center">253.864</td>
<td valign="top" align="center">15.699</td>
</tr>
<tr>
<td valign="top" align="center">ResNet101</td>
<td valign="top" align="center">98.11</td>
<td valign="top" align="center">93.61</td>
<td valign="top" align="center">98.56 (&#x2191;0.45)</td>
<td valign="top" align="center">94.33 (&#x2191;0.72)</td>
<td valign="top" align="center">97.72</td>
<td valign="top" align="center">94.51</td>
<td valign="top" align="center">98.14 (&#x2191;0.42)</td>
<td valign="top" align="center">95.13 (&#x2191;0.62)</td>
<td valign="top" align="center">42.529</td>
<td valign="top" align="center">7.832</td>
</tr>
<tr>
<td valign="top" align="center">ResNet50</td>
<td valign="top" align="center">97.21</td>
<td valign="top" align="center">93.43</td>
<td valign="top" align="center">97.75 (&#x2191;0.54)</td>
<td valign="top" align="center">93.70 (&#x2191;0.27)</td>
<td valign="top" align="center">97.20</td>
<td valign="top" align="center">94.43</td>
<td valign="top" align="center">97.41 (&#x2191;0.21)</td>
<td valign="top" align="center">94.69 (&#x2191;0.26)</td>
<td valign="top" align="center">23.537</td>
<td valign="top" align="center">4.109</td>
</tr>
<tr>
<td valign="top" align="center">DenseNet121</td>
<td valign="top" align="center">95.68</td>
<td valign="top" align="center">91.63</td>
<td valign="top" align="center">96.58 (&#x2191;0.90)</td>
<td valign="top" align="center">91.99 (&#x2191;0.36)</td>
<td valign="top" align="center">95.68</td>
<td valign="top" align="center">92.63</td>
<td valign="top" align="center">96.58 (&#x2191;0.90)</td>
<td valign="top" align="center">93.02 (&#x2191;0.39)</td>
<td valign="top" align="center">6.968</td>
<td valign="top" align="center">2.865</td>
</tr>
<tr>
<td valign="top" align="center">MobileNetV3Large</td>
<td valign="top" align="center">97.66</td>
<td valign="top" align="center">93.43</td>
<td valign="top" align="center">98.20 (&#x2191;0.54)</td>
<td valign="top" align="center">93.52 (&#x2191;0.09)</td>
<td valign="top" align="center">96.46</td>
<td valign="top" align="center">94.43</td>
<td valign="top" align="center">97.18 (&#x2191;0.72)</td>
<td valign="top" align="center">94.52 (&#x2191;0.09)</td>
<td valign="top" align="center">5.450</td>
<td valign="top" align="center">0.225</td>
</tr>
<tr>
<td valign="top" align="center">EfficientNet</td>
<td valign="top" align="center">97.75</td>
<td valign="top" align="center">93.88</td>
<td valign="top" align="center">98.11 (&#x2191;0.36)</td>
<td valign="top" align="center">93.97 (&#x2191;0.09)</td>
<td valign="top" align="center">96.65</td>
<td valign="top" align="center">94.78</td>
<td valign="top" align="center">97.11 (&#x2191;0.46)</td>
<td valign="top" align="center">94.97 (&#x2191;0.19)</td>
<td valign="top" align="center">4.025</td>
<td valign="top" align="center">0.398</td>
</tr>
<tr>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="top" align="center">97.03</td>
<td valign="top" align="center">91.72</td>
<td valign="top" align="center">97.21 (&#x2191;0.18)</td>
<td valign="top" align="center">92.35 (&#x2191;0.63)</td>
<td valign="top" align="center">96.01</td>
<td valign="top" align="center">92.62</td>
<td valign="top" align="center">96.21 (&#x2191;0.20)</td>
<td valign="top" align="center">93.34 (&#x2191;0.72)</td>
<td valign="top" align="center">2.123</td>
<td valign="top" align="center">0.059</td>
</tr>
<tr>
<td valign="top" align="center">ShuffleNetV2</td>
<td valign="top" align="center">96.58</td>
<td valign="top" align="center">91.63</td>
<td valign="top" align="center">96.76 (&#x2191;0.18)</td>
<td valign="top" align="center">91.99 (&#x2191;0.36)</td>
<td valign="top" align="center">95.37</td>
<td valign="top" align="center">92.62</td>
<td valign="top" align="center">95.76 (&#x2191;0.39)</td>
<td valign="top" align="center">92.79 (&#x2191;0.17)</td>
<td valign="top" align="center">1.268</td>
<td valign="top" align="center">0.148</td>
</tr>
<tr>
<td valign="top" align="center">SqueezeNet</td>
<td valign="top" align="center">94.15</td>
<td valign="top" align="center">90.37</td>
<td valign="top" align="center">94.33 (&#x2191;0.18)</td>
<td valign="top" align="center">90.45 (&#x2191;0.08)</td>
<td valign="top" align="center">94.35</td>
<td valign="top" align="center">91.37</td>
<td valign="top" align="center">94.53 (&#x2191;0.18)</td>
<td valign="top" align="center">91.75 (&#x2191;0.38)</td>
<td valign="top" align="center">0.743</td>
<td valign="top" align="center">0.738</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<inline-formula>
<mml:math display="inline" id="im143">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im144">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represent the disease category recognition and severity estimation tasks in single-task models, respectively. <inline-formula>
<mml:math display="inline" id="im145">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im146">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represent the corresponding tasks in multi-task models. The &#x2191; symbol indicates Accuracy or F1-score improvement from the single-task baseline.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec id="s3_2" sec-type="results">
<label>3.2</label>
<title>Results</title>
<sec id="s3_2_1">
<label>3.2.1</label>
<title>Performance comparison</title>
<p>In this section, we report the results from two experimental settings. The first setting, referred to as unified MTDL, employs the same network architecture for teacher and student modules. This setting aims to verify the effectiveness of the multi-stage distillation architecture proposed in this paper. The second setting, termed heterogeneous MTDL, involves using lightweight network architectures for all student models within the MTDL framework. This setting is designed to demonstrate the advantages of the proposed architecture in achieving a balance between performance and efficiency. As a reference, <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref> lists the baseline results of the initial two single tasks <inline-formula>
<mml:math display="inline" id="im147">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im148">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, as well as the multi-task model <inline-formula>
<mml:math display="inline" id="im149">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math display="inline" id="im150">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im151">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> correspond to the results of <inline-formula>
<mml:math display="inline" id="im152">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for disease classification and severity estimation tasks, respectively. The results in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref> demonstrate that the multi-task learning approach effectively enhances performance across various network architectures.</p>
<p>The results for MTDL with a unified architecture are presented in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>. We can observe that all models show improvement when using MTDL for knowledge learning. This indicates that the MTDL framework effectively leverages the staged learning of knowledge and the complementarity between different tasks. In terms of specific models, ResNet101 achieves the highest performance in both tasks under the MTDL setting, with Accuracy scores of 98.92% for <inline-formula>
<mml:math display="inline" id="im153">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and 95.32% for <inline-formula>
<mml:math display="inline" id="im154">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. The corresponding F1-scores are 98.78% and 96.32%, respectively. These results can be attributed to both ResNet101&#x2019;s powerful feature extraction capabilities and MTDL&#x2019;s effective multi-task learning strategy. On the other hand, SqueezeNet shows significant improvement, with increases of 1.08% and 2.53% in the Accuracy of <inline-formula>
<mml:math display="inline" id="im155">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im156">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, respectively, and an increase of 0.68% and 2.26% in F1-score for each task. This suggests that MTDL allows the lightweight model to learn more robust and comprehensive features. Furthermore, <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref> also provides a comparison between the MTDL, MTDL-PTF, and MTDL-TF methods across various architectures. The results indicate that while the overall performance of MTDL-PTF and MTDL-TF decreases when the dependence on the teacher model is reduced, the introduction of a virtual teacher model significantly improves the accuracy of both methods compared to the original multi-task learning. This validates the effectiveness of the proposed decoupled teacher-free knowledge distillation approach. We also display the confusion matrices for results using ResNet50 as the backbone. As shown in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>, it is evident that our proposed MTDL method either maintains or improves performance across all individual classes for both disease classification and severity estimation tasks. This demonstrates MTDL&#x2019;s ability to achieve a balanced enhancement in both overall performance and category-specific outcomes.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Performance of MTDL and its variants in a unified architecture.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="center">Methods (Accuracy)</th>
<th valign="middle" colspan="2" align="center">MTDL</th>
<th valign="middle" colspan="2" align="center">MTDL-PTF</th>
<th valign="top" colspan="2" align="center">MTDL-TF</th>
</tr>
<tr>
<th valign="middle" align="center">
<inline-formula>
<mml:math display="inline" id="im157">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="middle" align="center">
<inline-formula>
<mml:math display="inline" id="im158">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="middle" align="center">
<inline-formula>
<mml:math display="inline" id="im159">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="middle" align="center">
<inline-formula>
<mml:math display="inline" id="im160">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im161">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im162">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">VGG16</td>
<td valign="top" align="center">97.75 (&#x2191;0.99)</td>
<td valign="top" align="center">94.15 (&#x2191;0.72)</td>
<td valign="top" align="center">97.48 (&#x2191;0.72)</td>
<td valign="top" align="center">94.24 (&#x2191;0.81)</td>
<td valign="top" align="center">97.12 (&#x2191;0.36)</td>
<td valign="top" align="center">93.70 (&#x2191;0.27)</td>
</tr>
<tr>
<td valign="top" align="center">ResNet101</td>
<td valign="top" align="center">98.92 (&#x2191;0.36)</td>
<td valign="top" align="center">95.32 (&#x2191;0.99)</td>
<td valign="top" align="center">98.65 (&#x2191;0.09)</td>
<td valign="top" align="center">94.87 (&#x2191;0.54)</td>
<td valign="top" align="center">98.65 (&#x2191;0.09)</td>
<td valign="top" align="center">94.78 (&#x2191;0.45)</td>
</tr>
<tr>
<td valign="top" align="center">ResNet50</td>
<td valign="top" align="center">98.20 (&#x2191;0.45)</td>
<td valign="top" align="center">94.87 (&#x2191;1.17)</td>
<td valign="top" align="center">98.11 (&#x2191;0.36)</td>
<td valign="top" align="center">94.60 (&#x2191;0.90)</td>
<td valign="top" align="center">97.93 (&#x2191;0.18)</td>
<td valign="top" align="center">94.34 (&#x2191;0.64)</td>
</tr>
<tr>
<td valign="top" align="center">DenseNet121</td>
<td valign="top" align="center">97.30 (&#x2191;0.72)</td>
<td valign="top" align="center">93.79 (&#x2191;1.80)</td>
<td valign="top" align="center">97.30 (&#x2191;0.72)</td>
<td valign="top" align="center">93.79 (&#x2191;1.80)</td>
<td valign="top" align="center">97.30 (&#x2191;0.72)</td>
<td valign="top" align="center">92.35 (&#x2191;0.36)</td>
</tr>
<tr>
<td valign="top" align="center">Average Improvement</td>
<td valign="top" align="center">&#x2191;0.63</td>
<td valign="top" align="center">&#x2191;1.17</td>
<td valign="top" align="center">&#x2191;0.47</td>
<td valign="top" align="center">&#x2191;1.01</td>
<td valign="top" align="center">&#x2191;0.34</td>
<td valign="top" align="center">&#x2191;0.43</td>
</tr>
<tr>
<td valign="top" align="center">MobileNetV3Large</td>
<td valign="top" align="center">98.74 (&#x2191;0.54)</td>
<td valign="top" align="center">94.60 (&#x2191;1.08)</td>
<td valign="top" align="center">98.65 (&#x2191;0.45)</td>
<td valign="top" align="center">94.24 (&#x2191;0.72)</td>
<td valign="top" align="center">98.56 (&#x2191;0.36)</td>
<td valign="top" align="center">93.97 (&#x2191;0.45)</td>
</tr>
<tr>
<td valign="top" align="center">EfficientNet</td>
<td valign="top" align="center">98.74 (&#x2191;0.63)</td>
<td valign="top" align="center">94.78 (&#x2191;0.81)</td>
<td valign="top" align="center">98.47 (&#x2191;0.36)</td>
<td valign="top" align="center">94.33 (&#x2191;0.36)</td>
<td valign="top" align="center">98.56 (&#x2191;0.45)</td>
<td valign="top" align="center">94.24 (&#x2191;0.27)</td>
</tr>
<tr>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="top" align="center">97.48 (&#x2191;0.27)</td>
<td valign="top" align="center">93.16 (&#x2191;0.81)</td>
<td valign="top" align="center">97.84 (&#x2191;0.63)</td>
<td valign="top" align="center">93.16 (&#x2191;0.81)</td>
<td valign="top" align="center">97.30 (&#x2191;0.09)</td>
<td valign="top" align="center">92.53 (&#x2191;0.18)</td>
</tr>
<tr>
<td valign="top" align="center">ShuffleNetV2</td>
<td valign="top" align="center">97.21 (&#x2191;0.45)</td>
<td valign="top" align="center">93.52 (&#x2191;1.53)</td>
<td valign="top" align="center">97.21 (&#x2191;0.45)</td>
<td valign="top" align="center">93.70 (&#x2191;1.71)</td>
<td valign="top" align="center">96.94 (&#x2191;0.18)</td>
<td valign="top" align="center">93.07 (&#x2191;1.08)</td>
</tr>
<tr>
<td valign="top" align="center">SqueezeNet</td>
<td valign="top" align="center">95.41 (&#x2191;1.08)</td>
<td valign="top" align="center">92.98 (&#x2191;2.53)</td>
<td valign="top" align="center">96.40 (&#x2191;2.07)</td>
<td valign="top" align="center">93.07 (&#x2191;2.62)</td>
<td valign="top" align="center">95.14 (&#x2191;0.81)</td>
<td valign="top" align="center">91.63 (&#x2191;1.18)</td>
</tr>
<tr>
<td valign="top" align="center">Average Improvement</td>
<td valign="top" align="center">&#x2191;0.59</td>
<td valign="top" align="center">&#x2191;1.35</td>
<td valign="top" align="center">&#x2191;0.79</td>
<td valign="top" align="center">&#x2191;1.24</td>
<td valign="top" align="center">&#x2191;0.38</td>
<td valign="top" align="center">&#x2191;0.63</td>
</tr>
</tbody>
</table>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="center">Methods (F1-Score)</th>
<th valign="top" colspan="2" align="center">MTDL</th>
<th valign="top" colspan="2" align="center">MTDL-PTF</th>
<th valign="top" colspan="2" align="center">MTDL-TF</th>
</tr>
<tr>
<th valign="middle" align="center">
<inline-formula>
<mml:math display="inline" id="im163">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="middle" align="center">
<inline-formula>
<mml:math display="inline" id="im164">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="middle" align="center">
<inline-formula>
<mml:math display="inline" id="im165">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="middle" align="center">
<inline-formula>
<mml:math display="inline" id="im166">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im167">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im168">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">VGG16</td>
<td valign="top" align="center">97.85 (&#x2191;1.03)</td>
<td valign="top" align="center">95.15 (&#x2191;0.62)</td>
<td valign="top" align="center">97.47 (&#x2191;0.65)</td>
<td valign="top" align="center">95.24 (&#x2191;0.41)</td>
<td valign="top" align="center">96.96 (&#x2191;0.14)</td>
<td valign="top" align="center">94.77 (&#x2191;0.24)</td>
</tr>
<tr>
<td valign="top" align="center">ResNet101</td>
<td valign="top" align="center">98.78 (&#x2191;0.64)</td>
<td valign="top" align="center">96.32 (&#x2191;1.19)</td>
<td valign="top" align="center">98.46 (&#x2191;0.32)</td>
<td valign="top" align="center">95.86 (&#x2191;0.56)</td>
<td valign="top" align="center">98.49 (&#x2191;0.35)</td>
<td valign="top" align="center">95.68 (&#x2191;0.38)</td>
</tr>
<tr>
<td valign="top" align="center">ResNet50</td>
<td valign="top" align="center">97.52 (&#x2191;0.32)</td>
<td valign="top" align="center">95.87 (&#x2191;1.44)</td>
<td valign="top" align="center">98.11 (&#x2191;0.70)</td>
<td valign="top" align="center">95.58 (&#x2191;0.89)</td>
<td valign="top" align="center">97.59 (&#x2191;0.18)</td>
<td valign="top" align="center">95.24 (&#x2191;0.55)</td>
</tr>
<tr>
<td valign="top" align="center">DenseNet121</td>
<td valign="top" align="center">97.11 (&#x2191;0.53)</td>
<td valign="top" align="center">94.80 (&#x2191;1.78)</td>
<td valign="top" align="center">97.11 (&#x2191;0.53)</td>
<td valign="top" align="center">94.60 (&#x2191;1.58)</td>
<td valign="top" align="center">97.03 (&#x2191;0.45)</td>
<td valign="top" align="center">93.34 (&#x2191;0.32)</td>
</tr>
<tr>
<td valign="top" align="center">Average Improvement</td>
<td valign="top" align="center">&#x2191;0.63</td>
<td valign="top" align="center">&#x2191;1.26</td>
<td valign="top" align="center">&#x2191;0.55</td>
<td valign="top" align="center">&#x2191;0.86</td>
<td valign="top" align="center">&#x2191;0.28</td>
<td valign="top" align="center">&#x2191;0.37</td>
</tr>
<tr>
<td valign="top" align="center">MobileNetV3Large</td>
<td valign="top" align="center">97.65 (&#x2191;0.47)</td>
<td valign="top" align="center">95.60 (&#x2191;1.08)</td>
<td valign="top" align="center">97.41 (&#x2191;0.23)</td>
<td valign="top" align="center">95.24 (&#x2191;0.72)</td>
<td valign="top" align="center">97.25 (&#x2191;0.07)</td>
<td valign="top" align="center">94.56 (&#x2191;0.04)</td>
</tr>
<tr>
<td valign="top" align="center">EfficientNet</td>
<td valign="top" align="center">97.95 (&#x2191;0.84)</td>
<td valign="top" align="center">95.78 (&#x2191;0.81)</td>
<td valign="top" align="center">97.52 (&#x2191;0.41)</td>
<td valign="top" align="center">95.33 (&#x2191;0.36)</td>
<td valign="top" align="center">97.36 (&#x2191;0.25)</td>
<td valign="top" align="center">95.24 (&#x2191;0.27)</td>
</tr>
<tr>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="top" align="center">97.41 (&#x2191;1.20)</td>
<td valign="top" align="center">94.16 (&#x2191;0.82)</td>
<td valign="top" align="center">97.28 (&#x2191;1.07)</td>
<td valign="top" align="center">94.16 (&#x2191;0.82)</td>
<td valign="top" align="center">97.14 (&#x2191;0.93)</td>
<td valign="top" align="center">93.36 (&#x2191;0.02)</td>
</tr>
<tr>
<td valign="top" align="center">ShuffleNetV2</td>
<td valign="top" align="center">97.01 (&#x2191;1.25)</td>
<td valign="top" align="center">94.52 (&#x2191;1.73)</td>
<td valign="top" align="center">97.01 (&#x2191;1.25)</td>
<td valign="top" align="center">94.60 (&#x2191;1.81)</td>
<td valign="top" align="center">96.74 (&#x2191;0.98)</td>
<td valign="top" align="center">94.27 (&#x2191;1.45)</td>
</tr>
<tr>
<td valign="top" align="center">SqueezeNet</td>
<td valign="top" align="center">95.21 (&#x2191;0.68)</td>
<td valign="top" align="center">94.01 (&#x2191;2.26)</td>
<td valign="top" align="center">96.52 (&#x2191;1.99)</td>
<td valign="top" align="center">94.27 (&#x2191;2.52)</td>
<td valign="top" align="center">94.97 (&#x2191;0.81)</td>
<td valign="top" align="center">92.63 (&#x2191;0.88)</td>
</tr>
<tr>
<td valign="top" align="center">Average Improvement</td>
<td valign="top" align="center">&#x2191;0.89</td>
<td valign="top" align="center">&#x2191;1.34</td>
<td valign="top" align="center">&#x2191;0.99</td>
<td valign="top" align="center">&#x2191;0.76</td>
<td valign="top" align="center">&#x2191;0.61</td>
<td valign="top" align="center">&#x2191;0.53</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<inline-formula>
<mml:math display="inline" id="im169">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im170">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> represent MTDL&#x2019;s performance, while <inline-formula>
<mml:math display="inline" id="im171">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im172">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mi>v</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> are for MTDL-PTF and MTDL-TF with a virtual teacher. The &#x2191; symbol indicates Accuracy and F1-score improvement, referencing the multi-task baseline from <xref ref-type="table" rid="T2">
<bold>Table 2</bold>
</xref>.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Performance improvement through multi-stage distillation in MTDL. <bold>(A)</bold> Disease classification without MTDL, <bold>(B)</bold> Disease classification with MTDL, <bold>(C)</bold> Severity estimation without MTDL, <bold>(D)</bold> Severity estimation with MTDL.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1330527-g004.tif"/>
</fig>
<p>Furthermore, to investigate the impact of using teacher and student models with different architectures on the performance of the MTDL framework, we employ complex models like DenseNet121 for the teacher and lightweight models such as EfficientNet for the student. The results presented in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref> substantiate the effectiveness of this heterogeneous MTDL approach. For instance, when using ResNet101 as the teacher model, the SqueezeNet student model shows an improvement of 1.95% and 3.07% in <inline-formula>
<mml:math display="inline" id="im173">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im174">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, respectively, which are higher than the results obtained under the unified architecture MTDL setting. These results suggest that a more powerful teacher model enriches the student model&#x2019;s learning.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Performance evaluation of MTDL under a heterogeneous setting.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" colspan="2" align="center">Methods (Accuracy)</th>
<th valign="top" colspan="2" align="center">MTDL</th>
<th valign="top" colspan="2" align="center">Methods (Accuracy)</th>
<th valign="top" colspan="2" align="center">MTDL</th>
</tr>
<tr>
<th valign="top" align="center">Teacher</th>
<th valign="top" align="center">Student</th>
<th valign="middle" align="center">
<inline-formula>
<mml:math display="inline" id="im175">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im176">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">Teacher</th>
<th valign="top" align="center">Student</th>
<th valign="middle" align="center">
<inline-formula>
<mml:math display="inline" id="im177">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im178">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="5" align="center">VGG16</td>
<td valign="top" align="center">MobileNetV3Large</td>
<td valign="middle" align="center">98.74 (&#x2191;0.54)</td>
<td valign="top" align="center">94.51 (&#x2191;0.99)</td>
<td valign="middle" rowspan="5" align="center">ResNet50</td>
<td valign="top" align="center">MobileNetV3Large</td>
<td valign="middle" align="center">98.92 (&#x2191;0.72)</td>
<td valign="top" align="center">94.42 (&#x2191;0.90)</td>
</tr>
<tr>
<td valign="top" align="center">EfficientNet</td>
<td valign="middle" align="center">98.47 (&#x2191;0.36)</td>
<td valign="top" align="center">94.54 (&#x2191;0.57)</td>
<td valign="top" align="center">EfficientNet</td>
<td valign="middle" align="center">98.74 (&#x2191;0.63)</td>
<td valign="top" align="center">94.51 (&#x2191;0.54)</td>
</tr>
<tr>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="middle" align="center">97.48 (&#x2191;0.27)</td>
<td valign="top" align="center">93.52 (&#x2191;1.17)</td>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="middle" align="center">97.66 (&#x2191;0.45)</td>
<td valign="top" align="center">94.15 (&#x2191;1.80)</td>
</tr>
<tr>
<td valign="top" align="center">ShuffleNetV2</td>
<td valign="middle" align="center">97.57 (&#x2191;0.81)</td>
<td valign="top" align="center">93.07 (&#x2191;1.08)</td>
<td valign="top" align="center">ShuffleNetV2</td>
<td valign="middle" align="center">97.66 (&#x2191;0.90)</td>
<td valign="top" align="center">93.07 (&#x2191;1.08)</td>
</tr>
<tr>
<td valign="top" align="center">SqueezeNet</td>
<td valign="middle" align="center">95.95 (&#x2191;1.62)</td>
<td valign="top" align="center">92.62 (&#x2191;2.17)</td>
<td valign="top" align="center">SqueezeNet</td>
<td valign="middle" align="center">96.04 (&#x2191;1.71)</td>
<td valign="top" align="center">92.98 (&#x2191;2.53)</td>
</tr>
<tr>
<td valign="top" colspan="2" align="center">Average Improvement</td>
<td valign="middle" align="center">&#x2191;0.72</td>
<td valign="top" align="center">&#x2191;1.20</td>
<td valign="top" colspan="2" align="center">Average Improvement</td>
<td valign="middle" align="center">&#x2191;0.88</td>
<td valign="top" align="center">&#x2191;1.37</td>
</tr>
<tr>
<td valign="middle" rowspan="5" align="center">ResNet101</td>
<td valign="top" align="center">MobileNetV3Large</td>
<td valign="middle" align="center">98.92 (&#x2191;0.72)</td>
<td valign="top" align="center">95.05 (&#x2191;1.53)</td>
<td valign="middle" rowspan="5" align="center">DenseNet121</td>
<td valign="top" align="center">MobileNetV3Large</td>
<td valign="middle" align="center">98.38 (&#x2191;0.18)</td>
<td valign="top" align="center">94.51 (&#x2191;0.99)</td>
</tr>
<tr>
<td valign="top" align="center">EfficientNet</td>
<td valign="middle" align="center">98.79 (&#x2191;0.68)</td>
<td valign="top" align="center">95.13 (&#x2191;1.16)</td>
<td valign="top" align="center">EfficientNet</td>
<td valign="middle" align="center">98.47 (&#x2191;0.36)</td>
<td valign="top" align="center">94.87 (&#x2191;0.90)</td>
</tr>
<tr>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="middle" align="center">97.93 (&#x2191;0.72)</td>
<td valign="top" align="center">94.24 (&#x2191;1.89)</td>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="middle" align="center">97.87 (&#x2191;0.66)</td>
<td valign="top" align="center">93.34 (&#x2191;0.99)</td>
</tr>
<tr>
<td valign="top" align="center">ShuffleNetV2</td>
<td valign="middle" align="center">98.02 (&#x2191;1.26)</td>
<td valign="top" align="center">93.97 (&#x2191;1.98)</td>
<td valign="top" align="center">ShuffleNetV2</td>
<td valign="middle" align="center">97.48 (&#x2191;0.72)</td>
<td valign="top" align="center">93.79 (&#x2191;1.80)</td>
</tr>
<tr>
<td valign="top" align="center">SqueezeNet</td>
<td valign="middle" align="center">96.28 (&#x2191;1.95)</td>
<td valign="top" align="center">93.52 (&#x2191;3.07)</td>
<td valign="top" align="center">SqueezeNet</td>
<td valign="middle" align="center">96.17 (&#x2191;1.84)</td>
<td valign="top" align="center">92.80 (&#x2191;2.35)</td>
</tr>
<tr>
<td valign="top" colspan="2" align="center">Average Improvement</td>
<td valign="middle" align="center">&#x2191;1.07</td>
<td valign="top" align="center">&#x2191;1.93</td>
<td valign="top" colspan="2" align="center">Average Improvement</td>
<td valign="middle" align="center">&#x2191;0.75</td>
<td valign="top" align="center">&#x2191;1.41</td>
</tr>
</tbody>
</table>
<table>
<thead>
<tr>
<th valign="top" colspan="2" align="center">Methods (F1-score)</th>
<th valign="top" colspan="2" align="center">MTDL</th>
<th valign="top" colspan="2" align="center">Methods (F1-score)</th>
<th valign="top" colspan="2" align="center">MTDL</th>
</tr>
<tr>
<th valign="top" align="center">Teacher</th>
<th valign="top" align="center">Student</th>
<th valign="middle" align="center">
<inline-formula>
<mml:math display="inline" id="im179">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im180">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">Teacher</th>
<th valign="top" align="center">Student</th>
<th valign="middle" align="center">
<inline-formula>
<mml:math display="inline" id="im181">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th valign="top" align="center">
<inline-formula>
<mml:math display="inline" id="im182">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="5" align="center">VGG16</td>
<td valign="top" align="center">MobileNetV3Large</td>
<td valign="middle" align="center">98.54 (&#x2191;1.36)</td>
<td valign="top" align="center">95.24 (&#x2191;0.72)</td>
<td valign="middle" rowspan="5" align="center">ResNet50</td>
<td valign="top" align="center">MobileNetV3Large</td>
<td valign="middle" align="center">98.72 (&#x2191;1.54)</td>
<td valign="top" align="center">95.62 (&#x2191;1.10)</td>
</tr>
<tr>
<td valign="top" align="center">EfficientNet</td>
<td valign="middle" align="center">97.98 (&#x2191;0.80)</td>
<td valign="top" align="center">95.36 (&#x2191;0.39)</td>
<td valign="top" align="center">EfficientNet</td>
<td valign="middle" align="center">98.46 (&#x2191;1.35)</td>
<td valign="top" align="center">95.51 (&#x2191;0.54)</td>
</tr>
<tr>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="middle" align="center">97.46 (&#x2191;1.25)</td>
<td valign="top" align="center">94.52 (&#x2191;1.18)</td>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="middle" align="center">97.66 (&#x2191;1.45)</td>
<td valign="top" align="center">94.10 (&#x2191;0.76)</td>
</tr>
<tr>
<td valign="top" align="center">ShuffleNetV2</td>
<td valign="middle" align="center">97.27 (&#x2191;1.51)</td>
<td valign="top" align="center">94.29 (&#x2191;1.50)</td>
<td valign="top" align="center">ShuffleNetV2</td>
<td valign="middle" align="center">97.66 (&#x2191;1.45)</td>
<td valign="top" align="center">93.98 (&#x2191;1.19)</td>
</tr>
<tr>
<td valign="top" align="center">SqueezeNet</td>
<td valign="middle" align="center">95.76 (&#x2191;1.23)</td>
<td valign="top" align="center">93.42 (&#x2191;1.67)</td>
<td valign="top" align="center">SqueezeNet</td>
<td valign="middle" align="center">96.04 (&#x2191;1.51)</td>
<td valign="top" align="center">93.67 (&#x2191;1.92)</td>
</tr>
<tr>
<td valign="top" colspan="2" align="center">Average Improvement</td>
<td valign="middle" align="center">&#x2191;0.83</td>
<td valign="top" align="center">&#x2191;1.09</td>
<td valign="top" colspan="2" align="center">Average Improvement</td>
<td valign="middle" align="center">&#x2191;1.46</td>
<td valign="top" align="center">&#x2191;1.10</td>
</tr>
<tr>
<td valign="middle" rowspan="5" align="center">ResNet101</td>
<td valign="top" align="center">MobileNetV3Large</td>
<td valign="middle" align="center">98.62 (&#x2191;1.44)</td>
<td valign="top" align="center">95.85 (&#x2191;1.33)</td>
<td valign="middle" rowspan="5" align="center">DenseNet121</td>
<td valign="top" align="center">MobileNetV3Large</td>
<td valign="middle" align="center">98.38 (&#x2191;1.20)</td>
<td valign="top" align="center">94.97 (&#x2191;0.45)</td>
</tr>
<tr>
<td valign="top" align="center">EfficientNet</td>
<td valign="middle" align="center">98.54 (&#x2191;1.43)</td>
<td valign="top" align="center">96.03 (&#x2191;1.06)</td>
<td valign="top" align="center">EfficientNet</td>
<td valign="middle" align="center">98.27 (&#x2191;1.16)</td>
<td valign="top" align="center">95.62 (&#x2191;0.65)</td>
</tr>
<tr>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="middle" align="center">97.72 (&#x2191;1.51)</td>
<td valign="top" align="center">94.94 (&#x2191;1.60)</td>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="middle" align="center">97.87 (&#x2191;1.66)</td>
<td valign="top" align="center">94.34 (&#x2191;1.00)</td>
</tr>
<tr>
<td valign="top" align="center">ShuffleNetV2</td>
<td valign="middle" align="center">98.22 (&#x2191;2.46)</td>
<td valign="top" align="center">94.87 (&#x2191;2.08)</td>
<td valign="top" align="center">ShuffleNetV2</td>
<td valign="middle" align="center">97.28 (&#x2191;1.52)</td>
<td valign="top" align="center">94.09 (&#x2191;1.30)</td>
</tr>
<tr>
<td valign="top" align="center">SqueezeNet</td>
<td valign="middle" align="center">96.28 (&#x2191;1.75)</td>
<td valign="top" align="center">93.52 (&#x2191;1.77)</td>
<td valign="top" align="center">SqueezeNet</td>
<td valign="middle" align="center">96.17 (&#x2191;1.64)</td>
<td valign="top" align="center">93.70 (&#x2191;1.95)</td>
</tr>
<tr>
<td valign="top" colspan="2" align="center">Average Improvement</td>
<td valign="middle" align="center">&#x2191;1.72</td>
<td valign="top" align="center">&#x2191;1.57</td>
<td valign="top" colspan="2" align="center">Average Improvement</td>
<td valign="middle" align="center">&#x2191;1.44</td>
<td valign="top" align="center">&#x2191;1.07</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The &#x2191; symbol indicates an improvement in Accuracy and F1-score, as compared to the results listed in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>, where both teacher and student models use a unified lightweight network for multi-task learning.</p>
</fn>
</table-wrap-foot>
</table-wrap>    <p>Finally, to ensure the effectiveness of our proposed method, we conduct a comprehensive comparison with four well-established approaches in the field to validate its performance:</p>
<list list-type="simple">
<list-item>
<p>(a) Dual-stream hierarchical bilinear pooling (DHBP) (<xref ref-type="bibr" rid="B59">Wang et&#xa0;al., 2022</xref>): As a multi-task method initially developed for crops and diseases classification, we adapt DHBP for both disease classification and severity prediction tasks. This comparison allows us to evaluate the performance of our MTDL approach against a specialized multi-task learning method within the same domain.</p>
</list-item>
<list-item>
<p>(b) Traditional knowledge distillation (KD) (<xref ref-type="bibr" rid="B14">Ghofrani and Toroghi, 2022</xref>) and decoupled knowledge distillation (DKD) (<xref ref-type="bibr" rid="B68">Zhao et&#xa0;al., 2022</xref>): These two methods represent the knowledge distillation category. We apply KD and its enhanced version, DKD, to our disease recognition and severity estimation tasks, providing a direct comparison with standard and advanced distillation techniques.</p>
</list-item>
<list-item>
<p>(c) Attention transfer (AT) (<xref ref-type="bibr" rid="B33">Komodakis and Zagoruyko, 2016</xref>): Differing from KD and DKD that focus on distilling knowledge through predicted outcomes, AT utilizes attention maps to transfer knowledge between the teacher and student models. Including AT in our comparison allows us to assess the efficacy of a distinct transfer learning approach.</p>
</list-item>
</list>
<p>To ensure fair comparisons among KD, DKD, AT, and MTDL, we consistently used ResNet-101 as the teacher and MobileNetV3Small as the student model. This approach enables a reliable assessment of knowledge distillation efficacy. Additionally, we present MTDL results using ResNet-101 as both teacher and student, aligning with DHBP&#x2019;s backbone, to effectively demonstrate its multi-tasking capabilities.</p>
<p>The results are shown in <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>. In our experiments, MTDL with ResNet-101 as both teacher and student model achieves the best results, outperforming DHBP in disease classification by 0.53% in Accuracy and 0.29% in F1-score, and in severity prediction by 0.86% in Accuracy and 1.08% in F1-score. These improvements validate MTDL&#x2019;s phased multi-task learning approach. Moreover, when compared under the same teacher-student model setup with other distillation methods (KD, DKD, AT), MTDL excels, particularly surpassing DKD by 0.37% in Accuracy and 0.16% in F1-score for disease classification, and by 0.62% in Accuracy and 0.38% in F1-score for severity prediction. This indicates the effectiveness of MTDL&#x2019;s proposed mutual distillation learning between teachers and students.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Comparative performance analysis of MTDL with other distillation-based and multi-task learning methods for disease classification and severity prediction.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="center">Methods</th>
<th valign="top" rowspan="2" align="center">Teacher</th>
<th valign="top" rowspan="2" align="center">Student</th>
<th valign="top" colspan="2" align="center">Disease Classification</th>
<th valign="top" colspan="2" align="center">Severity Prediction</th>
</tr>
<tr>
<th valign="top" align="center">Accuracy</th>
<th valign="top" align="center">F1-score</th>
<th valign="top" align="center">Accuracy</th>
<th valign="top" align="center">F1-score</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">DHBP (<xref ref-type="bibr" rid="B59">Wang et&#xa0;al., 2022</xref>)</td>
<td valign="top" align="center">ResNet101</td>
<td valign="top" align="center"/>
<td valign="top" align="center">98.39</td>
<td valign="top" align="center">98.49</td>
<td valign="top" align="center">94.46</td>
<td valign="top" align="center">95.24</td>
</tr>
<tr>
<td valign="top" align="center">KD (<xref ref-type="bibr" rid="B14">Ghofrani and Toroghi, 2022</xref>)</td>
<td valign="top" align="center">ResNet101</td>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="top" align="center">97.30</td>
<td valign="top" align="center">97.28</td>
<td valign="top" align="center">93.16</td>
<td valign="top" align="center">93.96</td>
</tr>
<tr>
<td valign="top" align="center">DKD (<xref ref-type="bibr" rid="B68">Zhao et&#xa0;al., 2022</xref>)
</td>
<td valign="top" align="center">ResNet101</td>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="top" align="center">97.56</td>
<td valign="top" align="center">97.56</td>
<td valign="top" align="center">93.62</td>
<td valign="top" align="center">94.56</td>
</tr>
<tr>
<td valign="top" align="center">AT (<xref ref-type="bibr" rid="B33">Komodakis and Zagoruyko, 2016</xref>)
</td>
<td valign="top" align="center">ResNet101</td>
<td valign="top" align="center">MobileNetV3Small</td>
<td valign="top" align="center">97.39</td>
<td valign="top" align="center">97.46</td>
<td valign="top" align="center">93.28</td>
<td valign="top" align="center">94.09</td>
</tr>
<tr>
<td valign="middle" align="center">MTDL</td>
<td valign="top" align="center">ResNet101<break/>ResNet101</td>
<td valign="top" align="center">MobileNetV3Small<break/>ResNet101</td>
<td valign="top" align="center">97.93<break/>98.92</td>
<td valign="top" align="center">97.72<break/>98.78</td>
<td valign="top" align="center">94.24<break/>95.32</td>
<td valign="top" align="center">94.94<break/>96.32</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2_2">
<label>3.2.2</label>
<title>Significance analysis</title>
<p>In this subsection, we conduct a Wilcoxon Signed-Rank Test (<xref ref-type="bibr" rid="B10">Corder and Foreman, 2014</xref>) to evaluate the significance of the performance improvements across all CNN architectures. We provide the detailed significance analysis corresponding to the results originally presented in <xref ref-type="table" rid="T3">
<bold>Tables&#xa0;3</bold>
</xref> and <xref ref-type="table" rid="T4">
<bold>4</bold>
</xref> in the following <xref ref-type="table" rid="T6">
<bold>Tables&#xa0;6</bold>
</xref> and <xref ref-type="table" rid="T7">
<bold>7</bold>
</xref>. In <xref ref-type="table" rid="T6">
<bold>Table&#xa0;6</bold>
</xref>, we present a comparison of the performance of our MTDL model and its variants against several baseline CNN architectures. This table focuses on scenarios within our MTDL framework where both the teacher and student models utilize identical architecture. The results from this table demonstrate statistically significant improvements across all comparisons in both disease classification and severity prediction tasks. The p-values obtained are consistently well below the 0.05 threshold, indicating robust enhancements attributed to our MTDL approach. Similarly, <xref ref-type="table" rid="T7">
<bold>Table&#xa0;7</bold>
</xref> showcases the results in a heterogeneous setting, where the MTDL model employs a more complex architecture as the teacher model and a lightweight network as the student model. In these comparisons, the results again confirm significant improvements across all evaluated aspects.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Wilcoxon Signed-Rank Test results for MTDL variants&#x2019; Accuracy in a unified architecture.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Task</th>
<th valign="top" align="center">Model</th>
<th valign="top" align="center">vs VGG16</th>
<th valign="top" align="center">vs ResNet101</th>
<th valign="top" align="center">vs ResNet50</th>
<th valign="top" align="center">vs DenseNet121</th>
<th valign="top" align="center"/>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">Disease Classification</td>
<td valign="top" align="center">MTDL</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.367 &#xd7; 10<sup>&#x2212;2</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.172 &#xd7; 10<sup>&#x2212;2</sup>
</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">MTDL-PTF</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.065 &#xd7; 10<sup>&#x2212;2</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">MTDL-TF</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">2.066 &#xd7; 10<sup>&#x2212;2</sup>
</td>
<td valign="top" align="center">4.980 &#xd7; 10<sup>&#x2212;2</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="center">Severity Prediction</td>
<td valign="top" align="center">MTDL</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">MTDL-PTF</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">MTDL-TF</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center"/>
</tr>
<tr>
<th valign="top" align="center">Task</th>
<th valign="top" align="center">Model</th>
<th valign="top" align="center">vs MobileNetV3Large</th>
<th valign="top" align="center">vs EfficientNet</th>
<th valign="top" align="center">vs MobileNetV3Small</th>
<th valign="top" align="center">vs ShuffleNetV2</th>
<th valign="top" align="center">vs SqueezeNet</th>
</tr>
<tr>
<td valign="top" align="center">Disease Classification</td>
<td valign="top" align="center">MTDL</td>
<td valign="top" align="center">1.151 &#xd7; 10<sup>&#x2212;2</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">3.906 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">MTDL-PTF</td>
<td valign="top" align="center">1.172 &#xd7; 10<sup>&#x2212;1</sup>
</td>
<td valign="top" align="center">1.079 &#xd7; 10<sup>&#x2212;2</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">MTDL-TF</td>
<td valign="top" align="center">4.206 &#xd7; 10<sup>&#x2212;2</sup>
</td>
<td valign="top" align="center">1.065 &#xd7; 10<sup>&#x2212;2</sup>
</td>
<td valign="top" align="center">3.906 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="top" align="center">Severity Prediction</td>
<td valign="top" align="center">MTDL</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">MTDL-PTF</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">3.906 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">MTDL-TF</td>
<td valign="top" align="center">1.278 &#xd7; 10<sup>&#x2212;2</sup>
</td>
<td valign="top" align="center">2.734 &#xd7; 10<sup>&#x2212;2</sup></td>
<td valign="top" align="center">1.079 &#xd7; 10<sup>&#x2212;2</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953 &#xd7; 10<sup>&#x2212;3</sup>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T7" position="float">
<label>Table&#xa0;7</label>
<caption>
<p>Wilcoxon Signed-Rank Test results for MTDL variants&#x2019; Accuracy under heterogeneous settings (teacher models are given in parentheses).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Task</th>
<th valign="middle" align="center">Model</th>
<th valign="middle" align="center">vs MobileNetV3Large</th>
<th valign="middle" align="center">vs EfficientNet</th>
<th valign="middle" align="center">vs MobileNetV3Small</th>
<th valign="middle" align="center">vs ShuffleNetV2</th>
<th valign="middle" align="center">vs SqueezeNet</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="center">Disease Classification</td>
<td valign="middle" align="center">MTDL (VGG16)</td>
<td valign="middle" align="center">7.632 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.162 &#xd7;10<sup>&#x2212;2</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="middle" align="center">MTDL (ResNet101)</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="middle" align="center">MTDL (ResNet50)</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">3.906 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="middle" align="center">MTDL (DenseNet121)</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Severity Prediction</td>
<td valign="middle" align="center">MTDL (VGG16)</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="middle" align="center">MTDL (ResNet101)</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="middle" align="center">MTDL (ResNet50)</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="middle" align="center">MTDL (DenseNet121)</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="middle" align="center">1.953 &#xd7;10<sup>&#x2212;3</sup>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In addition, we also test the significance of the results in comparison with other multi-task and distillation learning methods, with the results recorded in <xref ref-type="table" rid="T8">
<bold>Table&#xa0;8</bold>
</xref>. It can be seen that in most cases, the MTDL framework shows statistically significant differences when compared with methods like DHBP, KD, DKD, and AT, with p-values well beneath the 0.05 significance threshold. However, there is one exception to note: in the case of MTDL (ResNet101-MobileNetV3Small) vs DHBP for severity prediction, the p-value is slightly above the conventional threshold for significance. This exception likely stems from MTDL employing lightweight MobileNetV3Small as the distillation target, whereas DHBP uses the more substantial ResNet101 as its base model.</p>
<table-wrap id="T8" position="float">
<label>Table&#xa0;8</label>
<caption>
<p>Results of the Wilcoxon Signed-Rank Test for MTDL and its variants versus other methods (The first in &#x2018;()&#x2019; is the teacher model and the second is the student model).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Task</th>
<th valign="top" align="center">Model</th>
<th valign="top" align="center">vs DHBP</th>
<th valign="top" align="center">vs KD</th>
<th valign="top" align="center">vs DKD</th>
<th valign="top" align="center">vs AT</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" rowspan="2" align="center">Disease Classification</td>
<td valign="top" align="center">MTDL (ResNet101-ResNet101)</td>
<td valign="top" align="center">1.507&#xd7;10<sup>&#x2212;2</sup>
</td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="top" align="center">MTDL (ResNet101-MobileNetV3Small)</td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup>
</td>
</tr>
<tr>
<td valign="top" rowspan="2" align="center">Severity Prediction</td>
<td valign="top" align="center">MTDL (ResNet101-ResNet101)</td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup></td>
</tr>
<tr>
<td valign="top" align="center">MTDL (ResNet101-MobileNetV3Small)</td>
<td valign="top" align="center">9.219&#xd7;10<sup>&#x2212;2</sup></td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup>
</td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup></td>
<td valign="top" align="center">1.953&#xd7;10<sup>&#x2212;3</sup></td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s3_3" sec-type="discussion">
<label>3.3</label>
<title>Discussion</title>
<sec id="s3_3_1">
<label>3.3.1</label>
<title>The effectiveness of multi-stage distillation learning</title>
<p>We assess the effectiveness of the three stages in our MTDL framework: knowledge disentanglement, mutual knowledge transfer, and knowledge integration. To do so, we employ single-task and multi-task models as our baselines and incorporate the results obtained after each stage of learning. As illustrated in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>, the results in terms of Accuracy and F1-score align with our expectations. The results clearly demonstrate that each stage of learning contributes to the final performance improvement, thereby validating the effectiveness of staged distillation in the MTDL framework.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Performance improvement through multi-stage distillation in MTDL. <bold>(A)</bold> Accuracy for identifying disease types, <bold>(B)</bold> Accuracy for assessing disease severity, <bold>(C)</bold> F1-score for identifying disease types, <bold>(D)</bold> F1-score for assessing disease severity.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1330527-g005.tif"/>
</fig>
</sec>
<sec id="s3_3_2">
<label>3.3.2</label>
<title>Trade-off between performance and efficiency</title>
<p>We investigate the balance between performance and efficiency within the context of our MTDL framework. Performance is measured by Accuracy, while efficiency is represented by the number of parameters and floating-point operations (FLOPs). We use the single-task ResNet101 model and the multi-task ResNet101 model as baselines due to their superior performance across all single-task and multi-task models, as shown in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>. The results are presented in <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>, and the size of each model&#x2019;s marker in the figure represents the number of parameters used by the model.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Trade-off between performance and efficiency. <bold>(A)</bold> Task for disease classification, <bold>(B)</bold> Task for disease severity estimation.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1330527-g006.tif"/>
</fig>
<p>It can be observed that there is a similar trend in both tasks: disease classification (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6A</bold>
</xref>) and disease severity estimation (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6B</bold>
</xref>). Our MTDL-enhanced ResNet101 notably surpasses the single-task baseline with an Accuracy improvement of 0.81% for disease classification and 1.71% for severity estimation, and it outperforms the multi-task baseline with 0.36% and 0.99% improvements respectively. When using MobileNetV3Large as the MTDL-optimized model, we achieved significant performance gains with reduced parameter count and FLOPs, while still enhancing Accuracy over both baselines. For example, the MobileNetV3Large model, enhanced by our MTDL framework, outperforms the ResNet101 baseline by 0.63% and 1.44% in the two tasks, respectively. Remarkably, this is achieved with only 12.81% of the parameters (5.450M vs. 42.529M) and 2.87% of the FLOPs (0.225G vs. 7.832G). These findings highlight the MTDL framework&#x2019;s capability to improve performance significantly while maintaining computational efficiency, thereby reinforcing its advantage over conventional models.</p>
<p>Therefore, we need to select the appropriate distillation model for each specific scenario. The choice depends on balancing computational resources and performance. Typically, complex teachers like ResNet101 outperform compact students such as MobileNet, owing to deeper architectures. MTDL promotes mutual learning between teachers and students, simultaneously enhancing both models. With abundant resources, an MTDL-optimized teacher offers substantial performance gains. In contrast, for limited-resource scenarios like mobile inference, MTDL can distill a lightweight yet performant student model. Additionally, the teacher-free MTDL-TF variant reduces dependency on complex teachers, offering an alternative when resources are constrained.</p>
</sec>
<sec id="s3_3_3">
<label>3.3.3</label>
<title>Visual analysis for multi-task learning</title>
<p>In this section, we use Grad-CAM (<xref ref-type="bibr" rid="B46">Selvaraju et&#xa0;al., 2017</xref>) for visual analysis to gain deeper insights into the learning process of our MTDL framework. We examine three severity levels of Early Blight: healthy, general, and severe. Visualizations for single-task and multi-task models, as well as for each stage of MTDL learning, are provided. <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref> shows that the model&#x2019;s attention shifts toward task-relevant areas as it learns. For healthy leaves, the MTDL-enhanced model more precisely identifies the leaf as a whole, aligning with human visual systems. For leaves at a general severity level, the model focuses on localized disease spots for classification but expands its attention to surrounding regions for severity estimation. In cases of severe disease levels, the disease spots typically exhibit a widespread distribution across the leaf area. The knowledge integration model, in its pursuit to accurately recognize both the disease type and severity, tends to produce a Grad-CAM sensitivity map covering the entire leaf area. This comprehensive coverage contrasts with the single-task model, which primarily focuses on localized diseased regions, and the multi-task model, which, although it expands the area of interest, does not distribute sensitivity intensity as effectively. Moreover, the distribution of sensitivity intensity in the knowledge integration model offers a more realistic representation of the disease&#x2019;s extensive impact, thereby enhancing the model&#x2019;s explanatory power for Severe Early Blight. This analysis highlights the MTDL framework&#x2019;s adaptability in shifting its focus based on the task and severity, thereby improving performance and interpretability.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Visual analysis of attention shifts in MTDL framework across severity levels.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1330527-g007.tif"/>
</fig>
</sec>
<sec id="s3_3_4">
<label>3.3.4</label>
<title>Parameter sensitivity analysis</title>
<p>The temperature parameter <italic>T</italic> adjusts the softmax output in the neural network, smoothing the probability distribution and revealing more nuanced information about the model&#x2019;s predictions. This is crucial for knowledge distillation, where it aids in transferring detailed information from a teacher to a student model. This concept is introduced and utilized in <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>. To assess the sensitivity of our model to <italic>T</italic>, we vary <italic>T</italic> within the interval [0.1,50] and record the Accuracy of the disease classification and severity estimation tasks for each value. The results of nine common network architectures are shown in <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref>. Despite the differences in architecture, a similar trend is observed: as <italic>T</italic> increases, the model&#x2019;s performance improves, but rapidly declines when <italic>T</italic> exceeds 10. Notably, the model&#x2019;s performance remains relatively stable for <italic>T</italic> within the interval [3,8]. This indicates that our model is robust to the choice of <italic>T</italic> within this range, providing flexibility in practical applications.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Sensitivity analysis of temperature hyperparameter <italic>T</italic> in MTDL framework. <bold>(A)</bold> Accuracy for identifying disease types, <bold>(B)</bold> Accuracy for assessing disease severity, <bold>(C)</bold> F1-score for identifying disease types, <bold>(D)</bold> F1-score for assessing disease severity.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1330527-g008.tif"/>
</fig>
<p>On the other hand, the selection of a batch size of 32, momentum of 0.9, and learning rate decay factor of 0.1 was guided by a combination of empirical conventions and experimental validation aimed at striking a balance between computational efficiency and model performance. To validate the impact of different parameter settings on performance, we analyzed MTDL and its variants on the validation set for varying batch sizes (<xref ref-type="fig" rid="f9">
<bold>Figures&#xa0;9A, B</bold>
</xref>), momentum (<xref ref-type="fig" rid="f9">
<bold>Figures&#xa0;9C, D</bold>
</xref>), and learning rate decay factors (<xref ref-type="fig" rid="f9">
<bold>Figures&#xa0;9E, F</bold>
</xref>), detailing their effects on Accuracy. We can see that Accuracy remains relatively stable across the varying batch sizes (8, 16, 32, 64, 128), with the optimal average Accuracy achieved at 32. This is likely because a moderate batch size balances gradient estimation accuracy and the beneficial noise of stochasticity, optimizing learning. As momentum increases from 0.1 to 0.9, Accuracy generally improves. A higher momentum, like 0.9, effectively uses past gradients to accelerate convergence and navigate through local minima, leading to better performance compared to a lower setting like 0.1. Moreover, increasing decay factors tend to lower Accuracy, potentially due to a swift reduction in the learning rate and premature convergence. An optimal decay factor is one that slowly decreases the learning rate, facilitating precise adjustments as the model converges to the best solution.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Effect of different parameters on model performance. <bold>(A)</bold> Task for disease classification (BatchSize), <bold>(B)</bold> Task for disease severity (BatchSize), <bold>(C)</bold> Task for disease classification (Momentum), <bold>(D)</bold> Task for disease severity (Momentum), <bold>(E)</bold> Task for disease classification (Learning Rate Decay Factor), <bold>(F)</bold> Task for disease severity (Learning Rate Decay Factor).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1330527-g009.tif"/>
</fig>
</sec>
</sec>
</sec>
<sec id="s4" sec-type="conclusions">
<label>4</label>
<title>Conclusion</title>
<p>In this work, we present the multi-task distillation learning (MTDL) framework, a specialized solution for diagnosing tomato diseases. The framework comprises three key stages: knowledge disentanglement, mutual knowledge transfer, and knowledge integration. Using this staged learning approach, we leverage the complementary aspects of different tasks to enhance performance across various network architectures. Moreover, our framework adeptly balances performance with efficiency, underlining its potential for practical applications. Although MTDL enhances traditional knowledge distillation with bidirectional knowledge transfer between teacher and student models, it extends training time due to a progressive, multi-stage learning approach. To mitigate this, we introduce MTDL-PTF and MTDL-TF variants for efficiency, though they may slightly underperform compared to the original MTDL.</p>
<p>Furthermore, our current framework has some limitations. First, although the framework is designed for outdoor environments, it has stringent requirements for the subject being photographed, focusing mainly on recognizing single subjects in images. Second, the severity level classification is relatively basic, encompassing only three levels, including a healthy state. In future work, we plan to integrate object localization techniques into the distillation process to facilitate the identification of multiple leaves in images. Additionally, we aim to refine the classification of disease severity levels, focusing especially on the early detection of diseases. These planned enhancements will contribute to the development of more sophisticated and nuanced solutions in the field of tomato disease diagnosis, offering a robust framework for sustainable and intelligent agriculture.</p>
</sec>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>BL: Conceptualization, Methodology, Writing &#x2013; original draft. SW: Data curation, Formal analysis, Software, Writing &#x2013; review &amp; editing. FZ: Funding acquisition, Methodology, Writing &#x2013; review &amp; editing. NG: Formal analysis, Validation, Writing &#x2013; review &amp; editing. HF: Data curation, Formal analysis, Validation, Writing &#x2013; review &amp; editing. WY: Project administration, Writing &#x2013; review &amp; editing.</p>
</sec>
</body>
<back>
<sec id="s7" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work was supported by the National Natural Science Foundation of China (No. 61972132), the S&amp;T Program of Hebei (Nos. 20327404D, 21327404D, 21327407D), the Natural Science Foundation of Hebei Province, China (Nos. F2020204009, C2023204069), and the Research Project for Self-cultivating Talents of Hebei Agricultural University (No. PY201810).</p>
</sec>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Albahli</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Nawaz</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Dcnet: Densenet-77-based cornernet model for the tomato plant leaf disease detection and classification</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>, <elocation-id>957961</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fpls.2022.957961</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Atila</surname> <given-names>&#xdc;</given-names>
</name>
<name>
<surname>Ucar</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Akyol</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Ucar</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Plant leaf disease classification using efficientnet deep learning model</article-title>. <source>Ecol. Inf.</source> <volume>61</volume>, <fpage>101182</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ecoinf.2020.101182</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barbedo</surname> <given-names>J. G. A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Plant disease identification from individual lesions and spots using deep learning</article-title>. <source>Biosyst. Eng.</source> <volume>180</volume>, <fpage>96</fpage>&#x2013;<lpage>107</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.biosystemseng.2019.02.002</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Basavaiah</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Arlene Anthony</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Tomato leaf disease classification using multiple feature extraction techniques</article-title>. <source>Wireless Pers. Commun.</source> <volume>115</volume>, <fpage>633</fpage>&#x2013;<lpage>651</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11277-020-07590-x</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bhujel</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>N.-E.</given-names>
</name>
<name>
<surname>Arulmozhi</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Basak</surname> <given-names>J. K.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>H.-T.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A lightweight attention-based convolutional neural networks for tomato leaf disease classification</article-title>. <source>Agriculture</source> <volume>12</volume>, <fpage>228</fpage>. doi: <pub-id pub-id-type="doi">10.3390/agriculture12020228</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bi</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Duan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Kang</surname> <given-names>J.-R.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Mobilenet based apple leaf diseases identification</article-title>. <source>Mobile Networks Appl.</source> <volume>27</volume>, <fpage>172</fpage>&#x2013;<lpage>180</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11036-020-01640-1</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Botine&#x15f;tean</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Gruia</surname> <given-names>A. T.</given-names>
</name>
<name>
<surname>Jianu</surname> <given-names>I.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Utilization of seeds from tomato processing wastes as raw material for oil production</article-title>. <source>J. Material Cycles Waste Manage.</source> <volume>17</volume>, <fpage>118</fpage>&#x2013;<lpage>124</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10163-014-0231-4</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Boulent</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Foucher</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Theau</surname> <given-names>J.</given-names>
</name>
<name>
<surname>St-Charles</surname> <given-names>P.-L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Convolutional neural networks for the automatic identification of plant diseases</article-title>. <source>Front. Plant Sci.</source> <volume>10</volume>, <elocation-id>941</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fpls.2019.00941</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Breiman</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Random forests</article-title>. <source>Mach. Learn.</source> <volume>45</volume>, <fpage>5</fpage>&#x2013;<lpage>32</lpage>. doi: <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Corder</surname> <given-names>G. W.</given-names>
</name>
<name>
<surname>Foreman</surname> <given-names>D. I.</given-names>
</name>
</person-group> (<year>2014</year>). <source>Nonparametric statistics: A step-by-step approach</source> (<publisher-name>John Wiley &amp; Sons</publisher-name>).</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cortes</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Vapnik</surname> <given-names>V.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Support-vector networks</article-title>. <source>Mach. Learn.</source> <volume>20</volume>, <fpage>273</fpage>&#x2013;<lpage>297</lpage>. doi: <pub-id pub-id-type="doi">10.1007/BF00994018</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="web">
<person-group person-group-type="author">
<collab>Dataset AI Challenger</collab>
</person-group> (<year>2018</year>) <source>AI Challenger 2018 Datasets</source>. Available at: <uri xlink:href="https://github.com/AIChallenger/AI_Challenger_2018">https://github.com/AIChallenger/AI_Challenger_2018</uri> (Accessed <access-date>Nov. 1, 2022</access-date>).</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deng</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Xi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>An effective image-based tomato leaf disease segmentation method using mc-unet</article-title>. <source>Plant Phenomics</source> <volume>5</volume>, <fpage>0049</fpage>. doi: <pub-id pub-id-type="doi">10.34133/plantphenomics.0049</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghofrani</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Toroghi</surname> <given-names>R. M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Knowledge distillation in plant disease recognition</article-title>. <source>Neural Computing Appl</source>. doi: <pub-id pub-id-type="doi">10.1007/s00521-021-06882-y</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gupta</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A segmentation algorithm for the leaf area identification in plant&#x2019;s images</article-title>. <source>Sci. Technol. Asia</source>, <fpage>171</fpage>&#x2013;<lpage>178</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.14456/scitechasia.2022.33</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Habib</surname> <given-names>M. T.</given-names>
</name>
<name>
<surname>Majumder</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Jakaria</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Akter</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Uddin</surname> <given-names>M. S.</given-names>
</name>
<name>
<surname>Ahmed</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Machine vision based papaya disease recognition</article-title>. <source>J. King Saud University-Computer Inf. Sci.</source> <volume>32</volume>, <fpage>300</fpage>&#x2013;<lpage>309</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jksuci.2018.06.006</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Harakannanavar</surname> <given-names>S. S.</given-names>
</name>
<name>
<surname>Rudagi</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Puranikmath</surname> <given-names>V. I.</given-names>
</name>
<name>
<surname>Siddiqua</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Pramodhini</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Plant leaf disease detection using computer vision and machine learning algorithms</article-title>. <source>Global Transitions Proc.</source> <volume>3</volume>, <fpage>305</fpage>&#x2013;<lpage>310</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.gltp.2022.03.016</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Deep residual learning for image recognition</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</conf-name>, <conf-loc>Las Vegas, USA</conf-loc>. (<publisher-loc>Piscataway, NJ</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>770</fpage>&#x2013;<lpage>778</lpage>.</citation>
</ref>
<ref id="B19">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Hinton</surname> <given-names>G. E.</given-names>
</name>
<name>
<surname>Vinyals</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Dean</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Distilling the knowledge in a neural network</article-title>. Available at: <uri xlink:href="https://arxiv.org/abs/1503.02531">https://arxiv.org/abs/1503.02531</uri>.</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Howard</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Sandler</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Chu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>L.-C.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Tan</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Searching for mobilenetv3</article-title>. <source>Proc. IEEE/CVF Int. Conf. Comput. Vision</source> <volume>2019</volume>, <fpage>1314</fpage>&#x2013;<lpage>1324</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ICCV.2019.00140</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Howard</surname> <given-names>A. G.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Kalenichenko</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Weyand</surname> <given-names>T.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). &#x201c;<article-title>Mobilenets: Efficient convolutional neural networks for mobile vision applications</article-title>,&#x201d; in <conf-name>The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>Seoul, Korea (South)</conf-loc>. (<publisher-loc>Piscataway, NJ</publisher-loc>: <publisher-name>IEEE</publisher-name>).</citation>
</ref>
<ref id="B22">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Squeeze-and-excitation networks</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</conf-name>, <conf-loc>Salt Lake City, USA</conf-loc>. (<publisher-loc>Piscataway, NJ</publisher-loc>: <publisher-name>IEEE</publisher-name>).</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>M.-L.</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>Y.-H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Dataset of tomato leaves</article-title>. <source>Mendeley Data</source> <volume>1</volume>.</citation>
</ref>
<ref id="B24">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>van der Maaten</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Weinberger</surname> <given-names>K. Q.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Densely connected convolutional networks</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</conf-name>, <conf-loc>Hawaii, USA</conf-loc>. (<publisher-loc>Munich, Germany</publisher-loc>), <fpage>4700</fpage>&#x2013;<lpage>4708</lpage>.</citation>
</ref>
<ref id="B25">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Hughes</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Salath&#xe9;</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>An open access repository of images on plant health to enable the development of mobile disease diagnostics</article-title>. Available at: <uri xlink:href="https://arxiv.org/abs/1511.08060">https://arxiv.org/abs/1511.08060</uri>.</citation>
</ref>
<ref id="B26">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Iandola</surname> <given-names>F. N.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Moskewicz</surname> <given-names>M. W.</given-names>
</name>
<name>
<surname>Ashraf</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Dally</surname> <given-names>W. J.</given-names>
</name>
<name>
<surname>Keutzer</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Squeezenet: Alexnet-level accuracy with 50x fewer parameters and 0.5 mb model size</article-title>. Available at: <uri xlink:href="https://arxiv.org/abs/1602.07360">https://arxiv.org/abs/1602.07360</uri>.</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ilyas</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Jin</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Siddique</surname> <given-names>M. I.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>S. J.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Chua</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Diana: A deep learning-based paprika plant disease and pest phenotyping system with disease severity analysis</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>, <elocation-id>983625</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fpls.2022.983625</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Janarthan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Thuseethan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Rajasegarar</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Yearwood</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>P2op&#x2014;plant pathology on palms: A deep learning-based mobile solution for in-field plant disease detection</article-title>. <source>Comput. Electron. Agric.</source> <volume>202</volume>, <fpage>107371</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2022.107371</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ji</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Automatic detection and severity analysis of grape black measles disease based on deep learning and fuzzy logic</article-title>. <source>Comput. Electron. Agric.</source> <volume>193</volume>, <fpage>106718</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2022.106718</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ji</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Multi-label learning for crop leaf diseases recognition and severity estimation based on convolutional neural networks</article-title>. <source>Soft Computing</source> <volume>24</volume>, <fpage>15327</fpage>&#x2013;<lpage>15340</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00500-020-04866-z</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Recognition of rice leaf diseases and wheat leaf diseases based on multi-task deep transfer learning</article-title>. <source>Comput. Electron. Agric.</source> <volume>186</volume>, <fpage>106184</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2021.106184</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Karlekar</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Seal</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Soynet: Soybean leaf diseases classification</article-title>. <source>Comput. Electron. Agric.</source> <volume>172</volume>, <fpage>105342</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2020.105342</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Komodakis</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Zagoruyko</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Paying more attention to attention: improving the performance of convolutional neural networks via attention transfer</article-title>. Available at: <uri xlink:href="https://arxiv.org/abs/1612.03928">https://arxiv.org/abs/1612.03928</uri>.</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kumar</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Chandran</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Tomar</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Bhuyan</surname> <given-names>D. J.</given-names>
</name>
<name>
<surname>Grasso</surname> <given-names>S.</given-names>
</name>
<name>
<surname>S&#xe1;</surname> <given-names>A. G. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Valorization potential of tomato (solanum lycopersicum l.) seed: nutraceutical quality, food properties, safety aspects, and application as a health-promoting ingredient in foods</article-title>. <source>Horticulturae</source> <volume>8</volume>, <fpage>265</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/horticulturae8030265</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kumar</surname> <given-names>K. S.</given-names>
</name>
<name>
<surname>Paswan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Srivastava</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Tomato-a natural medicine and its health benefits</article-title>. <source>J. Pharmacognosy Phytochem.</source> <volume>1</volume>, <fpage>33</fpage>&#x2013;<lpage>43</lpage>.</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Shang</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Slvit: Shuffle-convolution-based lightweight vision transformer for effective diagnosis of sugarcane leaf diseases</article-title>. <source>J. King Saud University-Computer Inf. Sci.</source> <volume>35</volume>, <fpage>101401</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jksuci.2022.09.013</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>B.-Y.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>K.-J.</given-names>
</name>
<name>
<surname>Su</surname> <given-names>W.-H.</given-names>
</name>
<name>
<surname>Peng</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Two-stage convolutional neural networks for diagnosing the severity of alternaria leaf blotch disease of the apple tree</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <fpage>2519</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs14112519</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Meenakshi</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Swaraja</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Ch</surname> <given-names>U. K.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Grading of quality in tomatoes using multi-class svm</article-title>,&#x201d; in <conf-name>2019 3rd International Conference on Computing Methodologies and Communication (ICCMC)</conf-name> (<publisher-loc>Erode, India: Surya Engineering College,</publisher-loc> <publisher-name>IEEE</publisher-name>). <fpage>104</fpage>&#x2013;<lpage>107</lpage>.</citation>
</ref>
<ref id="B39">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Mokhtar</surname> <given-names>U.</given-names>
</name>
<name>
<surname>Ali</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Hassanien</surname> <given-names>A. E.</given-names>
</name>
<name>
<surname>Hefny</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Identifying two of tomatoes leaf viruses using support vector machine</article-title>,&#x201d; in <conf-name>Information Systems Design and Intelligent Applications: Proceedings of Second International Conference INDIA 2015</conf-name> (<publisher-loc>Kalyani, India</publisher-loc>), Vol. <volume>1</volume>. <fpage>771</fpage>&#x2013;<lpage>782</lpage>.</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nanehkaran</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Tian</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Al-Nabhan</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Recognition of plant leaf diseases based on computer vision</article-title>. <source>J. Ambient Intell. Humanized Computing</source> <volume>2020</volume>, <fpage>1</fpage>&#x2013;<lpage>18</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s12652-020-02505-x</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ozguven</surname> <given-names>M. M.</given-names>
</name>
<name>
<surname>Adem</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Automatic detection and classification of leaf spot disease in sugar beet using deep learning algorithms</article-title>. <source>Physica A: Stat. Mechanics its Appl.</source> <volume>535</volume>, <fpage>122537</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.physa.2019.122537</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pal</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Kumar</surname> <given-names>V.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Agridet: Plant leaf disease severity classification using agriculture detection framework</article-title>. <source>Eng. Appl. Artif. Intell.</source> <volume>119</volume>, <fpage>105754</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.engappai.2022.105754</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Patil</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Yaligar</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Meena</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Comparision of performance of classifiers-svm, rf and ann in potato blight disease detection using leaf images</article-title>,&#x201d; in <conf-name>2017 IEEE International Conference on Computational Intelligence and Computing research (ICCIC)</conf-name>, <conf-loc>Tamil Nadu, India</conf-loc>. (<publisher-loc>Piscataway, NJ</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>5</lpage>.</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rahman</surname> <given-names>S. U.</given-names>
</name>
<name>
<surname>Alam</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Ahmad</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Arshad</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Image processing based system for the detection, identification and treatment of tomato leaf diseases</article-title>. <source>Multimedia Tools Appl.</source> <volume>82</volume>, <fpage>9431</fpage>&#x2013;<lpage>9445</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11042-022-13715-0</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roy</surname> <given-names>A. M.</given-names>
</name>
<name>
<surname>Bhaduri</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A deep learning enabled multi-class plant disease detection model based on computer vision</article-title>. <source>AI</source>. <volume>2</volume> (<issue>3</issue>), <fpage>413</fpage>&#x2013;<lpage>428</lpage>. doi: <pub-id pub-id-type="doi">10.3390/ai2030026</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Selvaraju</surname> <given-names>R. R.</given-names>
</name>
<name>
<surname>Cogswell</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Das</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Vedantam</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Parikh</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Batra</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Grad-cam: Visual explanations from deep networks via gradient-based localization</article-title>. <source>Proc. IEEE Int. Conf. Comput. Vision</source> <volume>2017</volume>, <fpage>618</fpage>&#x2013;<lpage>626</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ICCV.2017.74</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Septiyanti</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Meliana</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Characterization of nanoemulsion gotukola, mangosteen rind, cucumber and tomato extract for cosmetic raw material</article-title>. <source>J. Physics: Conf. Ser. (IOP Publishing)</source> <volume>1442</volume>, <fpage>012046</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1088/1742-6596/1442/1/012046</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharif</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Khan</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Iqbal</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Azam</surname> <given-names>M. F.</given-names>
</name>
<name>
<surname>Lali</surname> <given-names>M. I. U.</given-names>
</name>
<name>
<surname>Javed</surname> <given-names>M. Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Detection and classification of citrus diseases in agriculture based on optimized weighted segmentation and feature selection</article-title>. <source>Comput. Electron. Agric.</source> <volume>150</volume>, <fpage>220</fpage>&#x2013;<lpage>234</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2018.04.023</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shoaib</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Hussain</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Shah</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Ullah</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Shah</surname> <given-names>S. M.</given-names>
</name>
<name>
<surname>Ali</surname> <given-names>F.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Deep learning-based segmentation and classification of leaf images for detection of tomato plant disease</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>, <elocation-id>1031748</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fpls.2022.1031748</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Simonyan</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zisserman</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Very deep convolutional networks for large-scale image recognition</article-title>. Available at: <uri xlink:href="https://arxiv.org/abs/1409.1556">https://arxiv.org/abs/1409.1556</uri>.</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Singh</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Jain</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Jain</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Kayal</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Kumawat</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Batra</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Plantdoc: A dataset for visual plant disease detection</article-title>. <source>Proc. 7th ACM IKDD CoDS 25th COMAD</source>, <fpage>249</fpage>&#x2013;<lpage>253</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3371158.3371196</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Singh</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Verma</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Alex</surname> <given-names>J. S. R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Disease and pest infection detection in coconut tree through deep learning techniques</article-title>. <source>Comput. Electron. Agric.</source> <volume>182</volume>, <fpage>105986</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2021.105986</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sujatha</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Chatterjee</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Jhanjhi</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Brohi</surname> <given-names>S. N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Performance of deep learning vs machine learning in plant leaf disease detection</article-title>. <source>Microprocessors Microsystems</source> <volume>80</volume>, <fpage>103615</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.micpro.2020.103615</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Tan</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Le</surname> <given-names>Q. V.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Efficientnet: Rethinking model scaling for convolutional neural networks</article-title>,&#x201d; in <conf-name>International conference on machine learning</conf-name> (<publisher-name>PMLR</publisher-name>) <volume>2019</volume>, <fpage>6105</fpage>&#x2013;<lpage>6114</lpage>.</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thai</surname> <given-names>H.-T.</given-names>
</name>
<name>
<surname>Le</surname> <given-names>K.-H.</given-names>
</name>
<name>
<surname>Nguyen</surname> <given-names>N. L.-T.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Formerleaf: An efficient vision transformer for cassava leaf disease detection</article-title>. <source>Comput. Electron. Agric.</source> <volume>204</volume>, <fpage>107518</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2022.107518</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Thuseethan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Vigneshwaran</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Charles</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wimalasooriya</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Siamese network-based lightweight framework for tomato leaf disease recognition</article-title>. Available at: <uri xlink:href="https://arxiv.org/abs/2209.11214">https://arxiv.org/abs/2209.11214</uri>.</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A cucumber leaf disease severity classification method based on the fusion of deeplabv3+ and u-net</article-title>. <source>Comput. Electron. Agric.</source> <volume>189</volume>, <fpage>106373</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2021.106373</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Automatic image-based plant disease severity estimation using deep learning</article-title>. <source>Comput. Intell. Neurosci.</source> <volume>2017</volume>, <fpage>2917536</fpage>. doi: <pub-id pub-id-type="doi">10.1155/2017/2917536</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Dhbp: A dual-stream hierarchical bilinear pooling model for plant disease multi-task classification</article-title>. <source>Comput. Electron. Agric.</source> <volume>195</volume>, <fpage>106788</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2022.106788</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Woo</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Park</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>J.-Y.</given-names>
</name>
<name>
<surname>Kweon</surname> <given-names>I. S.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Cbam: Convolutional block attention module</article-title>,&#x201d; in <conf-name>Proceedings of the European conference on computer vision (ECCV)</conf-name>, <conf-loc>Munich, Germany</conf-loc>. (<publisher-loc>Berlin</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>3</fpage>&#x2013;<lpage>19</lpage>.</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wen</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Su</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Ds-detr: A model for tomato leaf disease segmentation and damage evaluation</article-title>. <source>Agronomy</source> <volume>12</volume>, <fpage>2023</fpage>. doi: <pub-id pub-id-type="doi">10.3390/agronomy12092023</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Yuan</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Tay</surname> <given-names>F. E.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Revisiting knowledge distillation via label smoothing regularization</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>, <conf-loc>Virtual</conf-loc>. (<publisher-loc>Piscataway, NJ</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>3903</fpage>&#x2013;<lpage>3911</lpage>.</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zeng</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Lightweight dense-scale network (LDSNet) for corn leaf disease identification</article-title>. <source>Comput. Electron. Agric.</source> <volume>197</volume>, <fpage>106943</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2022.106943</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Griffiths</surname> <given-names>J. S.</given-names>
</name>
<name>
<surname>Marchand</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Bernards</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Tomato brown rugose fruit virus: An emerging and rapidly spreading plant RNA virus that threatens tomato production worldwide</article-title>. <source>Mol. Plant Pathol.</source> <volume>23</volume>, <fpage>1262</fpage>&#x2013;<lpage>1277</lpage>. doi: <pub-id pub-id-type="doi">10.1111/mpp.13229</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>J.-H.</given-names>
</name>
<name>
<surname>Kong</surname> <given-names>F.-T.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>J.-Z.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>S.-Q.</given-names>
</name>
<name>
<surname>Zhai</surname> <given-names>Z.-F.</given-names>
</name>
</person-group> (<year>2018</year>a). <article-title>Automatic image segmentation method for cotton leaves with disease under natural environment</article-title>. <source>J. Integr. Agric.</source> <volume>17</volume>, <fpage>1800</fpage>&#x2013;<lpage>1814</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S2095-3119(18)61915-X</pub-id>
</citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Rao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Man</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Identification of cucumber leaf diseases using deep learning and small sample size for agricultural internet of things</article-title>. <source>Int. J. Distributed Sensor Networks</source> <volume>17</volume>, <fpage>15501477211007407</fpage>. doi: <pub-id pub-id-type="doi">10.1177/15501477211007407</pub-id>
</citation>
</ref>
<ref id="B67">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>b). &#x201c;<article-title>ShuffleNet: An extremely efficient convolutional neural network for mobile devices</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</conf-name>, <conf-loc>Salt Lake City, UT, USA</conf-loc>. <fpage>6848</fpage>&#x2013;<lpage>6856</lpage>.</citation>
</ref>
<ref id="B68">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Cui</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Qiu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Decoupled knowledge distillation</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>, <conf-loc>New Orleans, LA, USA</conf-loc>. (<publisher-loc>Piscataway, NJ</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>11953</fpage>&#x2013;<lpage>11962</lpage>.</citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Peng</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Tomato leaf disease diagnosis based on improved convolution neural network by attention module</article-title>. <source>Agriculture</source> <volume>11</volume>, <fpage>651</fpage>. doi: <pub-id pub-id-type="doi">10.3390/agriculture11070651</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>