<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Neurosci.</journal-id>
<journal-title>Frontiers in Neuroscience</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Neurosci.</abbrev-journal-title>
<issn pub-type="epub">1662-453X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fnins.2023.1204385</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Neuroscience</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>An improved model using convolutional sliding window-attention network for motor imagery EEG classification</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Huang</surname> <given-names>Yuxuan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2278947/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Zheng</surname> <given-names>Jianxu</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Xu</surname> <given-names>Binxing</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Li</surname> <given-names>Xuhang</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Liu</surname> <given-names>Yu</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Wang</surname> <given-names>Zijian</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1329842/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Feng</surname> <given-names>Hua</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/394064/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Cao</surname> <given-names>Shiqi</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>School of Computer Science and Technology, Donghua University</institution>, <addr-line>Shanghai</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Neurosurgery and State Key Laboratory of Trauma, Burn and Combined Injury, Southwest Hospital, Third Military Medical University (Army Medical University)</institution>, <addr-line>Chongqing</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>Department of Orthopaedics of TCM Clinical Unit, The Sixth Medical Center, Chinese PLA General Hospital</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Shugeng Chen, Huashan Hospital, Fudan University, China</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Giulia Cisotto, University of Milano-Bicocca, Italy; Yuhu Shi, Shanghai Maritime University, China</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Zijian Wang <email>wang.zijian&#x00040;dhu.edu.cn</email></corresp>
<corresp id="c002">Hua Feng <email>fenghua8888&#x00040;vip.163.com</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>15</day>
<month>08</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>17</volume>
<elocation-id>1204385</elocation-id>
<history>
<date date-type="received">
<day>12</day>
<month>04</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>26</day>
<month>07</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2023 Huang, Zheng, Xu, Li, Liu, Wang, Feng and Cao.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Huang, Zheng, Xu, Li, Liu, Wang, Feng and Cao</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license> </permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>The classification model of motor imagery-based electroencephalogram (MI-EEG) is a new human-computer interface pattern and a new neural rehabilitation assessment method for diseases such as Parkinson&#x00027;s and stroke. However, existing MI-EEG models often suffer from insufficient richness of spatiotemporal feature extraction, learning ability, and dynamic selection ability.</p>
</sec>
<sec>
<title>Methods</title>
<p>To solve these problems, this work proposed a convolutional sliding window-attention network (CSANet) model composed of novel spatiotemporal convolution, sliding window, and two-stage attention blocks.</p>
</sec>
<sec>
<title>Results</title>
<p>The model outperformed existing state-of-the-art (SOTA) models in within- and between-individual classification tasks on commonly used MI-EEG datasets BCI-2a and Physionet MI-EEG, with classification accuracies improved by 4.22 and 2.02%, respectively.</p>
</sec>
<sec>
<title>Discussion</title>
<p>The experimental results also demonstrated that the proposed type token, sliding window, and local and global multi-head self-attention mechanisms can significantly improve the model&#x00027;s ability to construct, learn, and adaptively select multi-scale spatiotemporal features in MI-EEG signals, and accurately identify electroencephalogram signals in the unilateral motor area. This work provided a novel and accurate classification model for MI-EEG brain-computer interface tasks and proposed a feasible neural rehabilitation assessment scheme based on the model, which could promote the further development and application of MI-EEG methods in neural rehabilitation.</p>
</sec></abstract>
<kwd-group>
<kwd>EEG</kwd>
<kwd>motor imagery</kwd>
<kwd>brain computer interface</kwd>
<kwd>deep learning</kwd>
<kwd>CNN</kwd>
<kwd>attention</kwd>
</kwd-group>
<counts>
<fig-count count="10"/>
<table-count count="7"/>
<equation-count count="12"/>
<ref-count count="56"/>
<page-count count="17"/>
<word-count count="10283"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Neural Technology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1. Introduction</title>
<p>The electroencephalogram (EEG) is a non-invasive diagnostic technique that records brain activity by placing electrodes on the scalp to detect electrical signals from neurons in the brain. It can be used to diagnose various neurological disorders, such as epilepsy, sleep disorders, and cerebrovascular accidents. Additionally, it can decode and analyze the intentions of the brain for controlling external mechanical devices. The brain-computer interface (BCI) represents a new generation of human-computer interaction that harnesses the power of EEG devices to capture human neural signals, which are then analyzed and classified using pattern recognition algorithms to control computers (Vaid et al., <xref ref-type="bibr" rid="B46">2015</xref>). Motor imagery is a type of EEG signal that arises from the mental simulation or imagination of movements, resulting in neural patterns like those observed during actual physical movement. This EEG signal can be utilized for brain-machine communication, such as controlling prosthetics or wheelchairs, and also holds potential applications in neurorehabilitation (Abiri et al., <xref ref-type="bibr" rid="B1">2019</xref>).</p>
<p>MI is now widely used in various neurorehabilitation training programs (Jeunet et al., <xref ref-type="bibr" rid="B26">2015</xref>; Ron-Angevin et al., <xref ref-type="bibr" rid="B39">2017</xref>; Paris-Alemany et al., <xref ref-type="bibr" rid="B36">2019</xref>). In the training program, motor imagery promotes the regeneration of brain neurons and connectivity by internally simulating the movements of specific muscles. MI could also improve the coordination patterns during the formation process of motor skills and provide muscles with additional opportunities for skill practice, which aids in learning or regaining control of actual movements. It has been utilized to improve the muscle control and recovery abilities of patients with Parkinson&#x00027;s disease, post-stroke sequelae, post-brain injury sequelae, and joint diseases (Williams et al., <xref ref-type="bibr" rid="B51">2004</xref>; Moseley, <xref ref-type="bibr" rid="B34">2006</xref>; Tamir et al., <xref ref-type="bibr" rid="B45">2007</xref>; Zimmermann-Schlatter et al., <xref ref-type="bibr" rid="B56">2008</xref>). The classification of MI signals recorded by EEG (MI-EEG) was also used in the neurorehabilitation assessment in the training programs, which was limited by the classification performance of the MI-EEG algorithms (Chen et al., <xref ref-type="bibr" rid="B15">2022</xref>; Cuomo et al., <xref ref-type="bibr" rid="B17">2022</xref>; Binks et al., <xref ref-type="bibr" rid="B12">2023</xref>).</p>
<p>The traditional classification frameworks have utilized feature extraction techniques to manually extract features in the time-frequency domain of MI-EEG signals and subsequently classified the signals using machine learning algorithms, such as Filter Bank Common Spatial Pattern (FBCSP) (Chin et al., <xref ref-type="bibr" rid="B16">2009</xref>), Fast Fourier Transform (FFT) (Wang et al., <xref ref-type="bibr" rid="B50">2018b</xref>), Wavelet Transform (Qin and He, <xref ref-type="bibr" rid="B38">2005</xref>), Support Vector Machines (SVM) (Selim et al., <xref ref-type="bibr" rid="B42">2018</xref>), Linear Discriminant Analysis (LDA) (Steyrl et al., <xref ref-type="bibr" rid="B44">2014</xref>), and K-Nearest Neighbor (KNN) (Bhattacharyya et al., <xref ref-type="bibr" rid="B11">2010</xref>). However, these methods have high requirements for manual feature design and are greatly affected by designers and data, which is not conducive to the application and promotion of various scenarios, including neurorehabilitation assessment.</p>
<p>In recent years, deep learning algorithms, which have excelled in the fields of vision and language research, have significantly improved the classification performance of MI-EEG classification. Using deep learning algorithms, classifiers could automatically extract features without manual feature extraction or reliance on specific MI-EEG data. Deep learning methods such as Multi-Layer Perceptron (MLP) (Chatterjee and Bandyopadhyay, <xref ref-type="bibr" rid="B14">2016</xref>; Samuel et al., <xref ref-type="bibr" rid="B40">2017</xref>), Convolutional Neural Networks (CNN) (Dai et al., <xref ref-type="bibr" rid="B18">2019</xref>; Hou et al., <xref ref-type="bibr" rid="B24">2020</xref>; Li et al., <xref ref-type="bibr" rid="B31">2020</xref>; Zancanaro et al., <xref ref-type="bibr" rid="B54">2021</xref>; Altuwaijri et al., <xref ref-type="bibr" rid="B7">2022</xref>), Deep Belief Networks (DBN) (Xu and Plataniotis, <xref ref-type="bibr" rid="B53">2016</xref>), Recurrent Neural Networks (RNN) (Luo et al., <xref ref-type="bibr" rid="B32">2018</xref>; Kumar et al., <xref ref-type="bibr" rid="B29">2021</xref>), as well as Long Short-Term Memory (LSTM) in combination with CNN or RNN for spatiotemporal features (Wang et al., <xref ref-type="bibr" rid="B48">2018a</xref>; Khademi et al., <xref ref-type="bibr" rid="B28">2022</xref>), have been successfully proposed for MI-EEG tasks. The classification performance of these methods could far outperform traditional machine learning methods.</p>
<p>Nowadays, attention mechanisms with dynamic spatio-temporal feature extraction for deep learning are demonstrated to have strong adaptive feature extraction capabilities, which have been shown to help improve performance in various machine learning tasks (Bahdanau et al., <xref ref-type="bibr" rid="B10">2014</xref>). Within attention mechanisms, the multi-head self-attention model has dominated the development of the most advanced artificial intelligence algorithms (Vaswani et al., <xref ref-type="bibr" rid="B47">2017</xref>). Currently, a few attention-based deep learning algorithms have been proposed for EEG signal processing, and have been found to have breakthrough performance in epilepsy detection, emotion recognition, MI classification, and other tasks (Zhang et al., <xref ref-type="bibr" rid="B55">2020</xref>; Amin et al., <xref ref-type="bibr" rid="B9">2022</xref>). For example, Xie et al. (<xref ref-type="bibr" rid="B52">2022</xref>) have proposed a novel approach that utilizes multi-head self-attention combined with position embedding to enhance the classification performance of EEG on the Physionet dataset, achieving an accuracy of 68.54%. Furthermore, Altuwaijri and Muhammad (<xref ref-type="bibr" rid="B6">2022</xref>) have employed channel attention and spatial attention mechanisms to capture temporal and spatial features from EEG signals on the BCI-2a dataset, resulting in an accuracy of 83.63%. However, these methods lack comprehensive integration of multi-scale spatiotemporal features and also neglect adaptive attention selection for global features (Al-Saegh et al., <xref ref-type="bibr" rid="B3">2021</xref>; Altaheri et al., <xref ref-type="bibr" rid="B5">2021</xref>). Both defects may reduce the feature learning and selection abilities of the MI-EEG model and affect its performance.</p>
<p>To solve these problems, this article proposes a model for MI-EEG classification called the convolutional sliding window-attention network (CSANet). The model consists of three components. First, a convolution block consisting of multi-layered convolutional, pooling, and normalization layers for extracting spatiotemporal features was proposed to extract the spatiotemporal features of the EEG signal preliminarily. Second, a sliding window block with continuous and dilated sliding windows was proposed to further combine the feature tokens with local and global context information and the token of window type. Finally, an attention block with local and global attention mechanisms was proposed to highlight effective features, which was followed by a classifier consisting of fully connected layers. The CSANet was evaluated in two commonly used MI-EEG datasets and was demonstrated to outperform the state-of-the-art (SOTA) models. The plausible application framework of the accurate CSANet model in neurorehabilitation assessment was also proposed in the discussion chapter. The main contributions to this work are listed as follows:</p>
<list list-type="order">
<list-item><p>This article proposes a novel deep learning model for MI-EEG tasks that utilizes multi-scale feature extraction modules with convolutional layers and sliding windows and feature optimization selection modules using attention mechanisms. The proposed model outperformed SOTA models in two commonly used MI-EEG datasets.</p></list-item>
<list-item><p>The spatiotemporal convolutional, continuous, and dilated sliding windows were proposed to extract effective correlated features from EEG signals to solve the problem of simple feature scale.</p></list-item>
<list-item><p>Local and global multi-head self-attention mechanisms were utilized to enhance the adaptive feature selection ability of different scale information associations in EEG signals between individuals.</p></list-item>
<list-item><p>A plausible application framework of the CSANet model was proposed to provide a possible solution for the neurorehabilitation assessment based on the brain-computer interface.</p></list-item>
</list>
</sec>
<sec id="s2">
<title>2. Methods</title>
<p>The framework of the proposed CSANet model is demonstrated in <xref ref-type="fig" rid="F1">Figure 1</xref>. The model comprises three sequential blocks: the convolutional block, the sliding window block, and the attention block. The convolutional block consists of three convolutional layers and two pooling layers. It extracts features from EEG signals in the time domain using convolutional layers for temporal, channel, and local feature extraction. The output feature sequence is then input into the sliding window block, which is composed of continuous and dilated sliding windows. This block extracts the local and global context information of feature sequences through two different sliding windows to improve the richness of feature expression. Finally, the features in sliding windows are adaptively selected in the attention block, in which the features in each sliding window are adaptively weighted using local attention according to the feature relationships within the window. After features in all windows are merged, global attention is utilized to weigh the features again according to the relationships between all features. The effective features are highlighted through the two-stage attention sub-blocks. Finally, two fully connected layers with SoftMax activation are used to convert the input EEG signals into the probability of each category.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>The structure of the proposed CSANet. It includes three blocks, which are the convolution block, the sliding window block, and the attention block.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnins-17-1204385-g0001.tif"/>
</fig>
<sec>
<title>2.1. Convolution block</title>
<p>This convolution block is similar to the feature extraction module in the ACTNet (Altaheri et al., <xref ref-type="bibr" rid="B4">2022</xref>). The convolution block is composed of temporal, channel, and spatial convolutional layers and two average pooling layers, as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>. The three convolutional layers sequentially extract and fuse the temporal, channel, and spatial features of EEG signals to form the effective feature sequence.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>The input of the convolution block is a two-dimensional matrix with channels (C) and time points (T), which goes through three convolution layers, which are temporal, channel, and local convolutional layers, and two pooling layers.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnins-17-1204385-g0002.tif"/>
</fig>
<p>The temporal convolutional layer receives the raw EEG signal with <italic>T</italic> time points and <italic>C</italic> channels in the time domain and extracts features along the time points in every single channel. It uses <italic>F</italic><sub>1</sub> convolutional kernel with a kernel size of 1 &#x000D7; <italic>F</italic><sub><italic>s</italic></sub>/4. <italic>F</italic><sub><italic>s</italic></sub> is the sampling rate of the EEG signals, which means that each convolutional kernel extracts the temporal patterns within 1/4 seconds. The filters in this layer slide over the time axis and extract low-level temporal features at all the time points, which lays the foundation for the construction of high-level temporal features in the sliding window block. A batch normalization layer follows the temporal convolutional layer.</p>
<p>The channel convolutional layer receives the normalized low-level temporal features. The layer utilized the depth-wise convolutional layer to extract channel-spatial features from the input features for all the temporal features from the same time points. This layer used <italic>B</italic> depth-wise convolutional kernels with a kernel size of <italic>C</italic>&#x000D7;1, where C represents the number of channels. Each depth-wise convolutional kernel is applied to each input feature map and outputs <italic>B</italic> feature maps, which means that for the input <italic>F</italic><sub>1</sub> feature maps, the layer outputs <italic>F</italic><sub>2</sub> &#x0003D; <italic>F</italic><sub>1</sub>&#x000D7;<italic>B</italic> feature maps. This approach allows the channel convolutional layer to capture the valuable information of inter-channel dependencies in the EEG signals. A batch normalization layer and an Exponential Linear Unit (ELU) activation function follow the convolutional layer. Then, an average pooling layer with a pooling size of 1 &#x000D7; <italic>P</italic><sub>1</sub> is used to compress the features. This layer reduces the spatial dimensionality of the features obtained from the previous layer while retaining important information.</p>
<p>The local convolutional layer is designed to integrate the local spatiotemporal features with a convolutional kernel size of 1 &#x000D7; 16. A batch normalization, an ELU activation function, and an average pooling layer process the output feature sequence after the local convolutional layer. The pooling layer is set with a pooling size of 1 &#x000D7; <italic>P</italic><sub>2</sub>. The size of output features is <italic>F</italic><sub>2</sub>&#x000D7;<italic>T</italic><sub><italic>z</italic></sub>, where <italic>T</italic><sub><italic>z</italic></sub> &#x0003D; <italic>T</italic>/(<italic>P</italic><sub>1</sub><italic>P</italic><sub>2</sub>). The output features could be deemed as <italic>T</italic><sub><italic>z</italic></sub> sequential embedding tokens with <italic>F</italic><sub>2</sub> features. The token embedding sequence <bold>z</bold><sub><bold>c</bold></sub> is defined as:</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M1"><mml:mtable columnalign='left'><mml:mtr><mml:mtd><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>c</mml:mi></mml:mstyle></mml:msub><mml:mtext>&#x000A0;</mml:mtext><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mo stretchy='false'>[</mml:mo><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mn>1</mml:mn></mml:mstyle></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mn>2</mml:mn></mml:mstyle></mml:msup><mml:mo>,</mml:mo><mml:mo>&#x022EF;</mml:mo><mml:mo>,</mml:mo><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>T</mml:mi></mml:mstyle><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle></mml:msub></mml:mrow></mml:msup><mml:mo stretchy='false'>]</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msup><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mi>&#x0211D;</mml:mi><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x02264;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x02264;</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mi>z</mml:mi></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p><bold>E</bold><sup><italic>i</italic></sup> is the extracted token embedding for the <italic>i</italic>th token. The token embedding sequence is then input into the sliding window block to extract the high-level temporal features.</p>
</sec>
<sec>
<title>2.2. Sliding window block</title>
<p>The sliding window block with two types of sliding windows is proposed to further integrate the high-level spatiotemporal features from the output token embedding sequence of the convolution block, shown in <xref ref-type="fig" rid="F3">Figure 3</xref>. Parallel continuous sliding windows and dilated sliding windows are proposed in the sliding window block to extract different high-level token sequences from the token embedding sequence <bold>z</bold><sub><bold>c</bold></sub>.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Sliding window block consists of two types of sliding windows, which are continuous sliding window and dilated sliding window.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnins-17-1204385-g0003.tif"/>
</fig>
<p>The continuous sliding windows are proposed to find some high-level local effective information by extracting continuous token sequence <inline-formula><mml:math id="M3"><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> with continuous type token <italic>T</italic>1. <italic>T</italic>1 token is the trainable type embedding with <italic>F</italic><sub>2</sub> features, which is set as the first token of <inline-formula><mml:math id="M4"><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>. <inline-formula><mml:math id="M5"><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> is defined by:</p>
<disp-formula id="E3"><label>(2)</label><mml:math id="M6"><mml:mrow><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mrow><mml:mi>s</mml:mi><mml:mn>1</mml:mn></mml:mrow><mml:mi>i</mml:mi></mml:msubsup><mml:mo>=</mml:mo><mml:mo stretchy='false'>[</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mrow><mml:mi>t</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mrow><mml:mn>2</mml:mn><mml:mi>i</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mrow><mml:mn>2</mml:mn><mml:mi>i</mml:mi></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mo>&#x022EF;</mml:mo><mml:mo>,</mml:mo><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mrow><mml:mn>2</mml:mn><mml:mi>i</mml:mi><mml:mo>+</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mi>z</mml:mi></mml:msub><mml:mo>/</mml:mo><mml:mn>2</mml:mn><mml:mo>&#x02212;</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo stretchy='false'>]</mml:mo><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>1</mml:mn><mml:mo>&#x02264;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x02264;</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mi>z</mml:mi></mml:msub><mml:mo>/</mml:mo><mml:mn>4</mml:mn></mml:mrow></mml:math></disp-formula>
<p><inline-formula><mml:math id="M7"><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> is the <italic>i</italic>th continuous token sequence with <italic>T</italic><sub><italic>z</italic></sub>/2 tokens. The first token <bold>E</bold><sub><italic>t</italic>1</sub> is the type embedding for continuous sliding windows. In total, there are <italic>T</italic><sub><italic>z</italic></sub>/4 identical <bold>E</bold><sub><italic>t</italic>1</sub> tokens located at the first position of the <italic>T</italic><sub><italic>z</italic></sub>/4 continuous token sequences.</p>
<p>The dilated sliding windows are proposed to find effective global integrated information by extracting the discontinuous token sequence <inline-formula><mml:math id="M8"><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>, which is calculated by:</p>
<disp-formula id="E4"><label>(3)</label><mml:math id="M9"><mml:mrow><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mrow><mml:mi>s</mml:mi><mml:mn>2</mml:mn></mml:mrow><mml:mi>i</mml:mi></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo> <mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mtext>&#x000A0;</mml:mtext><mml:mrow><mml:mo>[</mml:mo> <mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mrow><mml:mi>t</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mn>1</mml:mn></mml:mstyle></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mn>3</mml:mn></mml:mstyle></mml:msup><mml:mo>,</mml:mo><mml:mo>&#x022EF;</mml:mo><mml:mo>,</mml:mo><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>T</mml:mi></mml:mstyle><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle></mml:msub><mml:mo>&#x02212;</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mn>1</mml:mn></mml:mstyle></mml:mrow></mml:msup></mml:mrow> <mml:mo>]</mml:mo></mml:mrow><mml:mtext>&#x000A0;</mml:mtext><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:mtext>&#x000A0;</mml:mtext><mml:mrow><mml:mo>[</mml:mo> <mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mrow><mml:mi>t</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msup><mml:mstyle mathvariant='bold' 
mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mn>2</mml:mn></mml:mstyle></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mn>4</mml:mn></mml:mstyle></mml:msup><mml:mo>,</mml:mo><mml:mo>&#x022EF;</mml:mo><mml:mo>,</mml:mo><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>E</mml:mi></mml:mstyle><mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>T</mml:mi></mml:mstyle><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle></mml:msub></mml:mrow></mml:msup></mml:mrow> <mml:mo>]</mml:mo></mml:mrow><mml:mtext>&#x000A0;</mml:mtext><mml:mo>,</mml:mo><mml:mtext>&#x000A0;&#x000A0;</mml:mtext><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow> </mml:mrow></mml:mrow></mml:math></disp-formula>
<p><inline-formula><mml:math id="M10"><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> is the <italic>i</italic>th discontinuous token sequence with <italic>T</italic><sub><italic>z</italic></sub>/2 tokens. Tokens in the sequence are interval selected. The first token <bold>E</bold><sub><italic>t</italic>2</sub> in the sequence is the trainable type embedding for a dilated sliding window, with <italic>F</italic><sub>2</sub> features. Combining the two types of token sequences extracted by the sliding window block, the combined sequence <bold>z</bold><sub><italic>s</italic></sub> &#x0003D; [<bold>z</bold><sub><italic>s</italic>1</sub>, <bold>z</bold><sub><italic>s</italic>2</sub>], containing <italic>T</italic><sub><italic>z</italic></sub>/4 continuous and 2 discontinuous token sequences, is the output to the attention block.</p>
</sec>
<sec>
<title>2.3. Attention block</title>
<p>The attention mechanism is a powerful structure for capturing dependencies in images or sequential data, including EEG data. The attention block in the CSANet is proposed with a two-stage attention mechanism, including local attention and global attention, as shown in <xref ref-type="fig" rid="F4">Figure 4</xref>. The local and global attention subnetworks are designed for EEG classification. We first segment the data using two types of sliding windows. The continuous sliding window splits the data continuously and in sequence, enhancing the characteristics of continuous data. The dilated sliding window splits the data at intervals, which allows for the extraction of data characteristics over larger spaces and longer time periods. The segmented data is then passed through local attention to extract small-scale local features. After merging these features, they go through global attention to extract global features. Global attention and local attention differ not only in the data they analyze but also structurally. Both use Multi-head Attention, but the difference lies in the additional MLP layer in the global attention module because the data is classified after global attention.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>The structure of the attention block. Local attention subnetworks are used to weight the tokens within each sequence. Then global attention is employed to adaptively weight all the tokens in all sequences.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnins-17-1204385-g0004.tif"/>
</fig>
<p>The local attention subnetwork is designed to adaptively weight the tokens in each sequence by capturing local dependencies among the tokens. The global attention subnetwork is designed to adaptively weight all the tokens according to the global attention of all tokens in all sequences.</p>
<p>The local attention subnetwork is composed of <italic>L</italic> parallel encoders. Each of the encoders contains a layer-normalization and a multi-head self-attention (MSA) layer, as shown in <xref ref-type="fig" rid="F5">Figure 5</xref>. The local attention subnetwork processes the input token embedding sequence <bold>z</bold><sup><italic>i</italic></sup> by:</p>
<disp-formula id="E5"><label>(4)</label><mml:math id="M11"><mml:mrow><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mi>l</mml:mi><mml:mi>i</mml:mi></mml:msubsup><mml:mo>=</mml:mo><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>A</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>L</mml:mi><mml:mi>N</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msup><mml:mo stretchy='false'>)</mml:mo><mml:mo>+</mml:mo><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msup><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msup><mml:mo>&#x02208;</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mi>s</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>1</mml:mn><mml:mo>&#x02264;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x02264;</mml:mo><mml:mi>L</mml:mi><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mi>z</mml:mi></mml:msub><mml:mo>/</mml:mo><mml:mn>4</mml:mn><mml:mtext>&#x000A0;</mml:mtext><mml:mo>+</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>2</mml:mn></mml:mrow></mml:math></disp-formula>
<p><inline-formula><mml:math id="M12"><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> is the weighted token sequence by the local attention subnetwork. <italic>L</italic> is the number of sequences. <bold>z</bold><sup><italic>i</italic></sup> is a raw sequence extracted in the sliding window block. <italic>LN</italic> is the layer normalization operation. <italic>MSA</italic> is the multi-head self-attention function, which is composed of several self-attention encoders. A single self-attention encoder calculates the correlation weights of all the features in the token embedding. For each self-attention, three trainable matrices <bold>W</bold><sub><italic>q</italic></sub>, <bold>W</bold><sub><italic>k</italic></sub>, <inline-formula><mml:math id="M13"><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>W</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>v</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:msup></mml:math></inline-formula> were defined. These matrices transform the input features <bold>z</bold> into <bold>q, k</bold>, and <bold>v</bold> vectors, respectively.</p>
<disp-formula id="E6"><mml:math id="M14"><mml:mrow><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>q</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>W</mml:mi></mml:mstyle><mml:mi>q</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>k</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>W</mml:mi></mml:mstyle><mml:mi>k</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>v</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>W</mml:mi></mml:mstyle><mml:mi>v</mml:mi></mml:msub><mml:mtext>&#x000A0;</mml:mtext></mml:mrow></mml:math></disp-formula>
<p>For each head <italic>i</italic><sub><italic>h</italic></sub>, the matrices <bold>q</bold><bold>, </bold><bold>k</bold><bold>, </bold><bold>v</bold> are further transformed by linear transformation matrices <bold>W</bold><sub><italic>q, i</italic></sub>, <bold>W</bold><sub><italic>k, i</italic></sub>, <inline-formula><mml:math id="M15"><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>W</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>v</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x000A0;</mml:mtext><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msup></mml:math></inline-formula> to obtain <bold>q</bold><sub><italic>i</italic><sub><italic>h</italic></sub></sub>, <bold>k</bold><sub><italic>i</italic><sub><italic>h</italic></sub></sub>, <bold>v</bold><sub><italic>i</italic><sub><italic>h</italic></sub></sub>, respectively. The dimension of the head is <italic>D</italic><sub><italic>h</italic></sub>.</p>
<disp-formula id="E7"><label>(5)</label><mml:math id="M16"><mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>q</mml:mi></mml:mstyle><mml:mrow><mml:msub><mml:mi>i</mml:mi><mml:mi>h</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>q</mml:mi></mml:mstyle><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>W</mml:mi></mml:mstyle><mml:mrow><mml:mi>q</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>k</mml:mi></mml:mstyle><mml:mrow><mml:msub><mml:mi>i</mml:mi><mml:mi>h</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>k</mml:mi></mml:mstyle><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>W</mml:mi></mml:mstyle><mml:mrow><mml:mi>k</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>v</mml:mi></mml:mstyle><mml:mrow><mml:msub><mml:mi>i</mml:mi><mml:mi>h</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>v</mml:mi></mml:mstyle><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>W</mml:mi></mml:mstyle><mml:mrow><mml:mi>v</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></disp-formula>
<p>For each head <italic>i</italic><sub><italic>h</italic></sub>, <bold>q</bold><sub><italic>i</italic><sub><italic>h</italic></sub></sub> and <bold>k</bold><sub><italic>i</italic><sub><italic>h</italic></sub></sub> calculate the scaled dot-product attention by dividing by the square root of <italic>D</italic><sub><italic>h</italic></sub> and then applying the SoftMax function. Finally, the output weights in one head are obtained by multiplying with <bold>v</bold><sub><italic>i</italic><sub><italic>h</italic></sub></sub>:</p>
<disp-formula id="E8"><label>(6)</label><mml:math id="M17"><mml:mrow><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>A</mml:mi></mml:mstyle><mml:mo stretchy='false'>(</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mo stretchy='false'>)</mml:mo><mml:mo>=</mml:mo><mml:mi>S</mml:mi><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mi>t</mml:mi><mml:mi>M</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>q</mml:mi></mml:mstyle><mml:mrow><mml:msub><mml:mi>i</mml:mi><mml:mi>h</mml:mi></mml:msub></mml:mrow></mml:msub><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>k</mml:mi></mml:mstyle><mml:mrow><mml:msub><mml:mi>i</mml:mi><mml:mi>h</mml:mi></mml:msub></mml:mrow></mml:msub><mml:msup><mml:mrow></mml:mrow><mml:mo>&#x022A4;</mml:mo></mml:msup></mml:mrow><mml:mrow><mml:msqrt><mml:mrow><mml:msub><mml:mi>D</mml:mi><mml:mi>h</mml:mi></mml:msub></mml:mrow></mml:msqrt></mml:mrow></mml:mfrac></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>v</mml:mi></mml:mstyle><mml:mrow><mml:msub><mml:mi>i</mml:mi><mml:mi>h</mml:mi></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></disp-formula>
<p><bold>A</bold>(<bold>z</bold>) is the self-attention weight for one head self-attention encoder. The weights of the multi-head self-attentions are composed of each head of self-attention weight. The weight is used to scale the raw token embedding sequence by:</p>
<disp-formula id="E9"><label>(7)</label><mml:math id="M18"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle><mml:mo>&#x000B7;</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>;</mml:mo><mml:msub><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>;</mml:mo><mml:mo>&#x022EF;</mml:mo><mml:mspace width="0.3em" class="thinspace"/><mml:mo>;</mml:mo><mml:msub><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p><italic>MSA</italic>(<bold>z</bold>) is the weighted token sequence of multi-head self-attention. <italic>A</italic><sub><italic>i</italic></sub>(<bold>z</bold>) is the self-attention weight calculated by the self-attention encoder. <italic>h</italic> is the number of heads.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>Structures of the multi-head self-attention encoder and the self-attention encoder.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnins-17-1204385-g0005.tif"/>
</fig>
<p>All the weighted token embedding sequences <inline-formula><mml:math id="M19"><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> are then concatenated into one token sequence <bold>z</bold><sub><italic>a</italic></sub> with <italic>T</italic><sub><italic>z</italic></sub>/2 &#x000D7; (<italic>T</italic><sub><italic>z</italic></sub>/4 &#x0002B; 2) tokens. All the tokens are weighted by the local attention subnetwork and put into the global attention subnetwork.</p>
<p>The global attention subnetwork contains two layer-normalization layers, an MSA layer, and fully connected layers. The structure of the multi-head self-attention is identical to that in the local attention subnetwork, which weights the tokens by:</p>
<disp-formula id="E10"><label>(8)</label><mml:math id="M20"><mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:msup><mml:mi>z</mml:mi><mml:mo>&#x02032;</mml:mo></mml:msup></mml:mstyle><mml:mi>a</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>A</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>L</mml:mi><mml:mi>N</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mi>a</mml:mi></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:mo stretchy='false'>)</mml:mo><mml:mo>+</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mi>a</mml:mi></mml:msub></mml:mrow></mml:math></disp-formula>
<p><bold>z</bold><sub><italic>a</italic></sub> is the global token sequence. <inline-formula><mml:math id="M21"><mml:msub><mml:mrow><mml:msup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the output global token sequence weighted by the global attention subnetwork. Finally, a layer normalization layer, a fully connected layer with the ELU function, and a fully connected layer with the SoftMax function are used to calculate the probabilities of different MI categories as follows:</p>
<disp-formula id="E11"><label>(9)</label><mml:math id="M22"><mml:mrow><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>y</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mi>s</mml:mi><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mi>t</mml:mi><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>M</mml:mi><mml:mi>L</mml:mi><mml:mi>P</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>M</mml:mi><mml:mi>L</mml:mi><mml:mi>P</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>L</mml:mi><mml:mi>N</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:msup><mml:mi>z</mml:mi><mml:mo>&#x02032;</mml:mo></mml:msup></mml:mstyle><mml:mi>a</mml:mi></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:mo stretchy='false'>)</mml:mo><mml:mo stretchy='false'>)</mml:mo><mml:mo>+</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:msup><mml:mi>z</mml:mi><mml:mo>&#x02032;</mml:mo></mml:msup></mml:mstyle><mml:mi>a</mml:mi></mml:msub><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:math></disp-formula>
<p><italic>MLP</italic> is the linear projection operation of the fully connected layers. <bold>y</bold> is the output probability of MI categories.</p>
</sec>
<sec>
<title>2.4. Experimental settings</title>
<p>CSANet was trained and evaluated in the within- and between-individual classification tasks in two public MI-EEG four classification datasets: the BCI Competition IV-2a (BCI-2a) dataset (Brunner et al., <xref ref-type="bibr" rid="B13">2008</xref>) and the Physionet MI-EEG dataset (Goldberger et al., <xref ref-type="bibr" rid="B21">2000</xref>). The details of the two datasets are presented in <xref ref-type="table" rid="T1">Table 1</xref>, and the electrodes used in the two datasets are depicted in <xref ref-type="fig" rid="F6">Figure 6</xref>.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Two datasets and the methods within and between individual classification tasks of the two experiments.</p></caption> 
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left" colspan="2"></th>
<th valign="top" align="left"><bold>BCI-2a dataset</bold></th>
<th valign="top" align="left"><bold>Physionet dataset</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="2">Created in</td>
<td valign="top" align="left">2008</td>
<td valign="top" align="left">2004</td>
</tr> <tr>
<td valign="top" align="left" colspan="2">Subjects</td>
<td valign="top" align="left">9</td>
<td valign="top" align="left">109</td>
</tr> <tr>
<td valign="top" align="left" colspan="2">Sessions</td>
<td valign="top" align="left">2 (one for training, one for testing)</td>
<td valign="top" align="left">1</td>
</tr> <tr>
<td valign="top" align="left" colspan="2">Trials</td>
<td valign="top" align="left">288</td>
<td valign="top" align="left">84</td>
</tr> <tr>
<td valign="top" align="left" colspan="2">MI task types</td>
<td valign="top" align="left">Left hand, right hand, foot, and tongue</td>
<td valign="top" align="left">Left fist, right fist, both fists, and feet</td>
</tr> <tr>
<td valign="top" align="left" colspan="2">Electrodes</td>
<td valign="top" align="left">22</td>
<td valign="top" align="left">18 of 64</td>
</tr> <tr>
<td valign="top" align="left" colspan="2">Sampling rate</td>
<td valign="top" align="left">250 Hz</td>
<td valign="top" align="left">160 Hz</td>
</tr> <tr>
<td valign="top" align="left" colspan="2">Time of one motion</td>
<td valign="top" align="left">4.5s</td>
<td valign="top" align="left">4s</td>
</tr> <tr>
<td valign="top" align="left" colspan="2">Time points</td>
<td valign="top" align="left">1,125</td>
<td valign="top" align="left">640</td>
</tr> <tr>
<td valign="top" align="left" rowspan="3">Within-individual classification task</td>
<td valign="top" align="left">Training set</td>
<td valign="top" align="left">First session</td>
<td valign="top" align="left">90% data</td>
</tr>
 <tr>
<td valign="top" align="left">Test set</td>
<td valign="top" align="left">Second session</td>
<td valign="top" align="left">10% data</td>
</tr>
 <tr>
<td valign="top" align="left">Method</td>
<td valign="top" align="left">-</td>
<td valign="top" align="left">10-fold cross-validation</td>
</tr> <tr>
<td valign="top" align="left" rowspan="3">Between-individual classification task</td>
<td valign="top" align="left">Training set</td>
<td valign="top" align="left">All subjects' second sessions</td>
<td valign="top" align="left">Data of 9 or 10 individuals</td>
</tr>
 <tr>
<td valign="top" align="left">Test set</td>
<td valign="top" align="left">Second session</td>
<td valign="top" align="left">Data of the other 100 or 99 individuals</td>
</tr>
<tr>
<td valign="top" align="left">Method</td>
<td valign="top" align="left">Leave-one-out cross-validation</td>
<td valign="top" align="left">11-fold cross-validation</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>The electrodes used in the two MI-EEG datasets. <bold>(A)</bold> The BCI Competition IV-2a dataset collected EEG signals from 22 electrodes. <bold>(B)</bold> The Physionet MI-EEG dataset collected signals from 64 electrodes. In total 18 of them were used for MI classification.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnins-17-1204385-g0006.tif"/>
</fig>
<p>The BCI-2a dataset was created by Graz University of Technology in 2008. It consists of recordings from nine healthy subjects who underwent two sessions. The first session was used for training, while the second session was used for testing. Each session contained 288 trials, with each trial comprising one of four motor imagery tasks: movements of the left hand, right hand, foot, and tongue. MI-EEG signals were recorded using 22 Ag/AgCl electrodes (10&#x02013;20 international standard lead system) and were sampled at 250 Hz with a bandpass filter between 0.5 Hz and 100 Hz (with a 50 Hz notch filter enabled). In the within-individual classification task, the same training and testing data as the original competition were used, with the first session as the training set and the second session as the test set. In the between-individual classification task, the second sessions of all the subjects were used for training and testing. Leave-one-out cross-validation was employed in the classification task. At each validation, the data of one subject was selected as the test dataset, and the data of the other eight individuals were selected as the training dataset. The performance metrics were calculated across all individuals.</p>
<p>The Physionet MI-EEG dataset was recorded using the BCI2000 system according to the international 10-10 system and consists of recordings from 109 individuals. Each individual performed 84 trials comprising four types of MI tasks involving the left fist, right fist, both fists, and feet. There are 21 trials for each type of MI task. Each MI event lasted for 4 s, and the signals were sampled at 160 Hz. Each MI event had 640 time points. In the experiments, we used the electrode methods referenced in two papers (Singh et al., <xref ref-type="bibr" rid="B43">2019</xref>; Xie et al., <xref ref-type="bibr" rid="B52">2022</xref>). For motor imagery, the main location where the brain generates responses is the motor cortex, which is where the 18 electrodes we selected are located. Signals from 18 electrodes near the motor cortex (C1&#x02013;C6, CP1&#x02013;CP6, P1&#x02013;P6) were used in the model training and testing. In the within-individual classification task, 10-fold cross-validation was conducted for the data of each individual. At each validation, 10% of the data was used as the test dataset, and the remaining 90% was set as the training set. The classification accuracy was computed for each test set, and the average test metrics were calculated and reported. In the between-individual classification task, 11-fold cross-validation was conducted to evaluate the performance of the proposed model. In each validation, the data of 9 or 10 individuals were taken as the test set, and the data of the other 100 or 99 individuals were taken as the training set. The average performance metrics were calculated and reported.</p>
<p>Besides the performance experiments of within- and between-individual classification tasks, an ablation experiment was also conducted to test the effects of the proposed type token, sliding window, local attention subnetworks, and global attention subnetwork on the proposed CSANet. We also compared the performance of the proposed model with that of SOTA models in the same within- and between-individual classification tasks. We also extracted the learned features and utilized t-distributed Stochastic Neighbor Embedding to evaluate whether the extracted features could be distinguished across different types of MI tasks.</p>
<p>All the experiments were conducted on a machine with 12 CPU cores, one NVIDIA GeForce RTX 3090, Ubuntu 18.04, Python 3.8, and TensorFlow 2.4. The hyperparameters used in the two datasets are shown in <xref ref-type="table" rid="T2">Table 2</xref>. <xref ref-type="table" rid="T3">Table 3</xref> shows the detailed structures of the proposed models for different datasets and the output of each layer in the BCI dataset and Physionet model. All the models were trained for 1,000 epochs with an Adam optimizer at a learning rate of 0.009 and a batch size of 64. The cross-entropy was used as the loss function in all experiments. These training hyperparameters are determined by manual tuning in the training sets.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>The hyperparameters of three blocks used in the BCI-2a and the Physionet MI-EEG datasets.</p></caption>
<table frame="box" rules="all">
<tbody>
<tr style="background-color:#e0e1e3">
<td valign="top" align="left" colspan="2"><bold>Convolutional block</bold></td>
</tr>
<tr>
<td valign="top" align="left">Temporal filters (<bold><italic>F</italic></bold><sub><bold>1</bold></sub>)</td>
<td valign="top" align="center">16</td>
</tr> <tr>
<td valign="top" align="left">Depth multiplier (B)</td>
<td valign="top" align="center">2</td>
</tr> <tr>
<td valign="top" align="left">Channel conv filters (<bold><italic>F</italic></bold><sub><bold>2</bold></sub>)</td>
<td valign="top" align="center">32</td>
</tr> <tr>
<td valign="top" align="left">First pooling size (<bold><italic>P</italic></bold><sub><bold>1</bold></sub>)</td>
<td valign="top" align="center">8</td>
</tr> <tr>
<td valign="top" align="left">Second pooling size (<bold><italic>P</italic></bold><sub><bold>2</bold></sub>)</td>
<td valign="top" align="center">7</td>
</tr> <tr>
<td valign="top" align="left">Dropout rate</td>
<td valign="top" align="center">0.3</td>
</tr> <tr style="background-color:#e0e1e3">
<td valign="top" align="center" colspan="2"><bold>Sliding window block</bold></td>
</tr> <tr>
<td valign="top" align="left">Number of windows (<bold><italic>T</italic></bold><sub><bold><italic>z</italic></bold></sub>/<bold>4</bold>&#x0002B;<bold>2</bold>)</td>
<td valign="top" align="center">7</td>
</tr> <tr>
<td valign="top" align="left">Dropout rate</td>
<td valign="top" align="center">0.3</td>
</tr> <tr>
<td valign="top" align="left">Attention block</td>
<td/>
</tr> <tr>
<td valign="top" align="left">Head size (<italic><bold>D</bold></italic><sub><bold><italic>h</italic></bold></sub>)</td>
<td valign="top" align="center">8</td>
</tr>
<tr>
<td valign="top" align="left">Dropout rate</td>
<td valign="top" align="center">0.5</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Detailed description of the proposed model, where C, number of channels; T, number of Time points; <bold>F</bold><sub><bold>s</bold></sub>, sample rate; <bold>F</bold><sub><bold>1</bold></sub>, number of temporal filters; B, number of convolution filters; <bold>P</bold><sub><bold>1</bold></sub>, number of first pooling filter; <bold>P</bold><sub><bold>2</bold></sub>, number of second pooling filter.</p></caption> 
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left"><bold>Layer type</bold></th>
<th valign="top" align="left"><bold>Maps</bold></th>
<th valign="top" align="center"><bold>Kernel size</bold></th>
<th valign="top" align="center"><bold>Output</bold></th>
<th valign="top" align="center"><bold>BCI output</bold></th>
<th valign="top" align="center"><bold>Physionet output</bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:#e0e1e3">
<td valign="top" align="center" colspan="6"><bold>Convolutional block</bold></td>
</tr> <tr>
<td valign="top" align="left">Temporal convolutional</td>
<td valign="top" align="left"><italic>F</italic><sub>1</sub></td>
<td valign="top" align="center">(1, <italic>F</italic><sub><italic>s</italic></sub>//4)</td>
<td valign="top" align="center">(<italic>F</italic><sub>1</sub>, T, C)</td>
<td valign="top" align="center">(16, 1,125, 22)</td>
<td valign="top" align="center">(16, 640, 18)</td>
</tr> <tr>
<td valign="top" align="left">Channel convolution</td>
<td valign="top" align="left">B</td>
<td valign="top" align="center">(C, 1)</td>
<td valign="top" align="center">(<italic>B</italic>&#x000D7;<italic>F</italic><sub>1</sub>, T, 1)</td>
<td valign="top" align="center">(32, 1,125, 1)</td>
<td valign="top" align="center">(32, 640, 1)</td>
</tr> <tr>
<td valign="top" align="left">Average pooling</td>
<td/>
<td valign="top" align="center">(1, <italic>P</italic><sub>1</sub>)</td>
<td valign="top" align="center">(<italic>B</italic>&#x000D7;<italic>F</italic><sub>1</sub>, T// <italic>P</italic><sub>1</sub>, 1)</td>
<td valign="top" align="center">(32, 140, 1)</td>
<td valign="top" align="center">(32, 80, 1)</td>
</tr> <tr>
<td valign="top" align="left">Local convolutional</td>
<td valign="top" align="left"><italic>B</italic>&#x000D7;<italic>F</italic><sub>1</sub></td>
<td valign="top" align="center">(1, 16)</td>
<td valign="top" align="center">(<italic>B</italic>&#x000D7;<italic>F</italic><sub>1</sub>, T// <italic>P</italic><sub>1</sub>)</td>
<td valign="top" align="center">(32, 140)</td>
<td valign="top" align="center">(32, 80)</td>
</tr> <tr>
<td valign="top" align="left">Average pooling</td>
<td/>
<td valign="top" align="center">(1, <italic>P</italic><sub>2</sub>)</td>
<td valign="top" align="center">(<italic>B</italic>&#x000D7;<italic>F</italic><sub>1</sub>, T// <italic>P</italic><sub>1</sub>//<italic>P</italic><sub>2</sub>)</td>
<td valign="top" align="center">(32, 20)</td>
<td valign="top" align="center">(32, 11)</td>
</tr> <tr style="background-color:#e0e1e3">
<td valign="top" align="center" colspan="3"><bold>Sliding window block</bold></td>
<td valign="top" align="center"><italic>T</italic><sub><italic>z</italic></sub> &#x0003D; <italic>T</italic>//<italic>P</italic><sub>1</sub>//<italic>P</italic><sub>2</sub></td>
<td/>
<td/>
</tr> <tr>
<td valign="top" align="left">Continuous sliding window<break/> dilated sliding window</td>
<td valign="top" align="left"><italic>T</italic><sub><italic>z</italic></sub>/4</td>
<td valign="top" align="center">(<italic>B</italic>&#x000D7;<italic>F</italic><sub>1</sub>, <italic>T</italic><sub><italic>z</italic></sub>/2)</td>
<td valign="top" align="center">(<italic>T</italic><sub><italic>z</italic></sub>//4 &#x0002B; 2, <italic>B</italic>&#x000D7;<italic>F</italic><sub>1</sub>, <italic>T</italic><sub><italic>z</italic></sub>//2)</td>
<td valign="top" align="center">(7, 32, 10)</td>
<td valign="top" align="center">(4, 32, 5)</td>
</tr> <tr>
<td valign="top" align="left">Class token</td>
<td/>
<td/>
<td valign="top" align="center">(<italic>T</italic><sub><italic>z</italic></sub>//4 &#x0002B; 2, <italic>B</italic>&#x000D7;<italic>F</italic><sub>1</sub>, <italic>T</italic><sub><italic>z</italic></sub>//2 &#x0002B; 1)</td>
<td valign="top" align="center">(7, 32, 11)</td>
<td valign="top" align="center">(4, 32, 6)</td>
</tr> <tr style="background-color:#e0e1e3">
<td valign="top" align="center" colspan="6"><bold>Attention block</bold></td>
</tr> <tr>
<td valign="top" align="left">Local attention</td>
<td/>
<td/>
<td valign="top" align="center">(<italic>T</italic><sub><italic>z</italic></sub>//4 &#x0002B; 2, <italic>B</italic>&#x000D7;<italic>F</italic><sub>1</sub>, <italic>T</italic><sub><italic>z</italic></sub>//2 &#x0002B; 1)</td>
<td valign="top" align="center">(7, 32, 11)</td>
<td valign="top" align="center">(4, 32, 6)</td>
</tr> <tr>
<td valign="top" align="left">Concatenate</td>
<td/>
<td/>
<td valign="top" align="center">(1, <italic>T</italic><sub><italic>z</italic></sub>//4 &#x0002B; 2, <italic>B</italic>&#x000D7;<italic>F</italic><sub>1</sub>, <italic>T</italic><sub><italic>z</italic></sub>//2 &#x0002B; 1)</td>
<td valign="top" align="center">(7, 32, 11)</td>
<td valign="top" align="center">(4, 32, 6)</td>
</tr> <tr>
<td valign="top" align="left">Global attention</td>
<td/>
<td/>
<td valign="top" align="center">(1, <italic>T</italic><sub><italic>z</italic></sub>//4 &#x0002B; 2, <italic>B</italic>&#x000D7;<italic>F</italic><sub>1</sub>, <italic>T</italic><sub><italic>z</italic></sub>//2 &#x0002B; 1)</td>
<td valign="top" align="center">(7, 32, 11)</td>
<td valign="top" align="center">(4, 32, 6)</td>
</tr>
<tr style="background-color:#e0e1e3">
<td valign="top" align="center" colspan="6"><bold>Fully connected layer</bold></td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>2.5. Performance metrics</title>
<p>The accuracy and Kappa scores, which are commonly used in EEG signal classification tasks, were used as the evaluation metrics of performance in all experiments. The accuracy is calculated by:</p>
<disp-formula id="E12"><label>(10)</label><mml:math id="M23"><mml:mrow><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>A</mml:mi><mml:mi>C</mml:mi><mml:mi>C</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mstyle displaystyle='true'><mml:msubsup><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>i</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mn>1</mml:mn></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>N</mml:mi></mml:mstyle><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>c</mml:mi></mml:mstyle></mml:msub></mml:mrow></mml:msubsup><mml:mrow><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>T</mml:mi></mml:mstyle><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>P</mml:mi></mml:mstyle><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>i</mml:mi></mml:mstyle></mml:msub></mml:mrow></mml:mstyle></mml:mrow><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>N</mml:mi></mml:mstyle></mml:mfrac><mml:mtext>&#x000A0;</mml:mtext><mml:mo stretchy='false'>(</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mn>10</mml:mn></mml:mstyle><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:math></disp-formula>
<p><italic>ACC</italic> is the accuracy. <italic>N</italic> is the number of samples in the training or test dataset, and <italic>TP</italic><sub><italic>i</italic></sub> is the number of true positives (correctly predicted positive samples) in class <italic>i</italic>. <italic>N</italic><sub><italic>c</italic></sub> is the number of MI task categories. For both datasets, <italic>N</italic><sub><italic>c</italic></sub> &#x0003D; 4. The range of accuracy is between 0 and 1; higher accuracy means a better model. Kappa is the measurement of consistency between two variables. In the experiments, it was used to measure the consistency between the true class labels and the predicted class labels. It is defined by:</p>
<disp-formula id="E13"><label>(11)</label><mml:math id="M24"><mml:mrow><mml:mi>&#x003BA;</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi>c</mml:mi></mml:msub></mml:mrow></mml:mfrac><mml:mstyle displaystyle='true'><mml:msubsup><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>o</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi>c</mml:mi></mml:msub></mml:mrow></mml:msubsup><mml:mrow><mml:mfrac><mml:mrow><mml:msub><mml:mi>P</mml:mi><mml:mi>o</mml:mi></mml:msub><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mi>P</mml:mi><mml:mi>e</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mi>P</mml:mi><mml:mi>e</mml:mi></mml:msub></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:mrow></mml:math></disp-formula>
<p>&#x003BA; is the calculated Kappa score. <italic>P</italic><sub><italic>o</italic></sub> is the observed consistency rate for the class <italic>o</italic>, and <italic>P</italic><sub><italic>e</italic></sub> is the expected consistency rate by chance.</p>
</sec>
</sec>
<sec id="s3">
<title>3. Experiments and results</title>
<sec>
<title>3.1. Results of the ablation experiment</title>
<p>The ablation experiments were conducted to assess the efficacy of the proposed type token, sliding window, local attention subnetworks, and global attention subnetworks in the CSANet model for within-individual classification tasks on the BCI-2a dataset. The results are presented in <xref ref-type="table" rid="T4">Table 4</xref>.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>The results of ablation experiments.</p></caption> 
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left"><bold>Model no</bold>.</th>
<th valign="top" align="left" colspan="2"><bold>Sliding window block</bold></th>
<th valign="top" align="left" colspan="2"><bold>Attention block</bold></th>
<th valign="top" align="left"><bold>Accuracy (%)</bold></th>
<th valign="top" align="left"><bold>&#x003BA;</bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<td/>
<td valign="top" align="left"><bold>Sliding window</bold></td>
<td valign="top" align="left"><bold>Type token</bold></td>
<td valign="top" align="left"><bold>Local attention</bold></td>
<td valign="top" align="left"><bold>Global attention</bold></td>
<td/>
<td/>
</tr> <tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">81.25</td>
<td valign="top" align="left">0.750</td>
</tr> <tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">83.02</td>
<td valign="top" align="left">0.775</td>
</tr> <tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">81.11</td>
<td valign="top" align="left">0.748</td>
</tr> <tr>
<td valign="top" align="left">4</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">81.94</td>
<td valign="top" align="left">0.759</td>
</tr> <tr>
<td valign="top" align="left">5</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">81.54</td>
<td valign="top" align="left">0.756</td>
</tr> <tr>
<td valign="top" align="left">6</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">83.33</td>
<td valign="top" align="left">0.778</td>
</tr> <tr>
<td valign="top" align="left">7</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">82.99</td>
<td valign="top" align="left">0.773</td>
</tr> <tr>
<td valign="top" align="left">8</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">82.64</td>
<td valign="top" align="left">0.768</td>
</tr> <tr>
<td valign="top" align="left">9</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">X</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">83.68</td>
<td valign="top" align="left">0.782</td>
</tr>
<tr>
<td valign="top" align="left">10</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">&#x02713;</td>
<td valign="top" align="left">84.08</td>
<td valign="top" align="left">0.784</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>The type token represents the two types of sliding windows in the sliding window block.</p>
</table-wrap-foot>
</table-wrap>
<p>There is no separate ablation experiment for the type token in the sliding window block because the model needs to use the sliding window before adding the classification token. Therefore, the type token needs to be employed simultaneously with the sliding window. The same thing happens with the local attention. In the table, Model 1 represents the most fundamental model that solely employs CNN without a sliding window or self-attention mechanism, indicated by crosses in all columns of the table. Models 2 and 3 exclusively utilize individual modules, namely global attention and sliding window, respectively. Meanwhile, Models 4&#x02013;9 integrate different modules in diverse ways. Based on the results presented in <xref ref-type="table" rid="T4">Table 4</xref>, it is evident that the incorporation of a global attention subnet has a significant positive impact on model performance. Specifically, Model 2, which solely utilizes the global attention subnet, demonstrates an accuracy improvement of 1.77% compared to Model 1, which does not employ any proposed methods. The incorporation of both sliding window and global attention mechanisms in Model 6 yields a modest yet significant improvement in accuracy, with an increase of 0.21%. It is worth noting that while the inclusion of type tokens in Model 5 leads to a decrease in accuracy, the integration of the local attention subnet in Model 10 achieves an impressive accuracy rate of 84.08%, surpassing that of Model 6 by 0.75%. The type token method is found to be more effective when used in combination with the local attention subnet, as illustrated in Model 8, which exhibits a 0.7% increase in accuracy as compared to Model 4. The combination of global and local attention proves to be highly effective, resulting in significant performance improvements. For instance, Model 9 shows an increase in accuracy of 1.74% as compared to Model 4. 
Model 10 has a 2.83% higher accuracy rate than Model 1, indicating that our final proposed model with all modules significantly outperforms the original model. In summary, the results of our ablation experiments demonstrate that the proposed type token, sliding window, local attention subnet, and global attention subnet all have a positive impact on the performance of the MI-EEG classification task.</p>
</sec>
<sec>
<title>3.2. Results of the public datasets</title>
<p>The proposed CSANet method underwent training and testing for within- and between-individual classification tasks using the BCI-2a and Physionet-MI datasets, respectively. Its performance was subsequently compared with that of other state-of-the-art (SOTA) models.</p>
<sec>
<title>3.2.1. Results of the BCI-2a dataset</title>
<p>The proposed model was initially evaluated on the BCI-2a dataset through individual experiments, wherein the MI-EEG data of nine participants were separately trained and subsequently validated on a test set. The results obtained are presented in <xref ref-type="table" rid="T5">Table 5</xref>. In comparing the proposed CSANet model with three other SOTA models, including EEGNet (Lawhern et al., <xref ref-type="bibr" rid="B30">2018</xref>), EEG-TCNet (Ingolfsson et al., <xref ref-type="bibr" rid="B25">2020</xref>), and TCNet Fusion (Musallam et al., <xref ref-type="bibr" rid="B35">2021</xref>), it was evident that the former outperformed the other models with an accuracy improvement ranging from 0.4 to 4.4%, ultimately reaching an overall accuracy of 84.1%. This outcome serves to highlight the proposed model&#x00027;s superior learning and prediction capabilities, particularly for individual motor imagery EEG signal patterns, relative to existing models. The standard deviation (SD) of the accuracy is computed to two decimal places, and the kappa values are in decimal form. In our proposed model, the standard deviation of the accuracy is 9.11, which is slightly lower than the other models, but the difference is not substantial. The kappa value of 0.127 is also not significantly different from the other models.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>CSANet was compared to other models in the within-individual classification task of the BCI-2a dataset across nine subjects.</p></caption> 
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th/>
<th valign="top" align="center" colspan="2"><bold>Proposed (CSANet)</bold></th>
<th valign="top" align="center" colspan="2"><bold>EEGNet</bold></th>
<th valign="top" align="center" colspan="2"><bold>EEG-TCNet</bold></th>
<th valign="top" align="center" colspan="2"><bold>TCNet fusion</bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<td valign="top" align="left"><bold>Individual</bold></td>
<td valign="top" align="center"><bold>Acc (%)</bold></td>
<td valign="top" align="center"><bold>&#x003BA;</bold></td>
<td valign="top" align="center"><bold>Acc (%)</bold></td>
<td valign="top" align="center"><bold>&#x003BA;</bold></td>
<td valign="top" align="center"><bold>Acc (%)</bold></td>
<td valign="top" align="center"><bold>&#x003BA;</bold></td>
<td valign="top" align="center"><bold>Acc (%)</bold></td>
<td valign="top" align="center"><bold>&#x003BA;</bold></td>
</tr> <tr>
<td valign="top" align="left">1</td>
<td valign="top" align="center">86.11</td>
<td valign="top" align="center">0.814</td>
<td valign="top" align="center">88.57</td>
<td valign="top" align="center">0.851</td>
<td valign="top" align="center">84.07</td>
<td valign="top" align="center">0.796</td>
<td valign="top" align="center">90.74</td>
<td valign="top" align="center">0.871</td>
</tr> <tr>
<td valign="top" align="left">2</td>
<td valign="top" align="center">70.58</td>
<td valign="top" align="center">0.608</td>
<td valign="top" align="center">66.02</td>
<td valign="top" align="center">0.553</td>
<td valign="top" align="center">66.32</td>
<td valign="top" align="center">0.553</td>
<td valign="top" align="center">70.67</td>
<td valign="top" align="center">0.603</td>
</tr> <tr>
<td valign="top" align="left">3</td>
<td valign="top" align="center">95.13</td>
<td valign="top" align="center">0.941</td>
<td valign="top" align="center">95.11</td>
<td valign="top" align="center">0.943</td>
<td valign="top" align="center">94.11</td>
<td valign="top" align="center">0.927</td>
<td valign="top" align="center">95.23</td>
<td valign="top" align="center">0.933</td>
</tr> <tr>
<td valign="top" align="left">4</td>
<td valign="top" align="center">80.63</td>
<td valign="top" align="center">0.611</td>
<td valign="top" align="center">73.61</td>
<td valign="top" align="center">0.653</td>
<td valign="top" align="center">72.61</td>
<td valign="top" align="center">0.638</td>
<td valign="top" align="center">76.75</td>
<td valign="top" align="center">0.680</td>
</tr> <tr>
<td valign="top" align="left">5</td>
<td valign="top" align="center">84.38</td>
<td valign="top" align="center">0.791</td>
<td valign="top" align="center">75.46</td>
<td valign="top" align="center">0.677</td>
<td valign="top" align="center">76.06</td>
<td valign="top" align="center">0.688</td>
<td valign="top" align="center">82.24</td>
<td valign="top" align="center">0.767</td>
</tr> <tr>
<td valign="top" align="left">6</td>
<td valign="top" align="center">69.79</td>
<td valign="top" align="center">0.651</td>
<td valign="top" align="center">64.20</td>
<td valign="top" align="center">0.529</td>
<td valign="top" align="center">62.90</td>
<td valign="top" align="center">0.501</td>
<td valign="top" align="center">68.83</td>
<td valign="top" align="center">0.589</td>
</tr> <tr>
<td valign="top" align="left">7</td>
<td valign="top" align="center">94.10</td>
<td valign="top" align="center">0.921</td>
<td valign="top" align="center">90.36</td>
<td valign="top" align="center">0.873</td>
<td valign="top" align="center">89.96</td>
<td valign="top" align="center">0.871</td>
<td valign="top" align="center">94.22</td>
<td valign="top" align="center">0.923</td>
</tr> <tr>
<td valign="top" align="left">8</td>
<td valign="top" align="center">89.93</td>
<td valign="top" align="center">0.865</td>
<td valign="top" align="center">85.83</td>
<td valign="top" align="center">0.818</td>
<td valign="top" align="center">84.76</td>
<td valign="top" align="center">0.802</td>
<td valign="top" align="center">88.92</td>
<td valign="top" align="center">0.858</td>
</tr> <tr>
<td valign="top" align="left">9</td>
<td valign="top" align="center">93.75</td>
<td valign="top" align="center">0.916</td>
<td valign="top" align="center">86.57</td>
<td valign="top" align="center">0.821</td>
<td valign="top" align="center">85.49</td>
<td valign="top" align="center">0.810</td>
<td valign="top" align="center">85.98</td>
<td valign="top" align="center">0.811</td>
</tr> <tr>
<td valign="top" align="left">Average</td>
<td valign="top" align="center">84.08</td>
<td valign="top" align="center">0.784</td>
<td valign="top" align="center">80.59</td>
<td valign="top" align="center">0.741</td>
<td valign="top" align="center">83.34</td>
<td valign="top" align="center">0.776</td>
<td valign="top" align="center">83.73</td>
<td valign="top" align="center">0.780</td>
</tr>
<tr>
<td valign="top" align="left">SD</td>
<td valign="top" align="center">9.11</td>
<td valign="top" align="center">0.127</td>
<td valign="top" align="center">10.48</td>
<td valign="top" align="center">0.139</td>
<td valign="top" align="center">10.09</td>
<td valign="top" align="center">0.137</td>
<td valign="top" align="center">9.23</td>
<td valign="top" align="center">0.123</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>SD, standard deviation.</p>
</table-wrap-foot>
</table-wrap>
<p>The t-distributed Stochastic Neighbor Embedding (t-SNE) method was used to visualize the extracted features in the proposed models and other models, as shown in <xref ref-type="fig" rid="F7">Figure 7</xref>. In recent years, t-SNE has emerged as a popular tool for data visualization owing to its ability to preserve the local structure of high-dimensional data. Specifically, t-SNE maps high-dimensional data points to a lower-dimensional space while simultaneously preserving the pairwise similarities between them. In this experiment, the features in the global attention layer are extracted and visualized using t-SNE. The horizontal and vertical axes do not possess any physical significance; rather, they represent the two primary components following data dimensionality reduction. During the t-SNE mapping process, these axes are selected to optimize the preservation of local structures within the original high-dimensional dataset (i.e., points that are proximal in high-dimensional space remain so after dimensionality reduction). The principal objective of a t-SNE diagram is data visualization. Based on the visualization results, it is evident that the features extracted by the proposed model exhibit superior distinguishability in the projection of the four categories as compared to other models. Notably, the features extracted by EEGNet demonstrate the poorest distinguishability.</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>The t-SNE visualization results for the extracted features from the four models for the within-individual classification tasks of BCI-2a dataset. The proposed model CSANet is shown in <bold>(A)</bold>. <bold>(B)</bold> EEGNet that appears to have less accurate classification performance than the other three models. <bold>(C)</bold> EEG-TCNet. <bold>(D)</bold> TCNet fusion. Four classes include motor imagery of left hand (red), right hand (green), foot (blue), and tongue (purple).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnins-17-1204385-g0007.tif"/>
</fig>
<p>In <xref ref-type="fig" rid="F8">Figure 8</xref>, the accuracy of four models in four categories was compared. The best performance in left-hand motor imagery classification recognition was achieved by TCN Fusion, with an accuracy of 86%. Similarly, an accuracy of 86% was achieved in right-hand classification imagery recognition by EEG-TCNet. The proposed CSANet demonstrated excellent performance in foot and tongue motor imagery classification, with accuracies of 88% for both. This indicates that the accuracy of recognizing the activity of the somatotopic area of the unilateral motor cortex is higher in the proposed model, resulting in a significant improvement in the recognition accuracy of foot and tongue motor imagery. However, the improvement in the classification and recognition accuracy of left- and right-hand motor imagery by the proposed model was not as significant, with accuracies of 80 and 84%, respectively. This result suggests that there may be a need to improve the processing and discrimination of information regarding left-right brain symmetry by the proposed model.</p>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>The confusion matrix of four classes in the within-individual classification task of the BCI-2a dataset. <bold>(A)</bold> The proposed CSANet, while <bold>(B&#x02013;D)</bold> respectively represent EEGNet, EEG-TCNet, and TCNet fusion.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnins-17-1204385-g0008.tif"/>
</fig>
<p>In addition to the individual classification task, the between-individual classification performance of the proposed model on the BCI-2a dataset was also evaluated, as shown in <xref ref-type="fig" rid="F9">Figure 9</xref>. The confusion matrix revealed that the accuracy of the four categories was relatively consistent in the inter-individual classification task, with recognition accuracies of 74, 68, 69, and 69% for left hand, right hand, foot, and mouth motor imagery, respectively. Notably, the model demonstrated the highest classification accuracy for left-hand motor imagery, while the classification accuracy for other limb motor imagery was generally similar, resulting in an overall classification accuracy of 70.81%, which was lower than that of the within-individual classification task. These findings suggest that individual specificity still has a certain impact on the model&#x00027;s classification performance. However, the brain signals for left-hand motor imagery were found to be more distinguishable than the other three types of motor imagery signals, and this distinction was found to be cross-individual. The t-SNE results were found to be generally consistent with the confusion matrix results.</p>
<fig id="F9" position="float">
<label>Figure 9</label>
<caption><p>Performance results of CSANet in the between-individual task on the BCI-2a dataset. <bold>(A)</bold> The confusion matrix of four classes. <bold>(B)</bold> The visualized t-SNE results.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnins-17-1204385-g0009.tif"/>
</fig>
<p>CSANet was evaluated against state-of-the-art (SOTA) models on both individual and inter-individual classification tasks using the BCI-2a dataset. The overall mean accuracy and kappa values of the model were compared to those of other models, as presented in <xref ref-type="table" rid="T6">Table 6</xref>. A total of nine other deep learning models that were also tested on the BCI-2a dataset were compared. The proposed model achieved a higher accuracy of 84.08% and a kappa value of 0.784 in the within-individual classification task compared to other models. An improvement in accuracy of 0.35% and an increase in kappa value of 0.004 were observed. Additionally, the proposed model exhibited the best performance in between-individual classification, with an accuracy of 70.81% and a kappa value of 0.610. Compared with other models, the proposed model improved accuracy and kappa value by 0.23% and 0.002, respectively. Accuracy is mainly affected by the overall performance of the model. And the proposed model is robust. It is noteworthy that the proposed model, which incorporates local and global attention and enriches feature types through multi-type sliding windows, demonstrated improved performance in the MI-EEG signal classification task compared to other attention-based models. This is a testament to the effectiveness of the various sub-modules proposed in CSANet for MI-EEG signal classification.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>Comparing against SOTA models on accuracy (%) and kappa value <bold>&#x003BA;</bold> on the BCI-2a dataset for both within- and between-individual four-class classification tasks.</p></caption> 
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left"><bold>References</bold></th>
<th valign="top" align="left"><bold>Method</bold></th>
<th valign="top" align="center"><bold>Accuracy (within, %)</bold></th>
<th valign="top" align="center"><bold>&#x003BA; (within)</bold></th>
<th valign="top" align="center"><bold>Accuracy (between, %)</bold></th>
<th valign="top" align="center"><bold>&#x003BA; (between)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Schirrmeister et al. (<xref ref-type="bibr" rid="B41">2017</xref>)</td>
<td valign="top" align="left">CNN</td>
<td valign="top" align="center">74.31</td>
<td valign="top" align="center">0.66</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
</tr> <tr>
<td valign="top" align="left">Lawhern et al. (<xref ref-type="bibr" rid="B30">2018</xref>)</td>
<td valign="top" align="left">EEGNet</td>
<td valign="top" align="center">80.59</td>
<td valign="top" align="center">0.741</td>
<td valign="top" align="center">68.79</td>
<td valign="top" align="center">0.584</td>
</tr> <tr>
<td valign="top" align="left">Hassanpour et al. (<xref ref-type="bibr" rid="B22">2019</xref>)</td>
<td valign="top" align="left">DBN-AE</td>
<td valign="top" align="center">71.0</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
</tr> <tr>
<td valign="top" align="left">Amin et al. (<xref ref-type="bibr" rid="B8">2019</xref>)</td>
<td valign="top" align="left">Multi-layer-CNN and MLP</td>
<td valign="top" align="center">75.0</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">55.3</td>
<td valign="top" align="center">-</td>
</tr> <tr>
<td valign="top" align="left">Ingolfsson et al. (<xref ref-type="bibr" rid="B25">2020</xref>)</td>
<td valign="top" align="left">EEG-TCNet</td>
<td valign="top" align="center">83.34</td>
<td valign="top" align="center">0.776</td>
<td valign="top" align="center">69.52</td>
<td valign="top" align="center">0.594</td>
</tr> <tr>
<td valign="top" align="left">Zhang et al. (<xref ref-type="bibr" rid="B55">2020</xref>)</td>
<td valign="top" align="left">Attention graph cnn</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">60.1</td>
<td valign="top" align="center">-</td>
</tr> <tr>
<td valign="top" align="left">Musallam et al. (<xref ref-type="bibr" rid="B35">2021</xref>)</td>
<td valign="top" align="left">TCNet_Fusion</td>
<td valign="top" align="center">83.73</td>
<td valign="top" align="center">0.780</td>
<td valign="top" align="center">70.58</td>
<td valign="top" align="center">0.608</td>
</tr> <tr>
<td valign="top" align="left">Amin et al. (<xref ref-type="bibr" rid="B9">2022</xref>)</td>
<td valign="top" align="left">Attention-inception CNN and LSTM</td>
<td valign="top" align="center">82.84</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
</tr> <tr>
<td valign="top" align="left">Altuwaijri et al. (<xref ref-type="bibr" rid="B7">2022</xref>)</td>
<td valign="top" align="left">Attention multi-branch CNN</td>
<td valign="top" align="center">82.87</td>
<td valign="top" align="center">0.772</td>
<td valign="top" align="center">69.10</td>
<td valign="top" align="center">-</td>
</tr>
<tr>
<td valign="top" align="left">This work</td>
<td valign="top" align="left">CSANet</td>
<td valign="top" align="center">84.08</td>
<td valign="top" align="center">0.784</td>
<td valign="top" align="center">70.81</td>
<td valign="top" align="center">0.610</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>3.2.2. Results of the Physionet MI-EEG dataset</title>
<p>In addition to the BCI-2a dataset, a comparison was conducted between the proposed CSANet model and other SOTA models on the Physionet MI-EEG dataset, covering both within-subject and between-subject tasks. The results presented in <xref ref-type="table" rid="T7">Table 7</xref> demonstrate that the highest accuracy in both intra-subject and inter-subject classification tasks was achieved by the proposed model, with accuracies of 92.36 and 70.56%, respectively. Notably, an improvement ranging from 4.22 to 24.16% and from 2.02 to 11.98% was observed compared to other SOTA models.</p>
<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>Comparing results on the Physionet MI-EEG dataset for both within- and between-individual four-class classification tasks.</p></caption> 
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left"><bold>References</bold></th>
<th valign="top" align="left"><bold>Method</bold></th>
<th valign="top" align="center"><bold>Within accuracy</bold></th>
<th valign="top" align="center"><bold>Between accuracy</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Ma et al. (<xref ref-type="bibr" rid="B33">2018</xref>)</td>
<td valign="top" align="left">RNN</td>
<td valign="top" align="center">68.20</td>
<td valign="top" align="center">-</td>
</tr> <tr>
<td valign="top" align="left">Pinheiro et al. (<xref ref-type="bibr" rid="B37">2018</xref>)</td>
<td valign="top" align="left">RNA</td>
<td valign="top" align="center">74.69</td>
<td valign="top" align="center">-</td>
</tr> <tr>
<td valign="top" align="left">Dose et al. (<xref ref-type="bibr" rid="B19">2018</xref>)</td>
<td valign="top" align="left">CNN</td>
<td valign="top" align="center">80.38</td>
<td valign="top" align="center">58.58</td>
</tr> <tr>
<td valign="top" align="left">Kar&#x000E1;csony et al. (<xref ref-type="bibr" rid="B27">2019</xref>)</td>
<td valign="top" align="left">CNN</td>
<td valign="top" align="center">76.37</td>
<td valign="top" align="center">-</td>
</tr> <tr>
<td valign="top" align="left">Wang et al. (<xref ref-type="bibr" rid="B49">2020</xref>)</td>
<td valign="top" align="left">EEGNet</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">68.20</td>
</tr> <tr>
<td valign="top" align="left">Ali et al. (<xref ref-type="bibr" rid="B2">2022</xref>)</td>
<td valign="top" align="left">ConTraNet CNN-Transformer</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">65.44</td>
</tr> <tr>
<td valign="top" align="left">Hou et al. (<xref ref-type="bibr" rid="B23">2022</xref>)</td>
<td valign="top" align="left">GCN</td>
<td valign="top" align="center">88.14</td>
<td valign="top" align="center">-</td>
</tr> <tr>
<td valign="top" align="left">Xie et al. (<xref ref-type="bibr" rid="B52">2022</xref>)</td>
<td valign="top" align="left">Transformer</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">68.54</td>
</tr>
<tr>
<td valign="top" align="left">This work</td>
<td valign="top" align="left">CSANet</td>
<td valign="top" align="center">92.36</td>
<td valign="top" align="center">70.56</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The features of the proposed CSANet model calculated on the test set in within- and between-individual tasks were extracted, and the t-SNE method was utilized to display the feature projection of four types of motor imagery in the dataset: left fist, right fist, both fists, and feet. As shown in <xref ref-type="fig" rid="F10">Figure 10</xref>, in the within-individual classification task, most of the samples exhibited high feature distinctiveness, indicating that the effective features of the four types of motor imagery could be accurately distinguished by the proposed model without considering the specificity of individual EEG signals. However, in the between-individual classification task, the feature overlap of motor imagery of the left fist, right fist, and both fists was high and the distinctiveness was low, thereby impeding accurate classification. Notably, the feature distinctiveness of motor imagery of feet was higher compared to the other three types of samples. These findings suggest that the proposed model exhibits higher recognition accuracy for activity in the somatotopic area of the unilateral motor cortex but still lacks processing of symmetric neural activity information for hand movements in the bilateral brain areas.</p>
<fig id="F10" position="float">
<label>Figure 10</label>
<caption><p>The t-SNE visualization for the four-dimensional features on the Physionet MI-EEG dataset, including left fist, right fist, both fists, and feet. <bold>(A)</bold> Features in the within-individual classification task. <bold>(B)</bold> Features in the between-individual classification task.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnins-17-1204385-g0010.tif"/>
</fig>
</sec>
</sec>
</sec>
<sec id="s4">
<title>4. Discussion</title>
<p>In this study, we proposed a CSANet model that integrates multi-scale convolutional feature extraction, a multi-perspective sliding window, and a two-stage attention mechanism to address the challenges in classifying motor imagery EEG signals.</p>
<p>Our ablation experiments on each sub-module of the proposed method revealed that the introduction of the global attention module significantly improved the classification performance of the model on MI-EEG data. Moreover, the methods of global and local feature extraction based on sliding windows and local multi-head attention showed significant impacts on the model&#x00027;s classification performance. Although the introduction of the type token method may have certain side effects on the model in the case of single activation, its combination with local multi-head attention significantly improved the model&#x00027;s performance. This enhancement may be attributed to the fact that the role of type token is influenced by other dimensional features in different data environments, which static deep learning models without local attention mechanisms cannot handle effectively. Consequently, by integrating a local multi-head self-attention mechanism and endowing the model with the ability to learn dynamic weights of type token, the model&#x00027;s performance can be greatly improved.</p>
<p>In the study, the proposed CSANet model was compared with nine other deep learning models using the BCI-2a dataset. The results showed that a higher accuracy of 84.08% and a kappa value of 0.784 were achieved by the model in the within-individual classification tasks, surpassing the performance of other models. Furthermore, the best performance in between-individual classification was exhibited by the model, with an accuracy of 70.81% and a kappa value of 0.610. On the Physionet MI-EEG dataset, the highest accuracy was achieved by the model compared to other state-of-the-art models in both within- and between-individual classification tasks, with accuracies of 92.36 and 70.56%, respectively. These accuracies represented significant improvements of 4.22 and 2.02%, respectively. The classification of MI-EEG signals remains a challenging topic in current research, and limited improvements have been shown in previous studies on algorithms for MI-EEG signal classification. For instance, EEGNet-TCNet was proposed by Ingolfsson et al. (<xref ref-type="bibr" rid="B25">2020</xref>) on the BCI-2a dataset, achieving an accuracy of 83.34%, which represented a 2.74% improvement over previous models. Subsequently, Musallam et al. (<xref ref-type="bibr" rid="B35">2021</xref>) proposed TCNet_Fusion, which achieved an accuracy of 83.73%, a 0.39% improvement. Altuwaijri et al. (<xref ref-type="bibr" rid="B7">2022</xref>) proposed a CNN combined with an attention model, achieving an accuracy of 82.87%, which was a 0.86% improvement over previous models. On the Physionet dataset, Xie et al. (<xref ref-type="bibr" rid="B52">2022</xref>) achieved an accuracy of 68.54%, which was 2.81% higher than the previous CNN model&#x00027;s accuracy of 65.73%. The proposed CSANet model outperforms these studies on both datasets, with improvements of up to 4.22%. 
Although the improvement is modest, the same model achieves effective results on both datasets, demonstrating the robustness of the CSANet model.</p>
<p>In recent years, self-attention mechanisms have been widely adopted in EEG classification research. For example, Xie et al. (<xref ref-type="bibr" rid="B52">2022</xref>) utilized attention mechanisms in both temporal and spatial domains, while many other models integrated CNNs with attention mechanisms for data classification (Altuwaijri et al., <xref ref-type="bibr" rid="B7">2022</xref>). Ali et al. (<xref ref-type="bibr" rid="B2">2022</xref>) also employed a combination of CNNs but incorporated the Vision Transformer (Dosovitskiy et al., <xref ref-type="bibr" rid="B20">2021</xref>) in the attention mechanism to introduce position embeddings for feature classification. In addition to CNNs, Amin et al. (<xref ref-type="bibr" rid="B9">2022</xref>) achieved remarkable performance by integrating LSTM. Our model utilizes two types of sliding windows to extract features with both continuous and global dimensions. Local and global attention allow for a two-stage dynamic assignment of feature weights, which facilitates the selection of more relevant features. When combined with type tokens, it can extract features more accurately and enhance the robustness of the model, enabling the extraction of important features from different datasets to accurately classify EEG signals. The model&#x00027;s enhancement of classification performance in MI-EEG tasks has been demonstrated in the experiments on two public EEG datasets and has surpassed other methods. Although this improvement is not significant enough, it is at the same level as other work relative to SOTA methods. Through ablation experiments, we have proven the effectiveness of each module. Importantly, our model reduces the individual specificity of EEG signals by recognizing common patterns among subjects. This approach effectively highlights local features while also enabling the application of global features during classification. 
In this article, all four experiments on the two datasets used similar hyperparameters and achieved good performance, which further demonstrates the robustness of the proposed method and of the extracted features.</p>
<p>The features extracted by the proposed model were visualized using the t-SNE method, and the confusion matrix was calculated, as shown in <xref ref-type="fig" rid="F8">Figures 8</xref>&#x02013;<xref ref-type="fig" rid="F10">10</xref>. It was observed that for the within-individual classification task in BCI-2a, the classification results for foot and tongue motor imagery were significantly improved by our proposed model compared to other models. In the between-individual classification task, a higher accuracy in classifying left-hand motor imagery was achieved by the model than in other limb parts, but the overall classification result was lower. In the Physionet MI-EEG dataset, a very high feature discriminability was observed for the individual classification task. The discriminability of features for left fist, right fist, and both fists in between-individual MI classification was not high, but foot motor imagination was effectively distinguished. These results suggest that the classification effect of the proposed model in the between-individual task was lower than that in the within-individual task due to the influence of individual specificity. Furthermore, it was noted that the t-SNE analysis of the individual classification in BCI-2a and the inter-individual classification in Physionet MI-EEG demonstrated that the model had a better recognition effect on the body mapping EEG signals of the unilateral brain motor area, but the discriminability of the activation of bilateral brain information still needs to be improved. The overall improvement may be attributed to the ability of the proposed model to mine more effective spatiotemporal features and dynamically combine and weigh the features with a two-stage local and global attention mechanism to improve the overall classification performance of MI-EEG signals.</p>
<p>The model proposed in this study can be applied not only to the development of brain-computer interface control systems based on motor imagery but also to the neurorehabilitation evaluation of diseases such as Parkinson&#x00027;s and stroke based on motor imagery. In the evaluation process, MI-EEG signals from healthy individuals can first be trained based on CSANet. Then, the trained model can be used to classify and visually evaluate the motor imagery signals of patients with Parkinson&#x00027;s or stroke who are in the rehabilitation period. If the patient&#x00027;s motor system is severely damaged, the classification accuracy of the MI-EEG model might be lower than that of healthy individuals. Through visual evaluation, the specific accuracy of identification of the patient&#x00027;s limb movement imagination can be determined, and targeted training can be conducted for the parts with lower identification accuracy to quickly improve the patient&#x00027;s recovery effect. When the overall MI-EEG signal classification accuracy of the patient is high, it is indicated that the patient&#x00027;s motor imagery EEG signal pattern is close to that of healthy individuals. It could be estimated that the patient&#x00027;s nervous system has recovered to a certain level of limb movement control according to the conclusions of the mirror neuron system and the theory of embodied cognition.</p>
</sec>
<sec id="s5">
<title>5. Conclusion</title>
<p>The convolutional sliding window-attention network (CSANet) model proposed in the article is composed of novel spatiotemporal convolution, sliding window, and two-stage self-attention blocks. The adaptive feature learning and selection ability of multi-scale information correlations in EEG signals is improved by the type token, sliding window, and local and global multi-head self-attention mechanisms proposed in the model, thereby enhancing the model&#x00027;s classification performance, as demonstrated by the results of the ablation experiment analysis. The model has been demonstrated to outperform existing state-of-the-art (SOTA) models in within- and between-individual classification tasks in two commonly used MI-EEG datasets, BCI-2a and Physionet MI-EEG, with classification accuracies improved by 4.22 and 2.02%, respectively. Based on t-SNE visualization of the model features and confusion matrix analysis, it can be inferred that the proposed model exhibits superior performance in identifying EEG signals in the unilateral somatotopic area, although the discernibility of bilateral brain information activity remains a challenge. Furthermore, this study proposed a plausible neurorehabilitation assessment framework based on the model for mental diseases such as Parkinson&#x00027;s disease and stroke based on motor imagery. In future work, the model would be further improved based on its shortcomings, and experiments would be conducted on MI-EEG data of specific disease patients to demonstrate the neurorehabilitation assessment framework based on the CSANet model.</p>
</sec>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material; further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>YH designed the model, experiments, and wrote the paper. JZ, BX, XL, and YL participated in the experimental design process with JZ providing expertise in experimental design methods. BX processed the public datasets used in two experiments. XL provided experimental design advice and implementation. YL assisted in interpreting and reviewing the experimental results. ZW supervised and reviewed the research, editing, and revising the paper. HF provided model design ideas and suggestions. SC provided clinical experience and insights in the experiments. All authors contributed to the article and approved the submitted version.</p>
</sec>
</body>
<back>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>This study was supported by the Shanghai Sailing Program (No. 23YF1401100) and the Fundamental Research Funds for the Central Universities (No. 2232021D-26).</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Abiri</surname> <given-names>R.</given-names></name> <name><surname>Borhani</surname> <given-names>S.</given-names></name> <name><surname>Sellers</surname> <given-names>E. W.</given-names></name> <name><surname>Jiang</surname> <given-names>Y.</given-names></name> <name><surname>Zhao</surname> <given-names>X.</given-names></name></person-group> (<year>2019</year>). <article-title>A comprehensive review of EEG-based brain&#x02013;computer interface paradigms</article-title>. <source>J. Neural Eng.</source> <volume>16</volume>, <fpage>011001</fpage>. <pub-id pub-id-type="doi">10.1088/1741-2552/aaf12e</pub-id><pub-id pub-id-type="pmid">30523919</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ali</surname> <given-names>O.</given-names></name> <name><surname>Saif-ur-Rehman</surname> <given-names>M.</given-names></name> <name><surname>Glasmachers</surname> <given-names>T.</given-names></name> <name><surname>Iossifidis</surname> <given-names>I.</given-names></name> <name><surname>Klaes</surname> <given-names>C.</given-names></name></person-group> (<year>2022</year>). <article-title>ConTraNet: A single end-to-end hybrid network for EEG-based and EMG-based human machine interfaces</article-title>. <source>arXiv preprint arXiv:2206.10677.</source></citation>
</ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Al-Saegh</surname> <given-names>A.</given-names></name> <name><surname>Dawwd</surname> <given-names>S. A.</given-names></name> <name><surname>Abdul-Jabbar</surname> <given-names>J. M.</given-names></name></person-group> (<year>2021</year>). <article-title>Deep learning for motor imagery EEG-based classification: A review</article-title>. <source>Biomed. Signal Process. Control</source> <volume>63</volume>, <fpage>102172</fpage>. <pub-id pub-id-type="doi">10.1016/j.bspc.2020.102172</pub-id></citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Altaheri</surname> <given-names>H.</given-names></name> <name><surname>Muhammad</surname> <given-names>G.</given-names></name> <name><surname>Alsulaiman</surname> <given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>Physics-informed attention temporal convolutional network for EEG-based motor imagery classification</article-title>. <source>IEEE Trans. Ind. Inform</source>. <volume>19</volume>, <fpage>2249</fpage>&#x02013;<lpage>2258</lpage>. <pub-id pub-id-type="doi">10.1109/TII.2022.3197419</pub-id></citation>
</ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Altaheri</surname> <given-names>H.</given-names></name> <name><surname>Muhammad</surname> <given-names>G.</given-names></name> <name><surname>Alsulaiman</surname> <given-names>M.</given-names></name> <name><surname>Amin</surname> <given-names>S. U.</given-names></name> <name><surname>Altuwaijri</surname> <given-names>G. A.</given-names></name> <name><surname>Abdul</surname> <given-names>W.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Deep learning techniques for classification of electroencephalogram (EEG) motor imagery (MI) signals: a review</article-title>. <source>Neural Comput. Appl.</source> <volume>35</volume>, <fpage>14681</fpage>&#x02013;<lpage>14722</lpage>. <pub-id pub-id-type="doi">10.1007/s00521-021-06352-5</pub-id><pub-id pub-id-type="pmid">36349568</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Altuwaijri</surname> <given-names>G. A.</given-names></name> <name><surname>Muhammad</surname> <given-names>G.</given-names></name></person-group> (<year>2022</year>). <article-title>Electroencephalogram-based motor imagery signals classification using a multi-branch convolutional neural network model with attention blocks</article-title>. <source>Bioengineering</source> <volume>9</volume>, <fpage>323</fpage>. <pub-id pub-id-type="doi">10.3390/bioengineering9070323</pub-id><pub-id pub-id-type="pmid">35877374</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Altuwaijri</surname> <given-names>G. A.</given-names></name> <name><surname>Muhammad</surname> <given-names>G.</given-names></name> <name><surname>Altaheri</surname> <given-names>H.</given-names></name> <name><surname>Alsulaiman</surname> <given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>A multi-branch convolutional neural network with squeeze-and-excitation attention blocks for EEG-based motor imagery signals classification</article-title>. <source>Diagnostics</source> <volume>12</volume>, <fpage>995</fpage>. <pub-id pub-id-type="doi">10.3390/diagnostics12040995</pub-id><pub-id pub-id-type="pmid">35454043</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Amin</surname> <given-names>S. U.</given-names></name> <name><surname>Alsulaiman</surname> <given-names>M.</given-names></name> <name><surname>Muhammad</surname> <given-names>G.</given-names></name> <name><surname>Mekhtiche</surname> <given-names>M. A.</given-names></name> <name><surname>Shamim Hossain</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>Deep Learning for EEG motor imagery classification based on multi-layer CNNs feature fusion</article-title>. <source>Future Gener. Comput. Syst.</source> <volume>101</volume>, <fpage>542</fpage>&#x02013;<lpage>554</lpage>. <pub-id pub-id-type="doi">10.1016/j.future.2019.06.027</pub-id></citation>
</ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Amin</surname> <given-names>S. U.</given-names></name> <name><surname>Altaheri</surname> <given-names>H.</given-names></name> <name><surname>Muhammad</surname> <given-names>G.</given-names></name> <name><surname>Abdul</surname> <given-names>W.</given-names></name> <name><surname>Alsulaiman</surname> <given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>Attention-inception and long- short-term memory-based electroencephalography classification for motor imagery tasks in rehabilitation</article-title>. <source>IEEE Trans. Ind. Inform.</source> <volume>18</volume>, <fpage>5412</fpage>&#x02013;<lpage>5421</lpage>. <pub-id pub-id-type="doi">10.1109/TII.2021.3132340</pub-id></citation>
</ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bahdanau</surname> <given-names>D.</given-names></name> <name><surname>Cho</surname> <given-names>K.</given-names></name> <name><surname>Bengio</surname> <given-names>Y.</given-names></name></person-group> (<year>2014</year>). <article-title>Neural machine translation by jointly learning to align and translate</article-title>. <source>arXiv preprint arXiv:1409.0473</source>.</citation>
</ref>
<ref id="B11">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Bhattacharyya</surname> <given-names>S.</given-names></name> <name><surname>Khasnobish</surname> <given-names>A.</given-names></name> <name><surname>Chatterjee</surname> <given-names>S.</given-names></name> <name><surname>Konar</surname> <given-names>A.</given-names></name> <name><surname>Tibarewala</surname> <given-names>D. N.</given-names></name></person-group> (<year>2010</year>). <article-title>&#x0201C;Performance analysis of LDA, QDA and KNN algorithms in left-right limb movement classification from EEG data,&#x0201D;</article-title> in <source>2010 International Conference on Systems in Medicine and Biology</source> (<publisher-loc>Kharagpur, India</publisher-loc>: <publisher-name>IEEE</publisher-name>) <fpage>126</fpage>&#x02013;<lpage>131</lpage>. <pub-id pub-id-type="doi">10.1109/ICSMB.2010.5735358</pub-id></citation>
</ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Binks</surname> <given-names>J. A.</given-names></name> <name><surname>Emerson</surname> <given-names>J. R.</given-names></name> <name><surname>Scott</surname> <given-names>M. W.</given-names></name> <name><surname>Wilson</surname> <given-names>C.</given-names></name> <name><surname>van Schaik</surname> <given-names>P.</given-names></name> <name><surname>Eaves</surname> <given-names>D. L.</given-names></name></person-group> (<year>2023</year>). <article-title>Enhancing upper-limb neurorehabilitation in chronic stroke survivors using combined action observation and motor imagery therapy</article-title>. <source>Front. Neurol.</source> <volume>14</volume>, <fpage>1097422</fpage>. <pub-id pub-id-type="doi">10.3389/fneur.2023.1097422</pub-id><pub-id pub-id-type="pmid">36937513</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Brunner</surname> <given-names>C.</given-names></name> <name><surname>Leeb</surname> <given-names>R.</given-names></name> <name><surname>M&#x000FC;ller-Putz</surname> <given-names>G.</given-names></name> <name><surname>Schl&#x000F6;gl</surname> <given-names>A.</given-names></name> <name><surname>Pfurtscheller</surname> <given-names>G.</given-names></name></person-group> (<year>2008</year>). <article-title>&#x0201C;BCI Competition 2008&#x02013;Graz data set A,&#x0201D;</article-title> in <source>Institute for Knowledge Discovery (Laboratory of Brain-Computer Interfaces)</source> (<publisher-loc>Graz University of Technology</publisher-loc>) <volume>16</volume>, <fpage>1</fpage>&#x02013;<lpage>6</lpage>.<pub-id pub-id-type="pmid">21374997</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Chatterjee</surname> <given-names>R.</given-names></name> <name><surname>Bandyopadhyay</surname> <given-names>T.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;EEG based motor imagery classification using SVM and MLP,&#x0201D;</article-title> in <source>2016 2nd International Conference on Computational Intelligence and Networks (CINE)</source> (<publisher-loc>Bhubaneswar, India</publisher-loc>: <publisher-name>IEEE</publisher-name>) <fpage>84</fpage>&#x02013;<lpage>89</lpage>. <pub-id pub-id-type="doi">10.1109/CINE.2016.22</pub-id></citation>
</ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Yang</surname> <given-names>R.</given-names></name> <name><surname>Huang</surname> <given-names>M.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Liu</surname> <given-names>X.</given-names></name></person-group> (<year>2022</year>). <article-title>Single-source to single-target cross-subject motor imagery classification based on multisubdomain adaptation network</article-title>. <source>IEEE Trans. Neural Syst. Rehabil. Eng.</source> <volume>30</volume>, <fpage>1992</fpage>&#x02013;<lpage>2002</lpage>. <pub-id pub-id-type="doi">10.1109/TNSRE.2022.3191869</pub-id><pub-id pub-id-type="pmid">35849678</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Chin</surname> <given-names>Z. Y.</given-names></name> <name><surname>Ang</surname> <given-names>K. K.</given-names></name> <name><surname>Wang</surname> <given-names>C.</given-names></name> <name><surname>Guan</surname> <given-names>C.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name></person-group> (<year>2009</year>). <article-title>&#x0201C;Multi-class filter bank common spatial pattern for four-class motor imagery BCI,&#x0201D;</article-title> in <source>2009 Annual International Conference of the IEEE Engineering in Medicine and Biology Society</source> (<publisher-loc>Minneapolis, MN</publisher-loc>: <publisher-name>IEEE</publisher-name>) <fpage>571</fpage>&#x02013;<lpage>574</lpage>.<pub-id pub-id-type="pmid">19963466</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cuomo</surname> <given-names>G.</given-names></name> <name><surname>Maglianella</surname> <given-names>V.</given-names></name> <name><surname>Ghanbari Ghooshchy</surname> <given-names>S.</given-names></name> <name><surname>Zoccolotti</surname> <given-names>P.</given-names></name> <name><surname>Martelli</surname> <given-names>M.</given-names></name> <name><surname>Paolucci</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Motor imagery and gait control in Parkinson&#x00027;s disease: techniques and new perspectives in neurorehabilitation</article-title>. <source>Expert Rev. Neurother.</source> <volume>22</volume>, <fpage>43</fpage>&#x02013;<lpage>51</lpage>. <pub-id pub-id-type="doi">10.1080/14737175.2022.2018301</pub-id><pub-id pub-id-type="pmid">34906019</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dai</surname> <given-names>M.</given-names></name> <name><surname>Zheng</surname> <given-names>D.</given-names></name> <name><surname>Na</surname> <given-names>R.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Zhang</surname> <given-names>S.</given-names></name></person-group> (<year>2019</year>). <article-title>EEG classification of motor imagery using a novel deep learning framework</article-title>. <source>Sensors</source> <volume>19</volume>, <fpage>551</fpage>. <pub-id pub-id-type="doi">10.3390/s19030551</pub-id><pub-id pub-id-type="pmid">30699946</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dose</surname> <given-names>H.</given-names></name> <name><surname>M&#x000F8;ller</surname> <given-names>J. S.</given-names></name> <name><surname>Iversen</surname> <given-names>H. K.</given-names></name> <name><surname>Puthusserypady</surname> <given-names>S.</given-names></name></person-group> (<year>2018</year>). <article-title>An end-to-end deep learning approach to MI-EEG signal classification for BCIs</article-title>. <source>Expert Syst. Appl.</source> <volume>114</volume>, <fpage>532</fpage>&#x02013;<lpage>542</lpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2018.08.031</pub-id><pub-id pub-id-type="pmid">31341093</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dosovitskiy</surname> <given-names>A.</given-names></name> <name><surname>Beyer</surname> <given-names>L.</given-names></name> <name><surname>Kolesnikov</surname> <given-names>A.</given-names></name> <name><surname>Weissenborn</surname> <given-names>D.</given-names></name> <name><surname>Zhai</surname> <given-names>X.</given-names></name> <name><surname>Unterthiner</surname> <given-names>T.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>An image is Worth 16x16 words: transformers for image recognition at scale</article-title>. <source>ICLR</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2010.11929</pub-id></citation>
</ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Goldberger</surname> <given-names>A. L.</given-names></name> <name><surname>Amaral</surname> <given-names>L. A. N.</given-names></name> <name><surname>Glass</surname> <given-names>L.</given-names></name> <name><surname>Hausdorff</surname> <given-names>J. M.</given-names></name> <name><surname>Ivanov</surname> <given-names>P. Ch.</given-names></name> <etal/></person-group>. (<year>2000</year>). <article-title>PhysioBank, physiotoolkit, and physionet: components of a new research resource for complex physiologic signals</article-title>. <source>Circulation</source> <volume>101</volume>, <fpage>e215</fpage>&#x02013;<lpage>e220</lpage>. <pub-id pub-id-type="doi">10.1161/01.CIR.101.23.e215</pub-id><pub-id pub-id-type="pmid">10851218</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hassanpour</surname> <given-names>A.</given-names></name> <name><surname>Moradikia</surname> <given-names>M.</given-names></name> <name><surname>Adeli</surname> <given-names>H.</given-names></name> <name><surname>Khayami</surname> <given-names>S. R.</given-names></name> <name><surname>Shamsinejadbabaki</surname> <given-names>P.</given-names></name></person-group> (<year>2019</year>). <article-title>A novel end-to-end deep learning scheme for classifying multi-class motor imagery electroencephalography signals</article-title>. <source>Expert Syst.</source> <volume>36</volume>, <fpage>e12494</fpage>. <pub-id pub-id-type="doi">10.1111/exsy.12494</pub-id></citation>
</ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hou</surname> <given-names>Y.</given-names></name> <name><surname>Jia</surname> <given-names>S.</given-names></name> <name><surname>Lun</surname> <given-names>X.</given-names></name> <name><surname>Hao</surname> <given-names>Z.</given-names></name> <name><surname>Shi</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>&#x0201C;GCNs-Net: a graph convolutional neural network approach for decoding time-resolved EEG motor imagery signals,&#x0201D;</article-title> in <source>IEEE Transactions on Neural Networks and Learning Systems</source> <fpage>1</fpage>&#x02013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2022.3202569</pub-id><pub-id pub-id-type="pmid">36099220</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hou</surname> <given-names>Y.</given-names></name> <name><surname>Zhou</surname> <given-names>L.</given-names></name> <name><surname>Jia</surname> <given-names>S.</given-names></name> <name><surname>Lun</surname> <given-names>X.</given-names></name></person-group> (<year>2020</year>). <article-title>A novel approach of decoding EEG four-class motor imagery tasks via scout ESI and CNN</article-title>. <source>J. Neural Eng.</source> <volume>17</volume>, <fpage>016048</fpage>. <pub-id pub-id-type="doi">10.1088/1741-2552/ab4af6</pub-id><pub-id pub-id-type="pmid">31585454</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ingolfsson</surname> <given-names>T. M.</given-names></name> <name><surname>Hersche</surname> <given-names>M.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Kobayashi</surname> <given-names>N.</given-names></name> <name><surname>Cavigelli</surname> <given-names>L.</given-names></name> <name><surname>Benini</surname> <given-names>L.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;EEG-TCNet: an accurate temporal convolutional network for embedded motor-imagery brain&#x02013;machine interfaces,&#x0201D;</article-title> in <source>2020 IEEE International Conference on Systems, Man, and Cybernetics (SMC)</source> <fpage>2958</fpage>&#x02013;<lpage>2965</lpage>. <pub-id pub-id-type="doi">10.1109/SMC42975.2020.9283028</pub-id></citation>
</ref>
<ref id="B26">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Jeunet</surname> <given-names>C.</given-names></name> <name><surname>Vi</surname> <given-names>C.</given-names></name> <name><surname>Spelmezan</surname> <given-names>D.</given-names></name> <name><surname>N&#x00027;Kaoua</surname> <given-names>B.</given-names></name> <name><surname>Lotte</surname> <given-names>F.</given-names></name> <name><surname>Subramanian</surname> <given-names>S.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;Continuous tactile feedback for motor-imagery based brain-computer interaction in a multitasking context,&#x0201D;</article-title> in <source>Human-Computer Interaction &#x02013; INTERACT 2015</source>, eds. J. Abascal, S. Barbosa, M. Fetter, T. Gross, P. Palanque, and M. Winckler (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name>) <fpage>488</fpage>&#x02013;<lpage>505</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-22701-6_36</pub-id></citation>
</ref>
<ref id="B27">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kar&#x000E1;csony</surname> <given-names>T.</given-names></name> <name><surname>Hansen</surname> <given-names>J. P.</given-names></name> <name><surname>Iversen</surname> <given-names>H. K.</given-names></name> <name><surname>Puthusserypady</surname> <given-names>S.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Brain computer interface for neuro-rehabilitation with deep learning classification and virtual reality feedback,&#x0201D;</article-title> in <source>Proceedings of the 10th Augmented Human International Conference 2019 AH2019</source>. (<publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>) <fpage>1</fpage>&#x02013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1145/3311823.3311864</pub-id></citation>
</ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Khademi</surname> <given-names>Z.</given-names></name> <name><surname>Ebrahimi</surname> <given-names>F.</given-names></name> <name><surname>Kordy</surname> <given-names>H. M.</given-names></name></person-group> (<year>2022</year>). <article-title>A transfer learning-based CNN and LSTM hybrid deep learning model to classify motor imagery EEG signals</article-title>. <source>Comput. Biol. Med.</source> <volume>143</volume>, <fpage>105288</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2022.105288</pub-id><pub-id pub-id-type="pmid">35168083</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kumar</surname> <given-names>S.</given-names></name> <name><surname>Sharma</surname> <given-names>R.</given-names></name> <name><surname>Sharma</surname> <given-names>A.</given-names></name></person-group> (<year>2021</year>). <article-title>OPTICAL&#x0002B;: a frequency-based deep learning scheme for recognizing brain wave signals</article-title>. <source>PeerJ Comput. Sci.</source> <volume>7</volume>, <fpage>e375</fpage>. <pub-id pub-id-type="doi">10.7717/peerj-cs.375</pub-id><pub-id pub-id-type="pmid">33817023</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lawhern</surname> <given-names>V. J.</given-names></name> <name><surname>Solon</surname> <given-names>A. J.</given-names></name> <name><surname>Waytowich</surname> <given-names>N. R.</given-names></name> <name><surname>Gordon</surname> <given-names>S. M.</given-names></name> <name><surname>Hung</surname> <given-names>C. P.</given-names></name> <name><surname>Lance</surname> <given-names>B. J.</given-names></name></person-group> (<year>2018</year>). <article-title>EEGNet: a compact convolutional neural network for EEG-based brain&#x02013;computer interfaces</article-title>. <source>J. Neural Eng.</source> <volume>15</volume>, <fpage>056013</fpage>. <pub-id pub-id-type="doi">10.1088/1741-2552/aace8c</pub-id><pub-id pub-id-type="pmid">29932424</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>D.</given-names></name> <name><surname>Xu</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Fang</surname> <given-names>X.</given-names></name> <name><surname>Ji</surname> <given-names>Y.</given-names></name></person-group> (<year>2020</year>). <article-title>A multi-scale fusion convolutional neural network based on attention mechanism for the visualization analysis of EEG signals decoding</article-title>. <source>IEEE Trans. Neural Syst. Rehabil. Eng.</source> <volume>28</volume>, <fpage>2615</fpage>&#x02013;<lpage>2626</lpage>. <pub-id pub-id-type="doi">10.1109/TNSRE.2020.3037326</pub-id><pub-id pub-id-type="pmid">33175681</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>T.</given-names></name> <name><surname>Zhou</surname> <given-names>C.</given-names></name> <name><surname>Chao</surname> <given-names>F.</given-names></name></person-group> (<year>2018</year>). <article-title>Exploring spatial-frequency-sequential relationships for motor imagery classification with recurrent neural network</article-title>. <source>BMC Bioinform.</source> <volume>19</volume>, <fpage>344</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-018-2365-1</pub-id><pub-id pub-id-type="pmid">30268089</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>X.</given-names></name> <name><surname>Qiu</surname> <given-names>S.</given-names></name> <name><surname>Du</surname> <given-names>C.</given-names></name> <name><surname>Xing</surname> <given-names>J.</given-names></name> <name><surname>He</surname> <given-names>H.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;Improving EEG-based motor imagery classification via spatial and temporal recurrent neural networks,&#x0201D;</article-title> in <source>2018 40th Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC)</source> <fpage>1903</fpage>&#x02013;<lpage>1906</lpage>. <pub-id pub-id-type="doi">10.1109/EMBC.2018.8512590</pub-id><pub-id pub-id-type="pmid">30440769</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moseley</surname> <given-names>G. L.</given-names></name></person-group> (<year>2006</year>). <article-title>Graded motor imagery for pathologic pain: A randomized controlled trial</article-title>. <source>Neurology</source> <volume>67</volume>, <fpage>2129</fpage>&#x02013;<lpage>2134</lpage>. <pub-id pub-id-type="doi">10.1212/01.wnl.0000249112.56935.32</pub-id><pub-id pub-id-type="pmid">17082465</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Musallam</surname> <given-names>Y. K.</given-names></name> <name><surname>AlFassam</surname> <given-names>N. I.</given-names></name> <name><surname>Muhammad</surname> <given-names>G.</given-names></name> <name><surname>Amin</surname> <given-names>S. U.</given-names></name> <name><surname>Alsulaiman</surname> <given-names>M.</given-names></name> <name><surname>Abdul</surname> <given-names>W.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Electroencephalography-based motor imagery classification using temporal convolutional network fusion</article-title>. <source>Biomed. Signal Process. Control</source> <volume>69</volume>, <fpage>102826</fpage>. <pub-id pub-id-type="doi">10.1016/j.bspc.2021.102826</pub-id></citation>
</ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Paris-Alemany</surname> <given-names>A.</given-names></name> <name><surname>La Touche</surname> <given-names>R.</given-names></name> <name><surname>Gadea-Mateos</surname> <given-names>L.</given-names></name> <name><surname>Cuenca-Mart&#x000ED;nez</surname> <given-names>F.</given-names></name> <name><surname>Suso-Mart&#x000ED;</surname> <given-names>L.</given-names></name></person-group> (<year>2019</year>). <article-title>Familiarity and complexity of a movement influences motor imagery in dancers: A cross-sectional study</article-title>. <source>Scand. J. Med. Sci. Sports</source> <volume>29</volume>, <fpage>897</fpage>&#x02013;<lpage>906</lpage>. <pub-id pub-id-type="doi">10.1111/sms.13399</pub-id><pub-id pub-id-type="pmid">30714228</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pinheiro</surname> <given-names>O. R.</given-names></name> <name><surname>Alves</surname> <given-names>L. R. G.</given-names></name> <name><surname>Souza</surname> <given-names>J. R. D.</given-names></name></person-group> (<year>2018</year>). <article-title>EEG Signals Classification: Motor Imagery for Driving an Intelligent Wheelchair</article-title>. <source>IEEE Lat. Am. Trans.</source> <volume>16</volume>, <fpage>254</fpage>&#x02013;<lpage>259</lpage>. <pub-id pub-id-type="doi">10.1109/TLA.2018.8291481</pub-id></citation>
</ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Qin</surname> <given-names>L.</given-names></name> <name><surname>He</surname> <given-names>B.</given-names></name></person-group> (<year>2005</year>). <article-title>A wavelet-based time&#x02013;frequency analysis approach for classification of motor imagery for brain&#x02013;computer interface applications</article-title>. <source>J. Neural Eng.</source> <volume>2</volume>, <fpage>65</fpage>. <pub-id pub-id-type="doi">10.1088/1741-2560/2/4/001</pub-id><pub-id pub-id-type="pmid">16317229</pub-id></citation></ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ron-Angevin</surname> <given-names>R.</given-names></name> <name><surname>Velasco-&#x000C1;lvarez</surname> <given-names>F.</given-names></name> <name><surname>Fern&#x000E1;ndez-Rodr&#x000ED;guez</surname> <given-names>&#x000C1;.</given-names></name> <name><surname>D&#x000ED;az-Estrella</surname> <given-names>A.</given-names></name> <name><surname>Blanca-Mena</surname> <given-names>M. J.</given-names></name> <name><surname>Vizca&#x000ED;no-Mart&#x000ED;n</surname> <given-names>F. J.</given-names></name></person-group> (<year>2017</year>). <article-title>Brain-Computer Interface application: auditory serial interface to control a two-class motor-imagery-based wheelchair</article-title>. <source>J. NeuroEng. Rehabil.</source> <volume>14</volume>, <fpage>49</fpage>. <pub-id pub-id-type="doi">10.1186/s12984-017-0261-y</pub-id><pub-id pub-id-type="pmid">28558741</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Samuel</surname> <given-names>O. W.</given-names></name> <name><surname>Geng</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>G.</given-names></name></person-group> (<year>2017</year>). <article-title>Towards efficient decoding of multiple classes of motor imagery limb movements based on EEG spectral and time domain descriptors</article-title>. <source>J. Med. Syst.</source> <volume>41</volume>, <fpage>1</fpage>&#x02013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1007/s10916-017-0843-z</pub-id><pub-id pub-id-type="pmid">29080913</pub-id></citation></ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schirrmeister</surname> <given-names>R. T.</given-names></name> <name><surname>Springenberg</surname> <given-names>J. T.</given-names></name> <name><surname>Fiederer</surname> <given-names>L. D. J.</given-names></name> <name><surname>Glasstetter</surname> <given-names>M.</given-names></name> <name><surname>Eggensperger</surname> <given-names>K.</given-names></name> <name><surname>Tangermann</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Deep learning with convolutional neural networks for EEG decoding and visualization</article-title>. <source>Hum. Brain Mapp.</source> <volume>38</volume>, <fpage>5391</fpage>&#x02013;<lpage>5420</lpage>. <pub-id pub-id-type="doi">10.1002/hbm.23730</pub-id><pub-id pub-id-type="pmid">28782865</pub-id></citation></ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Selim</surname> <given-names>S.</given-names></name> <name><surname>Tantawi</surname> <given-names>M. M.</given-names></name> <name><surname>Shedeed</surname> <given-names>H. A.</given-names></name> <name><surname>Badr</surname> <given-names>A.</given-names></name></person-group> (<year>2018</year>). <article-title>A CSP\AM-BA-SVM approach for motor imagery BCI system</article-title>. <source>IEEE Access</source> <volume>6</volume>, <fpage>49192</fpage>&#x02013;<lpage>49208</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2018.2868178</pub-id></citation>
</ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Singh</surname> <given-names>A.</given-names></name> <name><surname>Lal</surname> <given-names>S.</given-names></name> <name><surname>Guesgen</surname> <given-names>H. W.</given-names></name></person-group> (<year>2019</year>). <article-title>Reduce calibration time in motor imagery using spatially regularized symmetric positives-definite matrices based classification</article-title>. <source>Sensors</source> <volume>19</volume>, <fpage>379</fpage>. <pub-id pub-id-type="doi">10.3390/s19020379</pub-id><pub-id pub-id-type="pmid">30658523</pub-id></citation></ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Steyrl</surname> <given-names>D.</given-names></name> <name><surname>Scherer</surname> <given-names>R.</given-names></name> <name><surname>F&#x000F6;rstner</surname> <given-names>O.</given-names></name> <name><surname>M&#x000FC;ller-Putz</surname> <given-names>G. R.</given-names></name></person-group> (<year>2014</year>). <article-title>&#x0201C;Motor imagery brain-computer interfaces: random forests vs. regularized LDA-non-linear beats linear,&#x0201D;</article-title> in <source>Proceedings of the 6th International Brain-Computer Interface Conference</source> <fpage>241</fpage>&#x02013;<lpage>244</lpage>.</citation>
</ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tamir</surname> <given-names>R.</given-names></name> <name><surname>Dickstein</surname> <given-names>R.</given-names></name> <name><surname>Huberman</surname> <given-names>M.</given-names></name></person-group> (<year>2007</year>). <article-title>Integration of motor imagery and physical practice in group treatment applied to subjects with Parkinson&#x00027;s disease</article-title>. <source>Neurorehabil. Neural Repair</source> <volume>21</volume>, <fpage>68</fpage>&#x02013;<lpage>75</lpage>. <pub-id pub-id-type="doi">10.1177/1545968306292608</pub-id><pub-id pub-id-type="pmid">17172556</pub-id></citation></ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vaid</surname> <given-names>S.</given-names></name> <name><surname>Singh</surname> <given-names>P.</given-names></name> <name><surname>Kaur</surname> <given-names>C.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;EEG signal analysis for BCI interface: a review,&#x0201D;</article-title> in <source>2015 Fifth International Conference on Advanced Computing and Communication Technologies</source> <fpage>143</fpage>&#x02013;<lpage>147</lpage>. <pub-id pub-id-type="doi">10.1109/ACCT.2015.72</pub-id></citation>
</ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vaswani</surname> <given-names>A.</given-names></name> <name><surname>Shazeer</surname> <given-names>N.</given-names></name> <name><surname>Parmar</surname> <given-names>N.</given-names></name> <name><surname>Uszkoreit</surname> <given-names>J.</given-names></name> <name><surname>Jones</surname> <given-names>L.</given-names></name> <name><surname>Gomez</surname> <given-names>A. N.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>&#x0201C;Attention is all you need,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems.</source></citation>
</ref>
<ref id="B48">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>P.</given-names></name> <name><surname>Jiang</surname> <given-names>A.</given-names></name> <name><surname>Liu</surname> <given-names>X.</given-names></name> <name><surname>Shang</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name></person-group> (<year>2018a</year>). <article-title>LSTM-based EEG classification in motor imagery tasks</article-title>. <source>IEEE Trans. Neural Syst. Rehabil. Eng.</source> <volume>26</volume>, <fpage>2086</fpage>&#x02013;<lpage>2095</lpage>. <pub-id pub-id-type="doi">10.1109/TNSRE.2018.2876129</pub-id><pub-id pub-id-type="pmid">30334800</pub-id></citation></ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Hersche</surname> <given-names>M.</given-names></name> <name><surname>T&#x000F6;mekce</surname> <given-names>B.</given-names></name> <name><surname>Kaya</surname> <given-names>B.</given-names></name> <name><surname>Magno</surname> <given-names>M.</given-names></name> <name><surname>Benini</surname> <given-names>L.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;An accurate EEGNet-based motor-imagery brain&#x02013;computer interface for low-power edge computing,&#x0201D;</article-title> in <source>2020 IEEE International Symposium on Medical Measurements and Applications (MeMeA)</source> <fpage>1</fpage>&#x02013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1109/MeMeA49120.2020.9137134</pub-id></citation>
</ref>
<ref id="B50">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Cao</surname> <given-names>L.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Gong</surname> <given-names>X.</given-names></name> <name><surname>Sun</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name></person-group> (<year>2018b</year>). <article-title>Short time Fourier transformation and deep neural networks for motor imagery brain computer interface recognition</article-title>. <source>Concurr. Comput. Pract. Exp.</source> <volume>30</volume>, <fpage>e4413</fpage>. <pub-id pub-id-type="doi">10.1002/cpe.4413</pub-id><pub-id pub-id-type="pmid">37294411</pub-id></citation></ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Williams</surname> <given-names>J. G.</given-names></name> <name><surname>Odley</surname> <given-names>J. L.</given-names></name> <name><surname>Callaghan</surname> <given-names>M.</given-names></name></person-group> (<year>2004</year>). <article-title>Motor imagery boosts proprioceptive neuromuscular facilitation in the attainment and retention of range-of -motion at the hip joint</article-title>. <source>J. Sports Sci. Med.</source> <volume>3</volume>, <fpage>160</fpage>&#x02013;<lpage>166</lpage>.<pub-id pub-id-type="pmid">24482593</pub-id></citation></ref>
<ref id="B52">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xie</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Sun</surname> <given-names>J.</given-names></name> <name><surname>Ma</surname> <given-names>Z.</given-names></name> <name><surname>Qin</surname> <given-names>L.</given-names></name> <name><surname>Li</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>A transformer-based approach combining deep learning network and spatial-temporal information for raw EEG classification</article-title>. <source>IEEE Trans. Neural Syst. Rehabil. Eng.</source> <volume>30</volume>, <fpage>2126</fpage>&#x02013;<lpage>2136</lpage>. <pub-id pub-id-type="doi">10.1109/TNSRE.2022.3194600</pub-id><pub-id pub-id-type="pmid">35914032</pub-id></citation></ref>
<ref id="B53">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>H.</given-names></name> <name><surname>Plataniotis</surname> <given-names>K. N.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;Affective states classification using EEG and semi-supervised deep learning approaches,&#x0201D;</article-title> in <source>2016 IEEE 18th International Workshop on Multimedia Signal Processing (MMSP)</source> (<publisher-loc>Montreal, QC, Canada</publisher-loc>: <publisher-name>IEEE</publisher-name>) <fpage>1</fpage>&#x02013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1109/MMSP.2016.7813351</pub-id></citation>
</ref>
<ref id="B54">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zancanaro</surname> <given-names>A.</given-names></name> <name><surname>Cisotto</surname> <given-names>G.</given-names></name> <name><surname>Paulo</surname> <given-names>J. R.</given-names></name> <name><surname>Pires</surname> <given-names>G.</given-names></name> <name><surname>Nunes</surname> <given-names>U. J.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;CNN-based approaches for cross-subject classification in motor imagery: from the state-of-the-art to DynamicNet,&#x0201D;</article-title> in <source>2021 IEEE Conference on Computational Intelligence in Bioinformatics and Computational Biology (CIBCB)</source> <fpage>1</fpage>&#x02013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1109/CIBCB49929.2021.9562821</pub-id></citation>
</ref>
<ref id="B55">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>D.</given-names></name> <name><surname>Chen</surname> <given-names>K.</given-names></name> <name><surname>Jian</surname> <given-names>D.</given-names></name> <name><surname>Yao</surname> <given-names>L.</given-names></name></person-group> (<year>2020</year>). <article-title>Motor Imagery Classification via Temporal Attention Cues of Graph Embedded EEG Signals</article-title>. <source>IEEE J. Biomed. Health Inform.</source> <volume>24</volume>, <fpage>2570</fpage>&#x02013;<lpage>2579</lpage>. <pub-id pub-id-type="doi">10.1109/JBHI.2020.2967128</pub-id><pub-id pub-id-type="pmid">31976916</pub-id></citation></ref>
<ref id="B56">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zimmermann-Schlatter</surname> <given-names>A.</given-names></name> <name><surname>Schuster</surname> <given-names>C.</given-names></name> <name><surname>Puhan</surname> <given-names>M. A.</given-names></name> <name><surname>Siekierka</surname> <given-names>E.</given-names></name> <name><surname>Steurer</surname> <given-names>J.</given-names></name></person-group> (<year>2008</year>). <article-title>Efficacy of motor imagery in post-stroke rehabilitation: a systematic review</article-title>. <source>J. NeuroEng. Rehabil.</source> <volume>5</volume>, <fpage>8</fpage>. <pub-id pub-id-type="doi">10.1186/1743-0003-5-8</pub-id><pub-id pub-id-type="pmid">18341687</pub-id></citation></ref>
</ref-list> 
</back>
</article> 