<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Med.</journal-id>
<journal-title>Frontiers in Medicine</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Med.</abbrev-journal-title>
<issn pub-type="epub">2296-858X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmed.2020.613708</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Medicine</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Automatic Recognition of Auditory Brainstem Response Characteristic Waveform Based on Bidirectional Long Short-Term Memory</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Chen</surname> <given-names>Cheng</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhan</surname> <given-names>Li</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Pan</surname> <given-names>Xiaoxin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Zhiliang</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Guo</surname> <given-names>Xiaoyu</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Qin</surname> <given-names>Handai</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Xiong</surname> <given-names>Fen</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Shi</surname> <given-names>Wei</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Shi</surname> <given-names>Min</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1105829/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Ji</surname> <given-names>Fei</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Qiuju</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Yu</surname> <given-names>Ning</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1098921/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Xiao</surname> <given-names>Ruoxiu</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1103965/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>School of Computer and Communication Engineering, University of Science &#x00026; Technology Beijing</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>College of Otolaryngology Head and Neck Surgery, National Clinical Research Center for Otolaryngologic Diseases, Key Lab of Hearing Science, Ministry of Education, Beijing Key Lab of Hearing Impairment for Prevention and Treatment, Chinese PLA General Hospital</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>Institute of Artificial Intelligence, University of Science and Technology Beijing</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Changxin Zhang, East China Normal University, China</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Chun Liang, Shenzhen Maternity and Child Healthcare Hospital, China; Kirupa Suthakar, National Institute on Deafness and Other Communication Disorders (NIDCD), United States</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Ning Yu <email>yuning&#x00040;301hospital.org</email></corresp>
<corresp id="c002">Ruoxiu Xiao <email>xiaoruoxiu&#x00040;ustb.edu.cn</email></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Translational Medicine, a section of the journal Frontiers in Medicine</p></fn>
<fn fn-type="other" id="fn002"><p>&#x02020;These authors have contributed equally to this work</p></fn></author-notes>
<pub-date pub-type="epub">
<day>11</day>
<month>01</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2020</year>
</pub-date>
<volume>7</volume>
<elocation-id>613708</elocation-id>
<history>
<date date-type="received">
<day>03</day>
<month>10</month>
<year>2020</year>
</date>
<date date-type="accepted">
<day>03</day>
<month>12</month>
<year>2020</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2021 Chen, Zhan, Pan, Wang, Guo, Qin, Xiong, Shi, Shi, Ji, Wang, Yu and Xiao.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Chen, Zhan, Pan, Wang, Guo, Qin, Xiong, Shi, Shi, Ji, Wang, Yu and Xiao</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license> </permissions>
<abstract><p><bold>Background:</bold> Auditory brainstem response (ABR) testing is a noninvasive electrophysiological auditory function test. Its waveforms and threshold can reflect auditory functional changes in the auditory centers in the brainstem and are widely used in the clinic to diagnose dysfunction in hearing. However, identifying its waveforms and threshold is mainly dependent on manual recognition by experimental persons, which could be primarily influenced by individual experiences. This is also a heavy job in clinical practice.</p>
<p><bold>Methods:</bold> In this work, human ABR was recorded. First, binarization is created to mark 1,024 sampling points accordingly. The selected characteristic area of ABR data is 0&#x02013;8 ms. The marking area is enlarged to expand feature information and reduce marking error. Second, a bidirectional long short-term memory (BiLSTM) network structure is established to improve relevance of sampling points, and an ABR sampling point classifier is obtained by training. Finally, mark points are obtained through thresholding.</p>
<p><bold>Results:</bold> The specific structure, related parameters, recognition effect, and noise resistance of the network were explored in 614 sets of ABR clinical data. The results show that the average detection time for each data was 0.05 s, and recognition accuracy reached 92.91%.</p>
<p><bold>Discussion:</bold> The study proposed an automatic recognition of ABR waveforms by using the BiLSTM-based machine learning technique. The results demonstrated that the proposed methods could reduce recording time and help doctors in making diagnosis, suggesting that the proposed method has the potential to be used in the clinic in the future.</p></abstract>
<kwd-group>
<kwd>auditory brainstem response</kwd>
<kwd>characteristic waveform recognition</kwd>
<kwd>neural network model</kwd>
<kwd>bi-directional long short-term memory</kwd>
<kwd>wavelet transform</kwd>
</kwd-group>
<contract-sponsor id="cn001">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content></contract-sponsor>
<contract-sponsor id="cn002">National Key Research and Development Program of China<named-content content-type="fundref-id">10.13039/501100012166</named-content></contract-sponsor>
<contract-sponsor id="cn003">General Hospital of People&#x00027;s Liberation Army<named-content content-type="fundref-id">10.13039/501100009580</named-content></contract-sponsor>
<contract-sponsor id="cn004">Fundamental Research Funds for the Central Universities<named-content content-type="fundref-id">10.13039/501100012226</named-content></contract-sponsor>
<counts>
<fig-count count="12"/>
<table-count count="3"/>
<equation-count count="13"/>
<ref-count count="20"/>
<page-count count="12"/>
<word-count count="5921"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>Auditory brainstem response (ABR) is a global neural activity in the auditory brainstem centers evoked by acoustic stimulations. It can observe the functional status of the auditory nerve and lower auditory center and reflect the conduction ability of the brainstem auditory pathway (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>). Given that patient&#x00027;s hearing impairment can be diagnosed without his active cooperation, ABR has become one of the routine methods for adult hearing recording (<xref ref-type="bibr" rid="B3">3</xref>&#x02013;<xref ref-type="bibr" rid="B5">5</xref>). The ABR waveform usually has a range of interwave latency, and its potential in microvolts is recorded. Normal ABR usually has five peaks visible, i.e., waves I, II, III, IV, and V. Wave V usually appears as the largest peak in the ABR. In clinical diagnosis, the minimum intensity of sound stimulation to be capable of evoking a recognized ABR is defined as ABR threshold, which is usually dependent on wave V or wave III (<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B7">7</xref>). <xref ref-type="fig" rid="F1">Figure 1</xref> shows the annotated ABR waveforms, which are mainly identified as waves I, III, and V clinically. Other characteristic waves are usually not displayed clearly because of small amplitude, two-wave fusion, and noise interference. Thus, they are rarely used as a basis for diagnosis.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>The annotated ABR waveform (legend data is selected from the datasets applied in this work).</p></caption>
<graphic xlink:href="fmed-07-613708-g0001.tif"/>
</fig>
<p>In clinical diagnosis, the minimum stimulation intensity of wave V is usually used as ABR threshold. Sometimes, when wave III is greater than wave V, the ABR threshold is judged by stimulation intensity of wave III (<xref ref-type="bibr" rid="B8">8</xref>). In determining lesions, the location can be judged according to the interwave latency of waves I, III, and V and the interwave latency between waves and binaural waves (<xref ref-type="bibr" rid="B9">9</xref>). Furthermore, the types of deafness of a patient can be judged by observing the change characteristics of ABR waveform latency and the special shape of the ABR waveform in the same patient under different stimulation levels. Thus, the ABR threshold and interwave latency of waves I, III, and V, which are of great significance in clinical applications, can be obtained by identifying the position of the characteristic wave of ABR. Usually, the potential obtained from each stimulation is weak. In a clinical testing, multiple stimulations must be performed to superimpose, average, and obtain relatively stable waveform results. This process is susceptible to interference by electrical noise arising from stray myogenic potentials or movement artifact. In addition, performing multiple tests on patients and comparing results to avoid unobvious peaks, overlapping peaks, and false peaks, which not only consume a lot of time but are also prone to subjective judgment errors, are usually necessary. Thus, identifying the waveform characteristics of ABR and avoiding interference caused by unclear differentiation, fuzzy characteristics, and abnormal waveforms are important issues that need to be solved urgently and correctly in clinical ABR recording.</p>
<p>The application of computer technology in assisting medical diagnosis can effectively reduce errors caused by repetitive work and complex waveform characteristics. This research direction has been important for ABR consultation for a long time (<xref ref-type="bibr" rid="B10">10</xref>). For example, Wilson (<xref ref-type="bibr" rid="B11">11</xref>) discussed the relationship between ABR and discrete wavelet transform reconstructed waveforms, indicating that the discrete wavelet transform waveform of ABR can be used as an effective time&#x02013;frequency representation of normal ABR but with certain limitations. Especially in some cases, the reconstructed ABR discrete wavelet transform wave is missing because of the invariance of discrete wavelet transform shift. Bradley and Wilson (<xref ref-type="bibr" rid="B12">12</xref>) further studied the method of using derivative wavelet estimation to automatically analyze ABR, which improved the accuracy of the main wave identification to a high level. However, they also mentioned the need for further research on the performance of waveform recognition of abnormal subjects, and manual judgment of abnormal waveforms is still required under clinical conditions. Zhang et al. (<xref ref-type="bibr" rid="B13">13</xref>) proposed an ABR classification method that combined wavelet transform and Bayesian network to reduce the number of stimulus repetitions and avoid nerve fatigue of the examinee. Important features are extracted through image thresholding and wavelet transform. Subsequently, features were applied as variables to classify using Bayesian networks. Experimental results show that the ABR data with only 128 repetitive stimulations can achieve an accuracy of 84.17%. Compared with the clinical test that usually requires 2,000 repetitions, the detection efficiency of ABR is improved greatly. However, wave I and wave V are always prolonged by about 0.1 ms and cause wave range changes. 
Therefore, III&#x02013;V/I&#x02013;III would be inaccurate as an indicator.</p>
<p>Thus, automatic recognition of ABR waveforms through computer-assisted methods can assist clinicians and audiologists in ABR interpretation effectively. It also reduces the errors caused by subjective factors, the interference of complex waveforms, and the burden of a large number of repetitive tasks for the medical staff. This study proposes a method of using the long short-term memory (LSTM) network to identify waves I, III, and V in the ABR waveform and proposes a new idea for the recognition of ABR characteristic waveforms by neural networks. The structure of the study is organized as follows: The experimental data and the detailed description of the proposed method are presented in the Materials and Methods section. The Results section presents the experimental design and the corresponding results. Finally, the Discussion section provides an elaboration of the findings of this work.</p>
</sec>
<sec sec-type="materials and methods" id="s2">
<title>Materials and Methods</title>
<sec>
<title>Data Source</title>
<p>The data are provided by the Department of Otolaryngology Head and Neck Surgery, Chinese PLA General Hospital. The SmartEP evoked potential test system developed by Intelligent Hearing Systems (United States) is used for measurement and acquisition. <xref ref-type="fig" rid="F2">Figure 2</xref> shows the clinical collection process, where <xref ref-type="fig" rid="F2">Figure 2a</xref> represents skin degreasing to enhance conductivity; <xref ref-type="fig" rid="F2">Figure 2b</xref> represents the position of the forehead and earlobe electrodes; <xref ref-type="fig" rid="F2">Figure 2c</xref> represents the positional relationship diagram of the preamplifier, electrodes, and plug-in earphones; and <xref ref-type="fig" rid="F2">Figure 2d</xref> shows the details of the preamplifier. The collected waveform is stored in a server <xref ref-type="fig" rid="F2">Figure 2e</xref> and can be observed with the monitor. Six hundred and fourteen subjects&#x00027; clinical click stimuli ABR data were collected at 96 dB nHL stimulation intensity after 1,024 repeated stimulations, which comprise 181 subjects with normal hearing and 433 with abnormal hearing. The clinical dataset comprises 348 men and 266 women aged 18 to 90 years old. For data structure, the data contain 1,024 sampling points that range from &#x02212;12.78 to 12.80 ms with an average interval of 0.025 ms between every two sampling points. All data were marked by three clinical audiologists with characteristic waves: wave I, wave III, and wave V, and cross-validated. Finally, the data were randomly divided into training and test sets. A total of 491 training sets were used to train the network model, and 123 test sets were used for the final recognition accuracy test.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>The ABR hearing diagnosis clinical collection process. <bold>(a)</bold> Skin degreasing to enhance conductivity; <bold>(b)</bold> the position of the forehead and earlobe electrodes; <bold>(c)</bold> the positional relationship diagram of the preamplifier, electrodes, and plug-in earphones; and <bold>(d)</bold> the details of the preamplifier. The collected waveform is stored in a server <bold>(e)</bold> and can be observed with the monitor.</p></caption>
<graphic xlink:href="fmed-07-613708-g0002.tif"/>
</fig>
</sec>
<sec>
<title>Data Processing</title>
<p>In this work, a new data processing method is proposed. To quantify waveform and label points, two 1,024 &#x000D7; 1 matrices <italic>A</italic> and <italic>B</italic> were generated as the classification training input and label, respectively. <italic>A</italic> represents the potential of the input ABR data. The position of the serial number corresponds to the position of the ABR data sampling point. <italic>B</italic> marks each sampling point as a nonfeature point (0) or a feature point (1). Thus, according to the position of the label value of the label data, the data that corresponded to the position of the label matrix was changed to 1 to meet the binary classification requirements of all sampling points. However, noise created by myogenic potential is observed in some experimental data (<xref ref-type="fig" rid="F3">Figure 3</xref>). In this ABR clinical test data, the ABR waveform has an unusual increase in the sampling point at the end because of the fluctuation of characteristic waves VI and VII and the result of the external interference. To prevent the interference caused by abnormal data, the data up to 8 ms were selected uniformly to identify the characteristic waves.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Abnormal ABR waveform and data quantization method.</p></caption>
<graphic xlink:href="fmed-07-613708-g0003.tif"/>
</fig>
<p>On the other hand, the starting point of the actual stimulation is 0 ms. The final potential value input data and the corresponding training label both retained only 321 sampling points of 0&#x02013;8 ms to avoid interference with neural network training and reduce the amount of calculation in the neural network training process. Thus, <italic>A</italic> and <italic>B</italic> are updated as follows:</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M1"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mtable columnalign='left'><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mrow><mml:mi>A</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>321</mml:mn></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mo>&#x0007B;</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mn>...</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mn>321</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0007D;</mml:mo></mml:mrow><mml:mi>T</mml:mi></mml:msup></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mrow><mml:mi>B</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>321</mml:mn></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mo>&#x0007B;</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mn>...</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mrow><mml:mn>321</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0007D;</mml:mo></mml:mrow><mml:mi>T</mml:mi></mml:msup></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mrow></mml:mrow></mml:math></disp-formula>
<p>In actual processing, the loss function value can easily reach a low level, and sufficient information cannot be learned because the ratio of the labeled value to the unlabeled value in the 321 sample points is only 3:318. The manually labeled information may also bring certain errors. Thus, this study adopted the method of augmenting the position of the identification point in the training label. The four points (0.1 ms) before and after the original marking point were marked as the characteristic area, which expands the marking range of the characteristic waveform.</p>
</sec>
<sec>
<title>Network Structure</title>
<p>LSTM is a recurrent neural network that is mainly improved on the basis of the time step unit by adding the output of memory cells to carry information that needs to be transmitted for a long time. Three gate structures are also added. These gate structures are used to select the retention of the memory cell <italic>C</italic><sub><italic>t</italic>&#x02212;1</sub> value passed from the previous time step, add new information into the memory cell <italic>C</italic><sub><italic>t</italic></sub>, and predict and output the information transmitted by the memory cell and continue to pass it to the next time step.</p>
<p><xref ref-type="fig" rid="F4">Figure 4</xref> is a schematic diagram of the LSTM structure. First, to control the proportion of the input information retained by the memory cells at the previous time step, the output is calculated as follows:</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M2"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>U</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p><italic>h</italic><sub><italic>t</italic>&#x02212;1</sub> is the hidden state value passed at the previous time step; and <italic>W</italic><sub><italic>f</italic></sub>, <italic>U</italic><sub><italic>f</italic></sub>, and <italic>b</italic><sub><italic>f</italic></sub> are the corresponding weights and biases. The activation function usually uses the sigmoid function to map the activation value between [0, 1]. To control the proportion of information updated into the memory cell, the sigmoid activation function was first applied to obtain the output <italic>i</italic><sub><italic>t</italic></sub>. Then, the tan<italic>h</italic> activation function is applied to obtain <italic>a</italic><sub><italic>t</italic></sub>, and the product of the two is used as the information to update the memory cell. <italic>i</italic><sub><italic>t</italic></sub> and <italic>a</italic><sub><italic>t</italic></sub> are calculated as follows:</p>
<disp-formula id="E3"><label>(3)</label><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>U</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E4"><label>(4)</label><mml:math id="M4"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext>tan</mml:mtext><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>U</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>W</italic><sub><italic>i</italic></sub>, <italic>U</italic><sub><italic>i</italic></sub>, <italic>b</italic><sub><italic>i</italic></sub>, <italic>W</italic><sub><italic>a</italic></sub>, <italic>U</italic><sub><italic>a</italic></sub>, and <italic>b</italic><sub><italic>a</italic></sub> are the weights and biases. Finally, the memory cell <italic>C</italic><sub><italic>t</italic></sub> is calculated to the next time step by using Equation (5):</p>
<disp-formula id="E5"><label>(5)</label><mml:math id="M5"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x02299;</mml:mo><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02299;</mml:mo><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where &#x02299; is the Hadamard product, which indicates that the corresponding positions of the matrix are multiplied. The right side refers to the output gate, and the output of the output gate is calculated by using Equation (6):</p>
<disp-formula id="E6"><label>(6)</label><mml:math id="M6"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>o</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>U</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>W</italic><sub><italic>o</italic></sub>, <italic>U</italic><sub><italic>o</italic></sub>, and <italic>b</italic><sub><italic>o</italic></sub> are the weights and biases. Finally, the output value <italic>h</italic><sub><italic>t</italic></sub> at the time step is obtained by using Equation (7):</p>
<disp-formula id="E7"><label>(7)</label><mml:math id="M7"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>o</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02299;</mml:mo><mml:mtext>tan</mml:mtext><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>Schematic diagram of the LSTM network structure.</p></caption>
<graphic xlink:href="fmed-07-613708-g0004.tif"/>
</fig>
<p>The predicted output weight and bias are applied to activate the output value to obtain the predicted value, as shown in Equation (8):</p>
<disp-formula id="E8"><label>(8)</label><mml:math id="M8"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mover accent='true'><mml:mi>y</mml:mi><mml:mo>&#x0005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>V</mml:mi><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Finally, the loss values <inline-formula><mml:math id="M9"><mml:msubsup><mml:mrow><mml:mi>&#x003B4;</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> and <inline-formula><mml:math id="M10"><mml:msubsup><mml:mrow><mml:mi>&#x003B4;</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> of the hidden state are calculated as follows:</p>
<disp-formula id="E9"><label>(9)</label><mml:math id="M11"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>&#x003B4;</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent='true'><mml:mi>y</mml:mi><mml:mo>&#x0005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>&#x02202;</mml:mi><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>&#x02202;</mml:mi><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup><mml:msubsup><mml:mrow><mml:mi>&#x003B4;</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E10"><label>(10)</label><mml:math id="M12"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>&#x003B4;</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mi>&#x003B4;</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>&#x02299;</mml:mo><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msubsup><mml:mrow><mml:mi>&#x003B4;</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x02299;</mml:mo><mml:msub><mml:mrow><mml:mi>o</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02299;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:mtext>tan</mml:mtext><mml:msup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>In this work, BiLSTM is established as the network structure to enable the input sequence to have a bidirectional connection with one another (<xref ref-type="bibr" rid="B14">14</xref>). <xref ref-type="fig" rid="F5">Figure 5</xref> shows that another LSTM layer that propagates backward in time is added on the basis of the unidirectional LSTM forward propagation in time sequence. The final output is determined by the output of the two LSTM layers: forward and backward. Compared with the one-way LSTM, the final output avoids the prediction at each time to only be affected by the input of the previous time. Moreover, it can reflect the information characteristics before and after each prediction point better, thereby making more accurate predictions.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>Schematic diagram of the BiLSTM structure.</p></caption>
<graphic xlink:href="fmed-07-613708-g0005.tif"/>
</fig>
</sec>
<sec>
<title>Wavelet Transform</title>
<p>In the traditional mode, wavelet transform is a commonly used method in ABR extraction and recognition research (<xref ref-type="bibr" rid="B15">15</xref>). In ABR extraction, wavelet transform can achieve the effect of eliminating noise by selecting the detailed components of specific frequencies for reconstruction and to make the ABR waveform smoother. Obtaining relatively clear waveforms while reducing repetitive stimulation is also possible. Generally, continuous wavelet transform is defined as (<xref ref-type="bibr" rid="B16">16</xref>):</p>
<disp-formula id="E11"><label>(11)</label><mml:math id="M13"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>W</mml:mi><mml:mi>T</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x003C4;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msqrt><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msqrt></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x0222B;</mml:mo></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mi>&#x0221E;</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x0221E;</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>*</mml:mo><mml:mi>&#x003C8;</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mi>&#x003C4;</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mtext>d</mml:mtext><mml:mi>t</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>f</italic>(<italic>t</italic>) is the signal in the time domain, and the part of <inline-formula><mml:math id="M14"><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msqrt><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msqrt></mml:mrow></mml:mfrac><mml:mi>&#x003C8;</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mi>&#x003C4;</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:math></inline-formula> is a wavelet function, which can also be denoted as &#x003C8;<sub><italic>a</italic>,&#x003C4;</sub>(<italic>t</italic>). Two variables, namely, scale <italic>a</italic> and translation &#x003C4;, are available. Scale <italic>a</italic> is applied to control the expansion and contraction of the wavelet function, and the translation amount &#x003C4; controls the translation of the wavelet function. Scale <italic>a</italic> is inversely proportional to its equivalent frequency. The scaling function is defined as &#x003C6;(<italic>t</italic>). The complete wavelet expansion is as follows:</p>
<disp-formula id="E12"><label>(12)</label><mml:math id="M15"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mi>&#x0221E;</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x0221E;</mml:mi></mml:mrow></mml:msubsup><mml:msub><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mi>&#x003C6;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mi>&#x0221E;</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x0221E;</mml:mi></mml:mrow></mml:msubsup><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>&#x0221E;</mml:mi></mml:mrow></mml:msubsup><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mi>&#x003C8;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msup><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>c</italic> and <italic>d</italic> are the coefficients of the corresponding function, <italic>j</italic> is the frequency domain parameter that determines the frequency characteristics of the wavelet, and <italic>k</italic> is the time domain parameter that controls the position of the wavelet base in the time domain. Although the scale and wavelet functions are complex and have different characteristics, the process of wavelet decomposition can be regarded as using a low-pass filter and a high-pass filter to decompose the signal by frequency. The low-frequency components decomposed in each layer are called approximate components, and the high-frequency components are called detailed components. Thus, approximate components and detailed components were applied to the reconstructed waveform.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec>
<title>Experimental Procedure</title>
<p>In this study, three sets of experiments, namely, (1) comparison between various network structures, (2) comparison experiment of wavelet transform, and (3) comparison experiment of different hidden layer nodes, were designed. <xref ref-type="fig" rid="F6">Figure 6</xref> shows the experimental flowchart. The sequence input layer was used as the input of the potential value of 321 sampling points, and the data were passed to several LSTM or BiLSTM layers. Subsequently, the fully connected layer was connected. The classification probability of each time point was calculated using the softmax function. Finally, the classification layer was connected. The cross-entropy function (<xref ref-type="bibr" rid="B17">17</xref>) was used to calculate the loss function of each time point and the overall loss function of the sequence. Then, the time sequence was classified.</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>Experimental flowchart.</p></caption>
<graphic xlink:href="fmed-07-613708-g0006.tif"/>
</fig>
<p>In the comparison experiment of multiple network structures, seven network structures, namely, (1) single-layer LSTM, (2) double-layer LSTM, (3) single-layer BiLSTM, (4) double-layer BiLSTM, (5) three-layer BiLSTM, (6) four-layer BiLSTM, and (7) five-layer BiLSTM network layers, were selected. In the comparative experiment of different hidden layer nodes, a three-layer bidirectional LSTM network was used for training, and different numbers of hidden neurons were applied. The experiment applied four groups of different numbers of hidden neurons, namely, 64, 128, 256, and 512.</p>
<p>In the comparative experiment of the wavelet transform, noise was added to all data as interference. Seven different network structures were used for testing. Specifically, the training data preprocessed by wavelet transform were used as the experimental group, and the training data trained using the original data were used as the control group. In this experiment, ABR data were decomposed in six layers, and the approximate and detailed components of the sixth layer and the fourth, fifth, and sixth layers were retained to reconstruct the waveform, respectively. The parameter configuration is consistent. The network was trained five times with K-fold cross-validation (<italic>K</italic> = 9), and the test was performed to obtain the average value.</p>
<p>The output results are in the form of &#x0201C;region.&#x0201D; <xref ref-type="fig" rid="F7">Figure 7</xref> expresses the output visualization, where the curve is the original ABR used for identification, and the red labels are the network prediction classification results reduced by four times. The ABR of the first 8 ms is clearly divided into two different labels. The part with 1 is the identified peak, and the other part is the identified characteristic nonpeak. Postprocessing is defined as follows: A total of 20 sampling points (0.5 ms) are set as the threshold. The area within 20 sampling points between the beginning and the end is the same characteristic wave area. Finally, the time mean value of the first and last points is calculated as the time value of the recognized characteristic wave. The similar sampling points are calculated to obtain the unique characteristic wave value. Finally, the recognition accuracy rate is calculated according to the identified ABR feature wave position.</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>Feature labeling on the ABR, where <bold>(a)</bold> shows output by modes. <bold>(b)</bold> is result by postprocessing.</p></caption>
<graphic xlink:href="fmed-07-613708-g0007.tif"/>
</fig>
<p>Four recognition results of ABR data were randomly selected and presented in <xref ref-type="fig" rid="F8">Figure 8</xref>. After postprocessing, output vectors from models were converted to feature points. The identified feature points are almost identical to those selected using manual labeling techniques, illustrating the potential utility of this method in clinical settings. Even in some complex ABR data, manual annotation usually records multiple sets of data to determine the correct peak (<xref ref-type="fig" rid="F8">Figure 8d</xref>). However, the model can directly and accurately identify the peak of the waveform from a single waveform (<xref ref-type="fig" rid="F8">Figure 8h</xref>). Therefore, they also verify the possibility of the proposed method. To better verify the accuracy of recognition, this work has carried out a quantitative discussion from different network structures, wavelet transform processing, and number of hidden neurons. However, the model may also lead to some misjudgments. For example, <xref ref-type="fig" rid="F9">Figure 9a</xref> shows an incorrect recognition result. Since wave I and wave III of the waveform are not obvious, enough continuous identification points cannot be obtained. Therefore, only relatively obvious wave V is obtained after postprocessing (<xref ref-type="fig" rid="F9">Figure 9c</xref>). Also, <xref ref-type="fig" rid="F9">Figure 9b</xref> presents another wrong result. In this case, the obtained error of wave I reached 0.67 ms. This is because the model has judged the wrong wave I (<xref ref-type="fig" rid="F9">Figure 9d</xref>). Thus, in future work, improving the model&#x00027;s ability to analyze complex waveforms is still an important direction.</p>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>Recognition results of four data, where <bold>(a&#x02013;d)</bold> are manual labels. Also, <bold>(e&#x02013;h)</bold> represent outputs of the proposed three-layer BiLSTM model.</p></caption>
<graphic xlink:href="fmed-07-613708-g0008.tif"/>
</fig>
<fig id="F9" position="float">
<label>Figure 9</label>
<caption><p>Two error recognition results, where <bold>(a,b)</bold> are manual labels. Also, <bold>(c,d)</bold> represent outputs of the proposed three-layer BiLSTM model.</p></caption>
<graphic xlink:href="fmed-07-613708-g0009.tif"/>
</fig>
</sec>
<sec>
<title>Comparison Between Multiple Network Structures</title>
<p>Generally, an error scale of 0.2 ms is applied as a scale range of clinically marked points. Three criterion values for the maximum allowable error value (ME) were tested: 0.1, 0.15, and 0.2 ms. The prediction result was deemed acceptable if the prediction point and the manually identified point were within the ME criteria range. According to the number of correct prediction points <italic>r</italic><sub><italic>p</italic></sub> and the total marked points <italic>p</italic><sub><italic>n</italic></sub>, the accuracy (ACC) rate is calculated using <italic>r</italic><sub><italic>p</italic></sub>/<italic>p</italic><sub><italic>n</italic></sub>, as shown in Equation (13):</p>
<disp-formula id="E14"><label>(13)</label><mml:math id="M17"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext>ACC</mml:mtext><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>/</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>In this study, three error scales (ME) of 0.1, 0.15, and 0.2 ms were calculated, respectively, to further explore the recognition accuracy and other related laws. Loss value of training results with different network structures and the ACC under different error scales are revealed in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Loss value and ACC of each network structure.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"><bold>Network</bold><break/> <bold>structure</bold></th>
<th valign="top" align="center"><bold>Training</bold><break/> <bold>loss</bold></th>
<th valign="top" align="center"><bold>Validation</bold><break/> <bold>loss</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold><break/> <bold>(0.1 ms) (%)</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold><break/> <bold>(0.15 ms) (%)</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold><break/> <bold>(0.2 ms) (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">LSTM</td>
<td valign="top" align="center">0.1463</td>
<td valign="top" align="center">0.1635</td>
<td valign="top" align="center">37.08</td>
<td valign="top" align="center">44.92</td>
<td valign="top" align="center">50.37</td>
</tr>
<tr>
<td valign="top" align="left">LSTMx2</td>
<td valign="top" align="center">0.1123</td>
<td valign="top" align="center">0.1625</td>
<td valign="top" align="center">58.61</td>
<td valign="top" align="center">65.75</td>
<td valign="top" align="center">70.59</td>
</tr>
<tr>
<td valign="top" align="left">BiLSTM</td>
<td valign="top" align="center">0.1264</td>
<td valign="top" align="center">0.1562</td>
<td valign="top" align="center">61.96</td>
<td valign="top" align="center">72.03</td>
<td valign="top" align="center">77.60</td>
</tr>
<tr>
<td valign="top" align="left">BiLSTMx2</td>
<td valign="top" align="center">0.0849</td>
<td valign="top" align="center">0.1285</td>
<td valign="top" align="center">78.74</td>
<td valign="top" align="center">84.88</td>
<td valign="top" align="center">86.84</td>
</tr>
<tr>
<td valign="top" align="left">BiLSTMx3</td>
<td valign="top" align="center">0.0704</td>
<td valign="top" align="center">0.1275</td>
<td valign="top" align="center">85.46</td>
<td valign="top" align="center">91.06</td>
<td valign="top" align="center">92.91</td>
</tr>
<tr>
<td valign="top" align="left">BiLSTMx4</td>
<td valign="top" align="center">0.0651</td>
<td valign="top" align="center">0.1342</td>
<td valign="top" align="center">82.48</td>
<td valign="top" align="center">88.32</td>
<td valign="top" align="center">90.20</td>
</tr>
<tr>
<td valign="top" align="left">BiLSTMx5</td>
<td valign="top" align="center">0.0617</td>
<td valign="top" align="center">0.1467</td>
<td valign="top" align="center">83.31</td>
<td valign="top" align="center">88.80</td>
<td valign="top" align="center">90.90</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="fig" rid="F10">Figure 10A</xref> shows the data distribution so that the correlation with different network structures can be observed visually. Notably, the ACC of the BiLSTM network is higher than that of the LSTM network. In addition, the ACC of the single-layer BiLSTM network and the double-layer LSTM network is similar. This is because the bidirectional LSTM network has a structure similar to that of the double-layer LSTM network. However, information in the BiLSTM network has the characteristics of propagating in forward and reverse directions, whereas the two-layer LSTM network only propagates in the forward sequence over time. This phenomenon leads to differences in the ACC between the two models. The LSTM and BiLSTM networks increase ACC with the number of superimposed layers. After the BiLSTM network reaches three layers, the ACC will no longer increase significantly. Deeper network structures will gradually reach an over-fitting state and increase computational pressure because of excessive parameters. Thus, the three-layer BiLSTM network is a better choice.</p>
<fig id="F10" position="float">
<label>Figure 10</label>
<caption><p><bold>(A)</bold> ACC metrics with different network structures. In the statistical results, the three-layer BiLSTM network reached 92.91% and is the highest index among all the networks. The single-layer LSTM, which has the lowest index, is about half of it. <bold>(B)</bold> ACC metrics with different hidden nodes, where the 512 nodes ranked first, and the 256 and 128 quantities stood at the second and third positions. Also, the 64 nodes ranked last.</p></caption>
<graphic xlink:href="fmed-07-613708-g0010.tif"/>
</fig>
</sec>
<sec>
<title>Wavelet Transform Experiment</title>
<p>When testing the ACC of the wavelet transform, ABR data was decomposed in six layers. Also, approximate components of the sixth layer and detailed components of the fourth, fifth, and sixth layers were retained to reconstruct the waveform. <xref ref-type="fig" rid="F11">Figure 11</xref> expresses an instance of filtered result by wavelet transform. The curve processed by wavelet transform becomes smoother. Then, unprocessed ABR data served as a control experiment. In this work, detection and comparison were carried out based on two error scales of 0.1 and 0.2 ms (<xref ref-type="table" rid="T2">Table 2</xref>). The results of recognition ACC are shown in <xref ref-type="fig" rid="F12">Figure 12</xref>.</p>
<fig id="F11" position="float">
<label>Figure 11</label>
<caption><p>An instance result from the wavelet transform, where <bold>(a)</bold> is the original data. An obvious interference occurred in this waveform. <bold>(b)</bold> is obtained after smoothing.</p></caption>
<graphic xlink:href="fmed-07-613708-g0011.tif"/>
</fig>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>The ACC of each network structure with original data and wavelet transform data.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"><bold>Network</bold><break/> <bold>structure</bold></th>
<th valign="top" align="center"><bold>Original data</bold><break/> <bold>(0.1 ms) (%)</bold></th>
<th valign="top" align="center"><bold>Wavelet transform data</bold><break/> <bold>(0.1 ms) (%)</bold></th>
<th valign="top" align="center"><bold>Original data</bold><break/> <bold>(0.2 ms) (%)</bold></th>
<th valign="top" align="center"><bold>Wavelet transform data</bold><break/> <bold>(0.2 ms) (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">LSTM</td>
<td valign="top" align="center">37.08</td>
<td valign="top" align="center">37.95</td>
<td valign="top" align="center">50.37</td>
<td valign="top" align="center">52.94</td>
</tr>
<tr>
<td valign="top" align="left">LSTMx2</td>
<td valign="top" align="center">58.61</td>
<td valign="top" align="center">55.47</td>
<td valign="top" align="center">70.59</td>
<td valign="top" align="center">72.46</td>
</tr>
<tr>
<td valign="top" align="left">BiLSTM</td>
<td valign="top" align="center">61.96</td>
<td valign="top" align="center">59.17</td>
<td valign="top" align="center">77.60</td>
<td valign="top" align="center">76.25</td>
</tr>
<tr>
<td valign="top" align="left">BiLSTMx2</td>
<td valign="top" align="center">78.74</td>
<td valign="top" align="center">73.03</td>
<td valign="top" align="center">86.84</td>
<td valign="top" align="center">84.71</td>
</tr>
<tr>
<td valign="top" align="left">BiLSTMx3</td>
<td valign="top" align="center">85.46</td>
<td valign="top" align="center">79.00</td>
<td valign="top" align="center">92.91</td>
<td valign="top" align="center">90.50</td>
</tr>
<tr>
<td valign="top" align="left">BiLSTMx4</td>
<td valign="top" align="center">82.48</td>
<td valign="top" align="center">77.73</td>
<td valign="top" align="center">90.20</td>
<td valign="top" align="center">89.67</td>
</tr>
<tr>
<td valign="top" align="left">BiLSTMx5</td>
<td valign="top" align="center">83.31</td>
<td valign="top" align="center">78.09</td>
<td valign="top" align="center">90.90</td>
<td valign="top" align="center">89.17</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F12" position="float">
<label>Figure 12</label>
<caption><p>Influence of wavelet transform preprocessing on accuracy. wt represents the results obtained by wavelet transform preprocessing.</p></caption>
<graphic xlink:href="fmed-07-613708-g0012.tif"/>
</fig>
<p>Recognition ACC values of preprocessing in the LSTM network using wavelet transform are slightly higher than those of the control group. However, they are not as good as those in the control group in the BiLSTM network. In particular, the highest ACC difference reaches 6.46% when calculated with a 0.1-ms error scale. Also, the difference reduces to &#x0003C;3% when calculated with a 0.2-ms error scale. Results indicate that wavelet transform preprocessing does not yield a higher ACC by smoothing the curves. Due to wavelet decomposition and reconstruction, a slight deviation was created in the position of the wave crest. Some information was destroyed in the ABR waveform; therefore, the results of training and recognition were affected. This means that the BiLSTM network has noise immunity and can handle low-quality ABR data.</p>
</sec>
<sec>
<title>Comparative Experiments of Different Hidden Layer Nodes</title>
<p>Based on the above results, the three-layer BiLSTM network is a better choice. The ACC results with different hidden node numbers were discussed in this work (<xref ref-type="table" rid="T3">Table 3</xref>). <xref ref-type="fig" rid="F10">Figure 10B</xref> expresses the ACC results with different hidden layer nodes of 64, 128, 256, and 512. Obviously, recognition ACC increases with the number of hidden nodes, because a sufficient number of parameters allows the network to fit the data accurately. Also, the ACC of the 0.2-ms error scale increases slowly during the change process of 256&#x02013;512 nodes and is basically saturated. Considering the accuracy standard in practical applications and the time cost of training that may be brought by an increasing number of hidden nodes, a network of 512 hidden nodes is a better choice.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>The ACC with different hidden layer nodes.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"><bold>Hidden layer nodes</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold><break/> <bold>(0.1 ms) (%)</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold><break/> <bold>(0.15 ms) (%)</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold><break/> <bold>(0.2 ms) (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">64</td>
<td valign="top" align="center">70.50</td>
<td valign="top" align="center">80.61</td>
<td valign="top" align="center">83.48</td>
</tr>
<tr>
<td valign="top" align="left">128</td>
<td valign="top" align="center">73.90</td>
<td valign="top" align="center">82.44</td>
<td valign="top" align="center">85.36</td>
</tr>
<tr>
<td valign="top" align="left">256</td>
<td valign="top" align="center">80.44</td>
<td valign="top" align="center">87.49</td>
<td valign="top" align="center">91.07</td>
</tr>
<tr>
<td valign="top" align="left">512</td>
<td valign="top" align="center">85.46</td>
<td valign="top" align="center">91.06</td>
<td valign="top" align="center">92.91</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Furthermore, this work mainly discusses the characteristic wave recognition process of a click ABR with a 96-dB nHL stimulus. Also, only parameters such as latency and wave interval can be obtained. In clinical applications, many indicators can still be used as a diagnostic basis, such as the relationship between potential values of different stimulus sizes, response and disappearance of wave V, and change of interwave latency of each characteristic wave. This also provides a new idea for the subsequent computer-assisted ABR diagnosis and treatment.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>This work proposes an automatic recognition method for ABR characteristic waveforms using the BiLSTM network. The main purpose is to identify positions of characteristic waves I, III, and V, which assist the medical staff in obtaining relevant clinical test parameters, such as interwave latency and wave interval. A data quantification process is designed to analyze the characteristic waveform of ABR, including selection area of potential signal and expansion of label position. An optimal network model structure is obtained through multiple sets of comparative experiments. In 614 sets of clinically collected ABR waveform experiments, the network&#x00027;s overall recognition of characteristic waves showed an ACC of 92.91%.</p>
<p>Experimental results express that the method proposes a new idea for the identification of ABR characteristic waveforms, and helps professionals to obtain interwave latency parameters in ABR waveforms. Therefore, a computer automatic identification method can obtain deeper information, avoid subjective judgment error by the medical staff in the manual identification process effectively, reduce the number of repeated stimulations during a test, and also avoid vision fatigue of the tested person. Because of noise immunity of the proposed network model, it can effectively reduce repetitive detection of patients. In the process of large-scale identification, the average time of each data by using the method only takes approximately 0.05 s, which is much faster than the speed of manual identification. Thus, it has great advantages in repeatable work.</p>
<p>Some efforts have been proposed to analyze ABR waveforms using deep learning methods. For example, Fallata and Dajani (<xref ref-type="bibr" rid="B18">18</xref>) proposed a new detection method of ABR based on ANN to reduce detection time. Before ANN calculation, discrete wavelet transform was processed to extract features of ABR. The reduction in recording time was expected to promote the application of this measurement technique in clinical practice. McKearney and MacKinnon (<xref ref-type="bibr" rid="B19">19</xref>) divided ABR data into clear response, uncertain, or no response. In their work, they constructed a deep convolutional neural network and fine-tuned it to realize ABR classification. Results showed that the network may have clinical utility in assisting clinicians in waveform classification for the purpose of hearing threshold estimation. Different from the existing works, this research proposed a new data processing method and established an end-to-end deep learning model. The model can also be directly calculated without complicated mathematical transformations, so it provides a new idea for deep learning in signal processing.</p>
</sec>
<sec sec-type="data-availability-statement" id="s5">
<title>Data Availability Statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material; further inquiries can be directed to the corresponding author/s.</p>
</sec>
<sec id="s6">
<title>Ethics Statement</title>
<p>The studies involving human participants were reviewed and approved by the Ethics Committee of the PLA General Hospital. Written informed consent to participate in this study was provided by the participants&#x00027; legal guardian/next of kin. Written informed consent was obtained from the individual(s) for the publication of any potentially identifiable images or data included in this article.</p>
</sec>
<sec id="s7">
<title>Author Contributions</title>
<p>CC and LZ: conceptualization and writing&#x02014;original draft preparation. CC: methodology. XP: software and data curation. HQ, FX, and WS: validation. MS: formal analysis. FJ: investigation. QW: resources. RX and NY: writing&#x02014;review and editing. LZ: visualization. NY: supervision. ZW and XG: project administration. RX: funding acquisition. All authors have read and agreed to the published version of the manuscript.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
</body>
<back>
<ack><p>This manuscript has been released as a pre-print at bioRxiv (<xref ref-type="bibr" rid="B20">20</xref>).</p>
</ack>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Marina</surname> <given-names>SA</given-names></name> <name><surname>Antonio</surname> <given-names>EF</given-names></name> <name><surname>Tobias</surname> <given-names>R</given-names></name></person-group>. <article-title>Individual differences in the attentional modulation of the human auditory brainstem response to speech inform on speech-in-noise deficits</article-title>. <source>Sci Rep.</source> (<year>2019</year>) <volume>9</volume>:<fpage>14131</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-019-50773-1</pub-id><pub-id pub-id-type="pmid">31575950</pub-id></citation></ref>
<ref id="B2">
<label>2.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Etard</surname> <given-names>O</given-names></name> <name><surname>Kegler</surname> <given-names>M</given-names></name> <name><surname>Braiman</surname> <given-names>C</given-names></name> <name><surname>Frote</surname> <given-names>AE</given-names></name> <name><surname>Reichenbach</surname> <given-names>T</given-names></name></person-group>. <article-title>Decoding of selective attention to continuous speech from the human auditory brainstem response</article-title>. <source>NeuroImage.</source> (<year>2019</year>) <volume>200</volume>:<fpage>1</fpage>&#x02013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1016/j.neuroimage.2019.06.029</pub-id><pub-id pub-id-type="pmid">31212098</pub-id></citation></ref>
<ref id="B3">
<label>3.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kurihara</surname> <given-names>T</given-names></name> <name><surname>Igarashi</surname> <given-names>Y</given-names></name> <name><surname>Kobai</surname> <given-names>K</given-names></name> <name><surname>Mizobuchi</surname> <given-names>T</given-names></name> <name><surname>Yokota</surname> <given-names>H</given-names></name></person-group>. <article-title>Diagnosis and prediction of prognosis for Bickerstaff&#x00027;s brainstem encephalitis using auditory brainstem response: a case report</article-title>. <source>Acute Med Surg.</source> (<year>2020</year>) <volume>7</volume>:<fpage>e517</fpage>. <pub-id pub-id-type="doi">10.1002/ams2.517</pub-id><pub-id pub-id-type="pmid">32685172</pub-id></citation></ref>
<ref id="B4">
<label>4.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Feng</surname> <given-names>S</given-names></name> <name><surname>Li</surname> <given-names>X</given-names></name> <name><surname>Luo</surname> <given-names>Y</given-names></name> <name><surname>Li</surname> <given-names>W</given-names></name> <name><surname>Wang</surname> <given-names>Z</given-names></name> <name><surname>Jiang</surname> <given-names>X</given-names></name></person-group>. <article-title>Characteristics and clinical significance of auditory brainstem response in tinnitus with normal auditory thresholds</article-title>. <source>Chin J Otol.</source> (<year>2019</year>) <volume>17</volume>:<fpage>209</fpage>&#x02013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.3969/j.issn.1672-2922.2019.02.013</pub-id></citation></ref>
<ref id="B5">
<label>5.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Jiang</surname> <given-names>Y</given-names></name> <name><surname>Wang</surname> <given-names>D</given-names></name> <name><surname>Liu</surname> <given-names>Z</given-names></name> <name><surname>Tan</surname> <given-names>J</given-names></name> <name><surname>Li</surname> <given-names>G</given-names></name></person-group>. <article-title>Comparing auditory brainstem responses evoked by click and sweep-tone in normal-hearing adults</article-title>. In: <source>2019 41st Annual International Conference of the IEEE Engineering in Medicine &#x00026; Biology Society (EMBC)</source>. <publisher-loc>Berlin</publisher-loc>: <publisher-name>IEEE</publisher-name> (<year>2019</year>). p. <fpage>5237</fpage>&#x02013;<lpage>40</lpage>. <pub-id pub-id-type="pmid">31947039</pub-id></citation></ref>
<ref id="B6">
<label>6.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Qu</surname> <given-names>L</given-names></name> <name><surname>Tao</surname> <given-names>L</given-names></name> <name><surname>Zeng</surname> <given-names>M</given-names></name></person-group>. <article-title>Analysis of the characteristics of auditory brainstem response in high-risk neonates of different gestational age</article-title>. <source>Mater Child Health Care China.</source> (<year>2013</year>) <volume>28</volume>:<fpage>4322</fpage>&#x02013;<lpage>4</lpage>. <pub-id pub-id-type="doi">10.7620/zgfybj.j.issn.1001-4411.2013.28.22</pub-id></citation></ref>
<ref id="B7">
<label>7.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sara</surname> <given-names>MK</given-names></name> <name><surname>MadsenJames</surname> <given-names>M</given-names></name> <name><surname>HarteClaus</surname> <given-names>ED</given-names></name></person-group>. <article-title>Accuracy of averaged auditory brainstem response amplitude and latency estimates</article-title>. <source>Int J Audiol.</source> (<year>2018</year>) <volume>57</volume>:<fpage>345</fpage>&#x02013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1080/14992027.2017.1381770</pub-id><pub-id pub-id-type="pmid">28971715</pub-id></citation></ref>
<ref id="B8">
<label>8.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lewis</surname> <given-names>JD</given-names></name> <name><surname>Kopun</surname> <given-names>J</given-names></name> <name><surname>Neely</surname> <given-names>ST</given-names></name> <name><surname>Schimid</surname> <given-names>KK</given-names></name> <name><surname>Gorga</surname> <given-names>MP</given-names></name></person-group>. <article-title>Tone-burst auditory brainstem response wave V latencies in normal-hearing and hearing-impaired ears</article-title>. <source>J Acoust Soc Am.</source> (<year>2015</year>) <volume>138</volume>:<fpage>3210</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1121/1.4935516</pub-id><pub-id pub-id-type="pmid">26627795</pub-id></citation></ref>
<ref id="B9">
<label>9.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Y</given-names></name> <name><surname>Lan</surname> <given-names>Y</given-names></name> <name><surname>Wang</surname> <given-names>L</given-names></name> <name><surname>Yu</surname> <given-names>K</given-names></name> <name><surname>Zhao</surname> <given-names>W</given-names></name> <name><surname>Wang</surname> <given-names>T</given-names></name></person-group>. <article-title>ABR in early diagnosis and its prognosis of patients with dysaudia after craniocerebral trauma</article-title>. <source>Prog Mod Biomed.</source> (<year>2016</year>) <volume>27</volume>:<fpage>5336</fpage>&#x02013;<lpage>9</lpage>.</citation></ref>
<ref id="B10">
<label>10.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Krumbholz</surname> <given-names>K</given-names></name> <name><surname>Hardy</surname> <given-names>AJ</given-names></name> <name><surname>Boer</surname> <given-names>J</given-names></name></person-group>. <article-title>Automated extraction of auditory brainstem response latencies and amplitudes by means of non-linear curve registration</article-title>. <source>Comput Methods Prog Biomed.</source> (<year>2020</year>) <volume>196</volume>:<fpage>105595</fpage>. <pub-id pub-id-type="doi">10.1016/j.cmpb.2020.105595</pub-id><pub-id pub-id-type="pmid">32563894</pub-id></citation></ref>
<ref id="B11">
<label>11.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wilson</surname> <given-names>WJ</given-names></name></person-group>. <article-title>The relationship between the auditory brain-stem response and its reconstructed waveforms following discrete wavelet transformation</article-title>. <source>Clin Neurophysiol.</source> (<year>2004</year>) <volume>115</volume>:<fpage>1129</fpage>&#x02013;<lpage>39</lpage>. <pub-id pub-id-type="doi">10.1016/j.clinph.2003.11.019</pub-id><pub-id pub-id-type="pmid">15066538</pub-id></citation></ref>
<ref id="B12">
<label>12.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bradley</surname> <given-names>AP</given-names></name> <name><surname>Wilson</surname> <given-names>WJ</given-names></name></person-group>. <article-title>Automated analysis of the auditory brainstem response using derivative estimation wavelets</article-title>. <source>Audiol Neurotol.</source> (<year>2005</year>) <volume>10</volume>:<fpage>6</fpage>&#x02013;<lpage>21</lpage>. <pub-id pub-id-type="doi">10.1159/000081544</pub-id><pub-id pub-id-type="pmid">15486440</pub-id></citation></ref>
<ref id="B13">
<label>13.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>R</given-names></name> <name><surname>McAllister</surname> <given-names>G</given-names></name> <name><surname>Scotney</surname> <given-names>B</given-names></name> <name><surname>Mcclean</surname> <given-names>S</given-names></name> <name><surname>Houston</surname> <given-names>G</given-names></name></person-group>. <article-title>Combining wavelet analysis and Bayesian networks for the classification of auditory brainstem response</article-title>. <source>IEEE Trans Inform Technol Biomed.</source> (<year>2006</year>) <volume>10</volume>:<fpage>458</fpage>&#x02013;<lpage>67</lpage>. <pub-id pub-id-type="doi">10.1109/TITB.2005.863865</pub-id><pub-id pub-id-type="pmid">16871712</pub-id></citation></ref>
<ref id="B14">
<label>14.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cross</surname> <given-names>J</given-names></name> <name><surname>Huang</surname> <given-names>L</given-names></name></person-group>. <article-title>Incremental parsing with minimal features using bi-directional LSTM</article-title>. In: <italic>Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics</italic>. Berlin (<year>2016</year>) <pub-id pub-id-type="doi">10.18653/v1/P16-2006</pub-id></citation></ref>
<ref id="B15">
<label>15.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sun</surname> <given-names>Y</given-names></name> <name><surname>Chen</surname> <given-names>ZX</given-names></name></person-group>. <article-title>Fast extraction method of auditory brainstem response based on wavelet transformation</article-title>. <source>Int Conf Wave Anal Patt Recogn.</source> (<year>2007</year>) <volume>4</volume>:<fpage>1862</fpage>&#x02013;<lpage>4</lpage>. <pub-id pub-id-type="doi">10.1109/ICWAPR.2007.4421758</pub-id></citation></ref>
<ref id="B16">
<label>16.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rushaidin</surname> <given-names>MM</given-names></name> <name><surname>Salleh</surname> <given-names>SH</given-names></name> <name><surname>Hafizi</surname> <given-names>O</given-names></name> <name><surname>Mahyar</surname> <given-names>H</given-names></name> <name><surname>Ariff</surname> <given-names>AK</given-names></name></person-group>. <article-title>Wave V detection using continuous wavelet transform of auditory brainstem response signal</article-title>. <source>Prog Electromag Res Symp.</source> (<year>2012</year>) <volume>2012</volume>:<fpage>1889</fpage>&#x02013;<lpage>93</lpage>.</citation></ref>
<ref id="B17">
<label>17.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Nasr</surname> <given-names>GE</given-names></name> <name><surname>Badr</surname> <given-names>EA</given-names></name> <name><surname>Joun</surname> <given-names>C</given-names></name></person-group>. <source>Cross Entropy Error Function in Neural Networks: Forecasting Gasoline Demand</source>. <publisher-loc>Alberta, CA</publisher-loc>: <publisher-name>The Florida ai Research Society</publisher-name>. (<year>2002</year>).</citation></ref>
<ref id="B18">
<label>18.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fallatah</surname> <given-names>A</given-names></name> <name><surname>Dajani</surname> <given-names>HR</given-names></name></person-group>. <article-title>Accurate detection of speech auditory brainstem responses using a spectral feature-based ANN method</article-title>. <source>Biomed Sign Process Control.</source> (<year>2018</year>) <volume>44</volume>:<fpage>307</fpage>&#x02013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1016/j.bspc.2018.05.007</pub-id></citation></ref>
<ref id="B19">
<label>19.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>McKearney</surname> <given-names>RM</given-names></name> <name><surname>MacKinnon</surname> <given-names>RC</given-names></name></person-group>. <article-title>Objective auditory brainstem response classification using machine learning</article-title>. <source>Int J Audiol.</source> (<year>2019</year>) <volume>58</volume>:<fpage>224</fpage>&#x02013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1080/14992027.2018.1551633</pub-id><pub-id pub-id-type="pmid">30663907</pub-id></citation></ref>
<ref id="B20">
<label>20.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>C</given-names></name> <name><surname>Zhan</surname> <given-names>L</given-names></name> <name><surname>Pan</surname> <given-names>XX</given-names></name> <name><surname>Wang</surname> <given-names>ZL</given-names></name> <name><surname>Guo</surname> <given-names>XY</given-names></name> <name><surname>Qin</surname> <given-names>HD</given-names></name> <etal/></person-group>. <article-title>Automatic recognition of auditory brainstem response characteristic waveform based on BiLSTM</article-title>. <source>bioRxiv. [Preprint]</source>. (<year>2020</year>). <pub-id pub-id-type="doi">10.1101/2020.10.03.324665</pub-id></citation></ref>
</ref-list>
<fn-group>
<fn fn-type="financial-disclosure"><p><bold>Funding.</bold> This work was funded by the National Key Research and Development Program (2017YFB1002804 and 2016YFC0901304), National Natural Science Foundation of China (61701022), PLA General Hospital (QNC19051), the Active Health Project of the Ministry of Science and Technology (2020YFC2004001), the Fundamental Research Funds for the Central Universities (FRF-BD-20-11A), and the Beijing Top Discipline for Artificial Intelligent Science and Engineering, University of Science and Technology Beijing.</p>
</fn>
</fn-group>
</back>
</article>