<?xml version="1.0" encoding="us-ascii"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Physiol.</journal-id>
<journal-title>Frontiers in Physiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Physiol.</abbrev-journal-title>
<issn pub-type="epub">1664-042X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1331852</article-id>
<article-id pub-id-type="doi">10.3389/fphys.2024.1331852</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Physiology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Delineation of intracavitary electrograms for the automatic quantification of decrement-evoked potentials in the coronary sinus with deep-learning techniques</article-title>
<alt-title alt-title-type="left-running-head">Jimenez-Perez et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fphys.2024.1331852">10.3389/fphys.2024.1331852</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Jimenez-Perez</surname>
<given-names>Guillermo</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1317446/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Acosta</surname>
<given-names>Juan</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Bocanegra-P&#xe9;rez</surname>
<given-names>&#xc1;lvaro J.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2384280/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Arana-Rueda</surname>
<given-names>Eduardo</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Frutos-L&#xf3;pez</surname>
<given-names>Manuel</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>S&#xe1;nchez-Brotons</surname>
<given-names>Juan A.</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Llamas-L&#xf3;pez</surname>
<given-names>Helena</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Di Massa Pezzutti</surname>
<given-names>Rodrigo</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Gonz&#xe1;lez de la Portilla Concha</surname>
<given-names>Carmen</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Camara</surname>
<given-names>Oscar</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/661411/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Pedrote</surname>
<given-names>Alonso</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>PhySense Research Group</institution>, <institution>BCN MedTech</institution>, <institution>Universitat Pompeu Fabra</institution>, <addr-line>Barcelona</addr-line>, <country>Spain</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Arrhythmia Unit</institution>, <institution>Department of Cardiology at Virgen Del Roc&#xed;o University Hospital</institution>, <addr-line>Sevilla</addr-line>, <country>Spain</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/436378/overview">Xin Li</ext-link>, University of Leicester, United Kingdom</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/663515/overview">Rasheda Chowdhury</ext-link>, Imperial College London, United Kingdom</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1016942/overview">Prasanth Ganesan</ext-link>, Stanford University, United States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Juan Acosta, <email>juan.acostamartinez@gmail.com</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>07</day>
<month>05</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1331852</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>11</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>10</day>
<month>04</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Jimenez-Perez, Acosta, Bocanegra-P&#xe9;rez, Arana-Rueda, Frutos-L&#xf3;pez, S&#xe1;nchez-Brotons, Llamas-L&#xf3;pez, Di Massa Pezzutti, Gonz&#xe1;lez de la Portilla Concha, Camara and Pedrote.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Jimenez-Perez, Acosta, Bocanegra-P&#xe9;rez, Arana-Rueda, Frutos-L&#xf3;pez, S&#xe1;nchez-Brotons, Llamas-L&#xf3;pez, Di Massa Pezzutti, Gonz&#xe1;lez de la Portilla Concha, Camara and Pedrote</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Cardiac arrhythmias cause depolarization waves to conduct unevenly on the myocardial surface, potentially delaying local components with respect to a previous beat when stimulated at faster frequencies. Despite the diagnostic value of localizing the distinct local electrogram (EGM) components for identifying regions with decrement-evoked potentials (DEEPs), current software solutions do not perform automatic signal quantification. Electrophysiologists must manually measure distances on the EGM signals to assess the existence of DEEPs during pacing or extra-stimuli protocols. In this work, we present a deep learning (DL)-based algorithm to identify decrement in atrial components (measured in the coronary sinus) with respect to their ventricular counterparts from EGM signals, for disambiguating between accessory pathways (APs) and atrioventricular re-entrant tachycardias (AVRTs). Several U-Net and W-Net neural networks with different configurations were trained on a private dataset of signals from the coronary sinus (312 EGM recordings from 77 patients who underwent AP or AVRT ablation). A second, separate dataset was annotated for clinical validation, with clinical labels associated to EGM fragments in which decremental conduction was elucidated. To alleviate data scarcity, a synthetic data augmentation method was developed for generating EGM recordings. Moreover, two novel loss functions were developed to minimize false negatives and delineation errors. Finally, the addition of self-attention mechanisms and their effect on model performance was explored. The best performing model was a W-Net model with 6 levels, optimized solely with the Dice loss. The model obtained precisions of 91.28%, 77.78% and 100.0%, and recalls of 94.86%, 95.25% and 100.0% for localizing local field, far field activations, and extra-stimuli, respectively. The clinical validation model demonstrated good overall agreement with respect to the evaluation of decremental properties. 
When compared to the criteria of electrophysiologists, the automatic exclusion step reached a sensitivity of 87.06% and a specificity of 97.03%. Out of the non-excluded signals, a sensitivity of 96.77% and a specificity of 95.24% were obtained for classifying them into decremental and non-decremental potentials. Current results show great promise while being, to the best of our knowledge, the first tool in the literature allowing the delineation of all local components present in an EGM recording. This is of capital importance for advancing processing for cardiac electrophysiological procedures and reducing intervention times, as many diagnosis procedures are performed by comparing segments or late potentials in subsequent cardiac cycles.</p>
</abstract>
<kwd-group>
<kwd>intracavitary electrograms</kwd>
<kwd>decrement-evoked potentials</kwd>
<kwd>deep-learning</kwd>
<kwd>automatic signal delineation</kwd>
<kwd>coronary sinus</kwd>
<kwd>local field components</kwd>
<kwd>synthetic data</kwd>
</kwd-group>
<contract-sponsor id="cn001">Generalitat de Catalunya<named-content content-type="fundref-id">10.13039/501100002809</named-content>
</contract-sponsor>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Cardiac Electrophysiology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Understanding deviations in electrical conduction patterns is a key task when diagnosing cardiac arrhythmias (CAs) in electrophysiology (EP) procedures <xref ref-type="bibr" rid="B17">Porta-S&#xe1;nchez et al. (2018)</xref>. During EP interventions, a series of local activation patterns or electrograms (EGM) are recorded, which correspond to depolarization waves captured by special catheters. While these EGMs are represented as isolated electrical deflections in normal cardiac tissue, CAs cause depolarization waves to conduct unevenly on the myocardial surface, which alter the morphology of an EGM, induce decremental response of the tissue, generate fractionations in the local components (local fields, LF) or produce the appearance of late potentials (LP) <xref ref-type="bibr" rid="B27">Zeppenfeld and Porta-S&#xe1;nchez (2020)</xref>.</p>
<p>Decremental response is especially important as a diagnostic marker. Decrement occurs when local components are delayed with respect to a previous beat when stimulated at faster frequencies. This decrement may be naturally caused (e.g., the AV node delays conduction at faster firing frequencies) or induced by lesions in the myocardium. Current clinical guidelines hint at the diagnostic value of decrement-evoked potentials (DEEPs), which are portions of tissue presenting decremental conduction. Those DEEPs are diagnosed by producing extrastimuli in specific myocardial positions <xref ref-type="bibr" rid="B1">Acosta et al. (2016)</xref>, <xref ref-type="bibr" rid="B2">Acosta et al. (2020)</xref>. In this work, the presence or absence of decrement in atrial components (measured in the coronary sinus, CS) with respect to their ventricular counterparts will be explored for disambiguating between accessory pathways (APs) and atrioventricular reentrant tachycardias (AVRTs).</p>
<p>Despite the importance of localizing the distinct local EGM components for assessing the existence of DEEPs, current software solutions do not perform automatic signal quantification <xref ref-type="bibr" rid="B27">Zeppenfeld and Porta-S&#xe1;nchez (2020)</xref>. Electrophysiologists must manually measure distances on the EGM signals to assess the existence of DEEPs during pacing or extrastimuli protocols. Even state-of-the-art 3D electroanatomical mapping systems (EAMs) only locate the local field signal with the largest deflection within a cardiac cycle <xref ref-type="bibr" rid="B27">Zeppenfeld and Porta-S&#xe1;nchez (2020)</xref> with relatively simple and error-prone algorithms, which often forces EAM operators to reassign fiducials <xref ref-type="bibr" rid="B27">Zeppenfeld and Porta-S&#xe1;nchez (2020)</xref>.</p>
<p>Some computational solutions for EGM signal analysis exist. These algorithms are based on calculating digital signal processing (DSP)-based transformations on the data, such as filtering or Fourier/wavelet transforms (FT and WT, respectively), which aid in reducing data complexity for producing robust signal detection. <xref ref-type="bibr" rid="B16">Osorio et al. (2017)</xref> produced an algorithm based on filtering out high-frequency components for locating local components in AF recordings. Similarly, <xref ref-type="bibr" rid="B7">Felix et al. (2015)</xref> used a threshold-based WT pipeline for estimating LFs. In <xref ref-type="bibr" rid="B6">Faes et al. (2002)</xref>, the authors proposed to estimate the local activation time (LAT) from the barycenter of LFs in bipolar EGMs, after filtering and adaptive thresholding. On the other hand, <xref ref-type="bibr" rid="B8">Hajimolahoseini et al. (2018)</xref> used a Gaussian mixture model for the analysis of the natural logarithm of the signal. To the best of our knowledge, only <xref ref-type="bibr" rid="B3">Alcaine et al. (2013)</xref>, <xref ref-type="bibr" rid="B4">Alcaine et al. (2014)</xref> directly attempted EGM delineation. The authors firstly delineated onsets and offsets of the surface QRS complex, which was used for windowing the EGM. Then, the WT was used on the signal&#x2019;s envelope alongside a rule-based algorithm to determine the onset/offset pair of the LFs, reaching good delineation performance. This approach, however, cannot be used to delineate isolated LPs or extra LFs in patients with AF, preventing its usage as a general purpose tool. None of the aforementioned works in the literature produces detections of individual waves outside the most salient component, with only <xref ref-type="bibr" rid="B3">Alcaine et al. (2013)</xref>, <xref ref-type="bibr" rid="B4">Alcaine et al. (2014)</xref> computing the onsets and offsets of the predicted wave.</p>
<p>In recent times, deep learning (DL) algorithms have gained popularity for automated data analysis, given their minimal pre-processing requirements and high performance. In the specific case of cardiac signals, some solutions exist for automatic electrocardiogram (ECG) quantification <xref ref-type="bibr" rid="B11">Jimenez-Perez et al. (2019)</xref>, <xref ref-type="bibr" rid="B10">Jimenez-Perez et al. (2021a)</xref>, <xref ref-type="bibr" rid="B12">Jimenez-Perez et al. (2021b)</xref>. However, not many algorithms have been developed for analyzing EGMs, and they revolve around classification <xref ref-type="bibr" rid="B19">Rodrigo et al. (2021)</xref>. In this work, several fully-convolutional networks (FCNs), namely the U-Net <xref ref-type="bibr" rid="B20">Ronneberger et al. (2015)</xref> and the W-Net <xref ref-type="bibr" rid="B23">Xia and Kulis (2017)</xref> with different configurations, were trained on a private dataset of signals from the CS. To alleviate data scarcity, a synthetic data augmentation method was developed for generating EGM recordings. Moreover, two novel loss functions were developed to minimize false negatives and delineation errors. Finally, the addition of self-attention mechanisms and their effect on model performance was explored <xref ref-type="bibr" rid="B22">Wang et al. (2020)</xref>. To the best of our knowledge, this is the first developed approach for delineation of intracavitary electrocardiograms (iECG), bridging the gap between the ECG and iECG communities.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<p>This section firstly describes the employed datasets in <xref ref-type="sec" rid="s2-1">Section 2.1</xref>. Secondly, the EGM analysis pipeline is defined, consisting of the generation of synthetic tracings (<xref ref-type="sec" rid="s2-2">Section 2.2</xref>), the DL architecture (<xref ref-type="sec" rid="s2-3">Section 2.3</xref>) and the list of performed experiments (<xref ref-type="sec" rid="s2-5">Section 2.5</xref>).</p>
<sec id="s2-1">
<title>2.1 Materials</title>
<p>A proprietary EGM delineation dataset was developed in the Hospital Universitario Virgen del Roc&#xed;o (Sevilla, Spain). This dataset comprises 312 EGM recordings of variable size taken from 77 patients who underwent AP or AVRT ablation, following the ablation protocol recommended in the standard-of-care. The LF and FF activations were manually annotated using a Python tool to mark their onsets and offsets, and these fiducials were then validated by a panel of certified cardiologists. A LF activation was considered when the catheter was placed into a specific anatomical structure (e.g., the left ventricle) and the EGM depicted a high-frequency activation, whereas the FF activation was considered a low-frequency activation occurring elsewhere but propagated to the local tissue (e.g., atrial activation in the left ventricle). In total, 20,671 LF, 13,354 FF and 318 stimulation artifact annotations were generated. All interventions recorded 5 bipolar EGMs from a decapolar catheter (CS-1 or proximal through CS-5 or distal) during pacing or application of extrastimuli while testing for decremental conduction. A Bard Labsystem Pro EP Recording System &#x24d2; was used (1,000 Hz sampling frequency, 16 bits resolution, 2.5 <italic>&#x3bc;</italic>V/bit, bandpass-filtered in [30, 500] Hz).</p>
<p>The annotations were represented as binary masks for their usage as optimization targets in the segmentation architectures, where a mask of shape {0,1}<sup>3&#xd7;<italic>N</italic>
</sup> was <italic>True</italic>-valued whenever a specific sample <italic>n</italic> &#x2208; [0, <italic>N</italic>] was contained within a stimulation, LF or FF activation (indices 0, 1 and 2, respectively) <xref ref-type="bibr" rid="B12">Jimenez-Perez et al. (2021b)</xref>. The dataset was split 75%&#x2013;25% so that all bipolar EGMs from the same patient would either be in the training or the testing sets, producing a training set and a held-out testing set (49 and 28 patients, respectively). <xref ref-type="fig" rid="F1">Figure 1</xref> shows an annotated EGM signal.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Generated ground truth for an intracavitary electrocardiographic recording at the coronary sinus. The green and magenta overlays represent, respectively, local field activations from the coronary sinus and the ventricular far field. The recording presents ventricular pacing and decremental properties.</p>
</caption>
<graphic xlink:href="fphys-15-1331852-g001.tif"/>
</fig>
<p>A second, completely separate dataset was annotated for clinical validation and was not used for model training or validation. This dataset did not contain delineation annotations (onsets/offsets of LF and FF activations), but clinical labels associated to EGM fragments in which decremental conduction was elucidated. The study protocol consisted in the application of a simple pacing (S &#x3d; [400, 600] ms) followed by an extra-stimulus (S<sub>2</sub> &#x3d; effective refractory period (ERP) &#x2b; [20, 60] ms), measuring the delay in response caused by the AV node. The recordings were annotated by expert electrophysiologists, where three possible labels were assigned to each recording: decremental (if the time delay after S<sub>2</sub> exceeded 10 ms), non-decremental or non-interpretable (loss of capture in S<sub>2</sub> or no conduction through AV node). In total, 321 recordings from 50 patients were annotated and analysed.</p>
</sec>
<sec id="s2-2">
<title>2.2 Synthetic data augmentation</title>
<p>EGM recordings have segments of electrical silence (or rest), in which one or several LF or FF activations may be contained. Taking advantage of this modular structure, an algorithm for generating synthetic data was developed in this work. The algorithm has two major steps: data pre-processing and trace generation. <xref ref-type="fig" rid="F2">Figure 2</xref> schematically represents the synthetic data augmentation pipeline.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Synthetic data generation pipeline. The data pre-processing step [<bold>(A)</bold>; blue shading] consists of: (1) cropping the ground truth segments into different data &#x201c;pools&#x201d; (local field [LF] in green, far field [FF] in magenta, and rest); and (2) fitting the original segment amplitudes to log-normal distributions with respect to the amplitude of the local field (<italic>amplitude</italic>
<sub>segment</sub>/<italic>amplitude</italic>
<sub>
<italic>LF</italic>
</sub>). The cycle orchestration step [<bold>(B)</bold>; orange shading] involves: (1) generating a set of registry-wide rules for all cardiac cycles; and (2), generating a set of per-cycle rules (e.g., merging the FF component with the LF), retrieving the specific segment croppings and computing the segment amplitudes for the left, central and right cardiac cycles. Finally, in the synthetic composition step [<bold>(C)</bold>; green shading], the three cardiac cycles are independently generated by firstly generating a baseline of rest segments of sufficient size and adding over it the drawn segments. Then, they are concatenated into a synthetic trace and cropped into a single (central) cardiac cycle, discarding the grayed area.</p>
</caption>
<graphic xlink:href="fphys-15-1331852-g002.tif"/>
</fig>
<sec id="s2-2-1">
<title>2.2.1 Data pre-processing</title>
<p>The data pre-processing step consisted of two phases. In the first phase, the annotated ground truth was cropped into its fundamental segments, separating into independent &#x201c;sets of segments&#x201d; the LF, FF, LP, stimulation and rest segments. The FF and rest segments were low-pass filtered (100 Hz, 2nd order Butterworth filter) to suppress any unannotated LP in its trace. Moreover, each segment was onset/offset corrected so its voltage started and ended in zero for easier synthetic composition. Finally, the LF morphologies were subdivided into LF and LP morphologies according to whether the segment displayed a length shorter than 25 samples as a rule of thumb.</p>
<p>In the second phase, the segment&#x2019;s morphology was separated from its voltage by modelling its amplitude. Given that the amplitude profile of each segment (<italic>amplitude</italic>
<sub>segment</sub>) has a strong dependence on the amplitude of the LF component (<italic>amplitude</italic>
<sub>LF</sub>; see <xref ref-type="fig" rid="F3">Figure 3B</xref>), the segment amplitudes could not be fit in a single distribution. For this purpose, firstly, the amplitude of the LF was split into 10 bins (dividing the [0,100]% amplitude interval in increments of 10%). Secondly, for each LF amplitude bin, a log-normal distribution was fitted to model the amplitude distributions of the sub-set of FF and rest segments that accompanied each specific LF fragment, totalling 10 log-normal distributions per segment type. Finally, the amplitudes of the LF and LP segments were fitted independently of the amplitude of any other fiducial, with log-normal distributions as well. Once the amplitudes had been fitted, all segments in the &#x201c;segment pools&#x201d; were normalized to their maximum absolute value (&#x201c;max abs&#x201d; scaling).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Histogram <bold>(A)</bold> and conditional distribution <bold>(B)</bold> of amplitudes (amp) of the cropped far field (FF) and rest segments with respect to the amplitude of the local field (LF). The histograms represent, in blue, the amplitudes of the segments and, overlaid in orange, the samples drawn from a log-normal distribution, demonstrating a good fit. The conditional distribution represents the kernel density estimates of the relative segment amplitude (<italic>y</italic>-axis) with respect to the LF amplitude of the cardiac cycle (<italic>x</italic>-axis), demonstrating larger segment amplitude at smaller local field amplitudes.</p>
</caption>
<graphic xlink:href="fphys-15-1331852-g003.tif"/>
</fig>
</sec>
<sec id="s2-2-2">
<title>2.2.2 Synthetic trace generation</title>
<p>The synthetic trace generation step aimed at producing bipolar EGM signals corresponding to a single cardiac cycle at a time. The resulting synthetic traces were intentionally crafted to deviate from strict physiological replication, in accordance with our clinical collaborators. This design decision was made because of the constraints posed by the size of the development dataset, which was comprised of few samples with manual annotations, which severely hindered the model&#x2019;s coverage of the real data distribution when used &#x201c;as-is&#x201d; for model training. In consequence, the generated synthetic traces intentionally cover iECG morphologies much beyond the ones found in the development set, by composing traces with pseudo-randomly located far field, local field, extrastimuli and DEEP activations.</p>
<p>Synthetic generation consisted of two steps. The first step revolved around probabilistically generating per-registry and per-cycle rules (see <xref ref-type="fig" rid="F2">Figure 2B</xref>). Per-registry rules governed conditions that affect all cardiac cycles within a registry, altering how the per-cycle rules were generated. To produce signals that are robust to QRS segmentation errors or to any physiological differences in LF/FF locations, three cardiac cycles were generated for each patient, which were then cropped to preserve the context of the central cardiac cycle (see <xref ref-type="fig" rid="F2">Figure 2C</xref>). Some examples of per-registry rules are the percentage of the left- and right-most cycles that is preserved, whether all cardiac cycles in a registry have the same morphology, or whether the registry contains stimulation artifacts. Per-cycle rules, for their part, governed conditions that affect a single cardiac cycle. For this purpose, different segments (LFs, FFs, LPs and rest segments) and their respective amplitudes were drawn from the sets of segments and amplitude distributions for each cardiac cycle. Given a pre-defined probability, some segments might not be drawn for a specific cardiac cycle (e.g., in the case of AV block, no ventricular activation might take place). If the &#x201c;same morphology&#x201d; boolean was toggled, the same segments were drawn for all cardiac cycles, although the amplitudes might vary. Finally, each segment was positioned in some location ([0,100]%) of its corresponding cardiac cycle. A full description of the per-registry and per-cycle rules is reported in the Supplementary Materials.</p>
<p>After generating the per-registry and per-cycle rules, the final synthetic trace was composed. Firstly, the rest segments were multiplied by their respective amplitudes and concatenated to form a baseline upon which to place the rest of the segments. Then, each drawn segment (LFs, FFs and LPs) was multiplied by its amplitude and placed in the trace by adding it to the baseline, starting at a specific index, placing them spatially into the registry. These indices were kept in memory to generate the ground truth of the delineation, indicating the precise onset and offset of each segment. To maximize variability, each segment was given a chance to be interpolated to 75%&#x2013;125% its original length and a chance to be merged with another waveform using Mixup <xref ref-type="bibr" rid="B28">Zhang et al. (2018)</xref>, a data augmentation strategy that produces a linear combination of different segments. Finally, once all segments were added into the baseline, the noise and baseline wander were added to the trace and the final segment was cropped according to the &#x201c;RR&#x2019; percentage&#x201d; generated in the global conditions. <xref ref-type="fig" rid="F4">Figure 4</xref> in the Supplementary Material provides some examples of real and synthetic electrogram signals and traces, respectively. Differences can be observed in the figure between real and synthetic iECG data. However, the synthetically generated traces were not designed to serve as physiological replicas of real data, but to extend the limited original dataset to cover the large variability of iECG signals due to the characteristics of the acquisition and the underlying arrhythmia required to improve the training of segmentation models.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Examples of real <bold>(A&#x2013;D)</bold> and synthetic <bold>(E&#x2013;H)</bold> electrogram signals and traces, respectively. The green and magenta overlays represent the local and far field activations, respectively. Substantial differences can be observed between real signals and synthetic traces, the latter providing a larger variability in signal characteristics, making them more suitable for training segmentation models than a limited dataset of clinically-obtained electrogram data.</p>
</caption>
<graphic xlink:href="fphys-15-1331852-g004.tif"/>
</fig>
</sec>
</sec>
<sec id="s2-3">
<title>2.3 Architecture</title>
<p>The U-Net <xref ref-type="bibr" rid="B20">Ronneberger et al. (2015)</xref> is a state-of-the-art convolutional neural network (CNN) that is organized as an encoder-decoder structure and is usually employed in medical imaging segmentation tasks. The encoder-decoder is a type of artificial neural network (ANN) topology revolving around the usage of an encoder for obtaining highly abstract data representations (usually tied to reducing input complexity), and a decoder to leverage the abstracted information into a specific output <xref ref-type="bibr" rid="B14">LeCun et al. (2015)</xref>. In the case of the U-Net, the encoder and the decoder are composed of convolutional operations, which act similarly to trainable digital filters and emphasize local relationships in data (either spatial or temporal, depending on the data to be analyzed), and pooling/upsampling operations, which allow models to train filters over more distant elements of the input image by reducing/increasing tensor size. Finally, the encoder and the decoder are connected by &#x201c;skip connections&#x201d;, which recover the input information at different levels of abstraction for: a) defining segmentation borders in a more precise manner, which could be lost with the pooling layers; and b) preventing problems arising from vanishing gradients when optimizing the model&#x2019;s weights <xref ref-type="bibr" rid="B20">Ronneberger et al. (2015)</xref>. The number of trainable convolutional filters is usually doubled after every pooling operation and halved after every upsampling operation.</p>
<p>Many U-Net-based alternatives exist due to its high performance for a variety of tasks <xref ref-type="bibr" rid="B15">Litjens et al. (2017)</xref>. Most works explore altering the model&#x2019;s original design decisions, such as the number of convolutional operations before any pooling operation (hereinafter, model &#x201c;width&#x201d;), the number of times the model reduces the input size (model &#x201c;depth&#x201d;), number of convolutional filters, employed non-linearity or choice of regularization <xref ref-type="bibr" rid="B12">Jimenez-Perez et al. (2021b)</xref>. Some authors have even developed heuristics for automatically adjusting the model&#x2019;s training parameters and reducing the developer&#x2019;s workload <xref ref-type="bibr" rid="B9">Isensee et al. (2021)</xref>. Other authors have attempted at incorporating state-of-the-art additions such as self-attention mechanisms <xref ref-type="bibr" rid="B21">Vaswani et al. (2017)</xref>, which allow the weights of an operation to be controlled by a secondary set of weights, effectively controlling feature importance <xref ref-type="bibr" rid="B18">Prabhakararao and Dandapat (2020)</xref>. While some adaptations of attention mechanisms exist for convolutional operations, this work explores the application of efficient channel attention (ECA) due to its low computational overhead <xref ref-type="bibr" rid="B22">Wang et al. (2020)</xref>.</p>
<p>Other works explore topological changes, either by embedding the U-Net into another structure <xref ref-type="bibr" rid="B23">Xia and Kulis (2017)</xref>; <xref ref-type="bibr" rid="B5">Chen et al. (2018)</xref> or by increasing its connectivity (number of times the output tensors from each convolutional operation are used) <xref ref-type="bibr" rid="B26">Zeng et al. (2019)</xref>. In this work, the W-Net architecture <xref ref-type="bibr" rid="B23">Xia and Kulis (2017)</xref> was employed given its good performance in other segmentation domains, such as the segmentation of echocardiographic images <xref ref-type="bibr" rid="B25">Xu et al. (2020)</xref>. The W-Net involves using two U-Nets, where the second network takes as input the output of the first network, and employs &#x201c;skip connections&#x201d; not only between each encoder/decoder pair but also between the decoder of the first U-Net and the encoder of the second. This second U-Net increases the model&#x2019;s capacity, which is usually tied to better performing models. A visual representation of the U-Net and the W-Net is presented in <xref ref-type="fig" rid="F5">Figure 5</xref>.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Representation of the U-Net (encircled in yellow) and W-Net architectures (encircled in red, containing the U-Net). Both networks are instantiated with 3 levels and 2 convolutional blocks per level. Arrows represent operations, while blocks are indicative of output tensors. Convolutional filters are doubled at each level, so that level <italic>L</italic>
<sub>
<italic>i</italic>
</sub> has 2<sup>
<italic>i</italic>
</sup>
<italic>N</italic> channels per level (with N being the starting number of channels), whereas pooling and upsampling have a kernel size of 2. Color code: convolutions (yellow), pooling operations (red), upsampling operations (blue), concatenation operations (black).</p>
</caption>
<graphic xlink:href="fphys-15-1331852-g005.tif"/>
</fig>
</sec>
<sec id="s2-4">
<title>2.4 Model evaluation</title>
<p>The model&#x2019;s performance was calculated in two ways: by evaluating the performance using typical delineation metrics; and by addressing the precision in a clinical validation dataset. Firstly, detection and delineation metrics were computed with respect to the ground truth. Detection metrics measured localized matches with the ground truth (i.e., segments occurring at the same time in the prediction and the ground truth). Delineation metrics, for their part, measured the error in the localization of the segment&#x2019;s onset and offset with respect to the reference. The detection and delineation metrics were computed before and after filtering: given the large number of LPs detected within the confines of FF activations (see <xref ref-type="sec" rid="s3">Section 3</xref> and <xref ref-type="fig" rid="F6">Figure 6B</xref>), a secondary set of metrics was computed, consisting in measuring the aforementioned detection and delineation metrics, but avoiding counting these as false positives.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Representative examples of model predictions depicting good examples <bold>(A)</bold>, prediction errors caused by higher sensitivity than ground truth [<bold>(B)</bold>; high-frequency component within the far field], true prediction errors <bold>(C)</bold> and errors attributable to wrongly annotated ground truth <bold>(D)</bold>. The figures show the ECG reference (top), predicted fiducials (middle) and ground truth (bottom). Green and magenta regions represent local and far field components, respectively.</p>
</caption>
<graphic xlink:href="fphys-15-1331852-g006.tif"/>
</fig>
<p>With respect to the clinical validation metrics, sensitivity and specificity figures are reported for the accurate detection of decremental response in the annotated registries. For producing a prediction, five stages were followed. Firstly, the QRS complex was detected using the delineator proposed in <xref ref-type="bibr" rid="B10">Jimenez-Perez et al. (2021a)</xref>. Secondly, the EGMs of each cardiac cycle were independently predicted, obtaining the onsets and offsets of each segment for each lead. Thirdly, a single onset-offset pair was selected across all leads by majority voting. This was useful for this specific clinical problem, given that the spatial configuration of the employed catheter allowed for certain synchronicity across leads (see <xref ref-type="sec" rid="s2-1">Section 2.1</xref>). In fourth place, a matching algorithm was employed to tie each stimulus to its response. For this purpose, the origin of the stimulation was first located (<italic>stim</italic>; auricular or ventricular origin) in order to then determine the delay to the response (<italic>resp</italic>; ventricular or auricular response, respectively). In this step, a series of exceptions were defined (e.g., uncoordinated stimulation-response, too distant response, absence of response or too different response morphology, among others), which led to the exclusion of the excerpt for its posterior analysis. In fifth and final place, the distances between the stimulus and the response (&#x394;<italic>t</italic>
<sub>
<italic>i</italic>
</sub> &#x3d; <italic>resp</italic>
<sub>
<italic>i</italic>
</sub> &#x2212; <italic>stim</italic>
<sub>
<italic>i</italic>
</sub>) were computed. Given the stimulation protocol (single pacing followed by extrastimulus S<sub>2</sub>), the delay &#x394;<italic>t</italic> between the two last stimuli (&#x394;<italic>t</italic>
<sub>
<italic>N</italic>&#x2212;1</sub> and &#x394;<italic>t</italic>
<sub>
<italic>N</italic>
</sub>, respectively) was measured and decremental response was considered if (&#x394;<italic>t</italic>
<sub>
<italic>N</italic></sub> &#x2212; &#x394;<italic>t</italic>
<sub>
<italic>N</italic>&#x2212;1</sub>) &#x3e; 10 <italic>ms</italic>. The final value was corrected with the lag of the highest cross-correlation between the last two responses. <xref ref-type="fig" rid="F7">Figure 7</xref> depicts the decrement computation algorithm on a sample EGM.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Decrement computation algorithm on a non-decremental trace. In <bold>(A)</bold>, the surface ECG is delineated (red dotted line) and the stimulation onset is located (gray dotted line). In <bold>(B)</bold>, the detected QRS&#x2019; are employed to locate the onsets and offsets of the local field (green) and far field (magenta) activations for each bipolar electrode. In <bold>(C)</bold>, majority voting is performed to obtain a single set of onsets/offsets for all electrodes, the predictions are cleaned (e.g., spikes related to pacing) and the measurements are produced.</p>
</caption>
<graphic xlink:href="fphys-15-1331852-g007.tif"/>
</fig>
</sec>
<sec id="s2-5">
<title>2.5 Experiments</title>
<p>Model performance was assessed by training several model topologies, isolating specific changes to test the contribution of each element in the model. Firstly, the best architectural configuration was assessed by comparing the performance of the U-Net and W-Net (for depths 5 and 6, independently), both with and without ECA. Secondly, the effect of using a pre-trained model for the task of ECG delineation was tested, taking the weights from a model for ECG delineation <xref ref-type="bibr" rid="B11">Jimenez-Perez et al. (2019)</xref>, <xref ref-type="bibr" rid="B10">Jimenez-Perez et al. (2021a)</xref>, <xref ref-type="bibr" rid="B12">Jimenez-Perez et al. (2021b)</xref>. Finally, the effect of applying a loss function that forces higher sensitivity was explored by doubling the executions, comprising training models with and without the loss function. The loss function employed the edge detector described in <xref ref-type="bibr" rid="B10">Jimenez-Perez et al. (2021a)</xref> for computing the true positives (TP), false positives (FP) and false negatives (FN), which were in turn employed for computing the classic sensitivity score: <italic>Se</italic> (%) &#x3d; <italic>TP</italic>/(<italic>TP</italic> &#x2b; <italic>FN</italic>).</p>
<p>Some aspects were kept constant throughout all experiments. On the one hand, the application of some regularization strategies such as SDr or certain types of DA was associated with better performance, so these were always applied. A random seed (123456) was employed for reproducibility, the Adam optimizer was used <xref ref-type="bibr" rid="B13">Kingma and Ba (2014)</xref>, leaky ReLUs <xref ref-type="bibr" rid="B24">Xu et al. (2015)</xref> were selected as the non-linearities of choice, and the number of base channels was kept the same (32, doubled/halved on the pooling/upsampling operations). Due to limitations in the completeness of the annotated ground truth (see <xref ref-type="fig" rid="F6">Figure 6D</xref>), training was solely performed using synthetic data. However, as reported in <xref ref-type="bibr" rid="B10">Jimenez-Perez et al. (2021a)</xref>, this was associated with only a slight decrease in performance as compared to using synthetic and real data, and outperformed training the model only with real data. All executions were performed with a NVIDIA Titan Xp GPU using PyTorch.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<p>The best performing model was a W-Net model with 6 levels, optimized solely with the Dice loss. The model obtained precisions of 76.44%, 74.73% and of 100.0%, and recalls of 94.84%, 95.23% and 100.0% for localizing LF activations, FF activations and extrastimuli, respectively. The model also attained an average delineation error of 4.20 &#xb1; 13.89 and &#x2212;6.45 &#xb1; 19.86 ms when localizing the LF&#x2019;s onsets and offsets, respectively; and of 3.74 &#xb1; 19.26 and &#x2212;5.71 &#xb1; 21.91 ms when estimating the onsets and offsets of the FF. The localization of stimulations was very precise, with onset errors of &#x2212;0.68 &#xb1; 1.27 ms. Given the ambiguity between some segments and the errors in the dataset annotations (as it will be discussed in <xref ref-type="sec" rid="s4">Section 4</xref>), a metric was obtained by merging the binary masks of LF and FF components, which obtained a precision, recall, Dice score, onset and offset errors of 90.02%, 97.53%, 83.52%, 9.04 &#xb1; 26.09 ms and &#x2212;10.65 &#xb1; 29.32 ms, respectively. A detailed description of the per-wave metrics of the model (precision, recall, Dice score, onset error and offset error) is reported in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Precision (%), recall (%), Dice score (%), onset error (mean [M] &#xb1; standard deviation [SD], in milliseconds) and offset errors (M &#xb1; SD, in milliseconds) of our best performing model.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="center">Precision (%)</th>
<th align="center">Recall (%)</th>
<th align="center">Dice (%)</th>
<th align="left">Onset error (M &#xb1; SD)</th>
<th align="left">Offset error (M &#xb1; SD)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Local Field</td>
<td align="center">76.44</td>
<td align="center">94.84</td>
<td align="center">77.37</td>
<td align="center">4.20 &#xb1; 13.89</td>
<td align="center">&#x2212;6.45 &#xb1; 19.86</td>
</tr>
<tr>
<td align="left">Far Field</td>
<td align="center">74.73</td>
<td align="center">95.23</td>
<td align="center">73.22</td>
<td align="center">3.74 &#xb1; 19.26</td>
<td align="center">&#x2212;5.71 &#xb1; 21.91</td>
</tr>
<tr>
<td align="left">Local &#x2b; Far Field</td>
<td align="center">90.02</td>
<td align="center">97.53</td>
<td align="center">83.52</td>
<td align="center">9.04 &#xb1; 26.09</td>
<td align="center">&#x2212;10.65 &#xb1; 29.32</td>
</tr>
<tr>
<td align="left">Stimulation</td>
<td align="center">100.0</td>
<td align="center">100.0</td>
<td align="center">94.78</td>
<td align="center">&#x2212;0.68 &#xb1; 1.27</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left">Local Field (&#x2264;25 ms)</td>
<td align="center">75.04</td>
<td align="center">67.98</td>
<td align="center">45.41</td>
<td align="center">1.51 &#xb1; 1.41</td>
<td align="center">&#x2212;5.69 &#xb1; 2.91</td>
</tr>
<tr>
<td align="left">Local Field (<inline-formula id="inf1">
<mml:math id="m1">
<mml:mo>&#x3e;</mml:mo>
</mml:math>
</inline-formula> 25 ms)</td>
<td align="center">80.77</td>
<td align="center">96.18</td>
<td align="center">78.68</td>
<td align="center">4.04 &#xb1; 13.42</td>
<td align="center">&#x2212;3.65 &#xb1; 16.67</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>A secondary set of measurements was computed by discarding as false positives any LF that occurred within the confines of a FF, as described in <xref ref-type="sec" rid="s2-4">Section 2.4</xref>. With this secondary metric, the model obtained precisions of 91.28%, 77.78% and of 100.0%, and recalls of 94.86%, 95.25% and 100.0% for localizing LF activations, FF activations and extrastimuli, respectively. The model had an average delineation error of 3.89 &#xb1; 14.56 and &#x2212;6.16 &#xb1; 20.25 ms when localizing the LF&#x2019;s onsets and offsets, respectively; and of 3.47 &#xb1; 20.03 and &#x2212;5.44 &#xb1; 22.82 ms in the FF. A more in-depth report of the per-wave metrics of the model is provided in <xref ref-type="table" rid="T2">Table 2</xref>. Furthermore, some representative examples of the best performing model&#x2019;s performance have been plotted in <xref ref-type="fig" rid="F6">Figure 6</xref>. To aid in the discussion, the samples were grouped according to the different types of errors produced by the network (or absence thereof). These can be divided into four main categories: good samples (<xref ref-type="fig" rid="F6">Figure 6A</xref>), errors due to increased model sensitivity with respect to the ground truth (<xref ref-type="fig" rid="F6">Figure 6B</xref>), true network errors (<xref ref-type="fig" rid="F6">Figure 6C</xref>), and annotation errors in the database (<xref ref-type="fig" rid="F6">Figure 6D</xref>). Together with the real and synthetic examples depicted in <xref ref-type="fig" rid="F4">Figure 4</xref> in the Supplementary Material, these results demonstrate the appropriateness of training a segmentation model with a synthetic dataset including a large variability of characteristics, despite the obvious differences in signal morphology with real data, which can only be available in a limited number of settings.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Precision (%), recall (%), Dice score (%), onset error (mean [M] &#xb1; standard deviation [SD], in milliseconds) and offset errors (M &#xb1; SD, in milliseconds) of our best performing model after discarding small local field activations contained within far field activations.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="center">Precision (%)</th>
<th align="center">Recall (%)</th>
<th align="center">Dice (%)</th>
<th align="left">Onset error (M &#xb1; SD)</th>
<th align="left">Offset error (M &#xb1; SD)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Local Field</td>
<td align="center">91.28</td>
<td align="center">94.86</td>
<td align="center">77.37</td>
<td align="center">3.89 &#xb1; 14.56</td>
<td align="center">&#x2212;6.16 &#xb1; 20.25</td>
</tr>
<tr>
<td align="left">Far Field</td>
<td align="center">77.78</td>
<td align="center">95.25</td>
<td align="center">73.22</td>
<td align="center">3.47 &#xb1; 20.03</td>
<td align="center">&#x2212;5.44 &#xb1; 22.82</td>
</tr>
<tr>
<td align="left">Local Field &#x2b; Far Field</td>
<td align="center">91.39</td>
<td align="center">97.57</td>
<td align="center">83.52</td>
<td align="center">7.85 &#xb1; 28.52</td>
<td align="center">&#x2212;9.67 &#xb1; 31.77</td>
</tr>
<tr>
<td align="left">Stimulation</td>
<td align="center">100.0</td>
<td align="center">100.0</td>
<td align="center">94.78</td>
<td align="center">&#x2212;0.68 &#xb1; 1.27</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left">Local Field (&#x2264;25 ms)</td>
<td align="center">94.53</td>
<td align="center">67.98</td>
<td align="center">45.41</td>
<td align="center">1.51 &#xb1; 1.41</td>
<td align="center">&#x2212;5.69 &#xb1; 2.91</td>
</tr>
<tr>
<td align="left">Local Field (<inline-formula id="inf2">
<mml:math id="m2">
<mml:mo>&#x3e;</mml:mo>
</mml:math>
</inline-formula> 25 ms)</td>
<td align="center">94.06</td>
<td align="center">96.19</td>
<td align="center">78.68</td>
<td align="center">4.0 &#xb1; 13.51</td>
<td align="center">&#x2212;3.6 &#xb1; 16.76</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s3-1">
<title>3.1 Model additions</title>
<p>The only model additions that showed consistently better results with respect to the baseline were the application of increased model capacity (either with W-Net or with more model depth) and pre-training the model with weights from an ECG delineation model <xref ref-type="bibr" rid="B10">Jimenez-Perez et al. (2021a)</xref>. Other effects, such as the addition of custom data losses, were generally detrimental for model performance. <xref ref-type="fig" rid="F8">Figure 8</xref> summarizes the effect of the different model additions.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Boxplots of the contributions of the different model additions to the overall model performance, divided into the local field (left) and far field (right). <italic>Y</italic>-axis corresponds to the <italic>F</italic>
<sub>1</sub> score.</p>
</caption>
<graphic xlink:href="fphys-15-1331852-g008.tif"/>
</fig>
</sec>
<sec id="s3-2">
<title>3.2 Clinical validation</title>
<p>The clinical validation model demonstrated good overall agreement with respect to the evaluation of decremental properties. Out of the 321 recordings employed for evaluation of decremental response, 81 (25.23%) were automatically excluded by the rule-based algorithm. When compared to the exclusion criteria proposed by electrophysiologists, the exclusion step reached a sensitivity of 87.06% and a specificity of 97.03%. Out of the 240 remaining, 180 (75%) were evaluated to be decremental and 60 as non-decremental (94.42% accuracy, 96.77% sensitivity, 95.24% specificity). The selected model is not computationally expensive, producing a prediction in 18.9 &#xb1; 0.22 ms on GPU (NVidia GeForce GTX 1050 Ti), which is bound to be faster with more modern hardware.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<p>Electrogram segmentation is a crucial task for advancing in the automatization of EP procedures. Currently, physicians must manually produce basic measurements when performing interventions such as AVRT or AP ablation for determining decremental properties or to measure basic intervals. Despite its importance, even state-of-the-art EAM systems only perform basic detection of the most salient wave within a cardiac cycle for computing derived clinical indicators. The inability of performing full signal delineation is limiting, as recent developments in diagnostic markers for catheter ablation such as decrement-evoked potentials are detected through the analysis of portions of myocardial tissue that produce LFs or LPs that are delayed with respect to previous cardiac cycles.</p>
<p>The work presented here builds upon the existing detection and delineation literature by advancing towards an all-purpose iECG analysis system. Similarly to the approach proposed in <xref ref-type="bibr" rid="B10">Jimenez-Perez et al. (2021a)</xref>, a DL model was trained for automatic data quantification; focusing on quantification counterbalances the drawbacks of DL algorithms with an application that is immediately interpretable by the operator. Given the lack of large-scale iECG datasets annotated for delineation, two main design decisions were made. Firstly, the model was trained solely with synthetic data from a modest dataset of 312 iECG recordings from 77 distinct patients, with ground truth generated for localizing independent LF and FF activations. This synthetic dataset greatly improves model performance in scenarios where data is scarce, and has been proven to be more performant than training on real samples if the data is scarce <xref ref-type="bibr" rid="B10">Jimenez-Perez et al. (2021a)</xref>. Secondly, the prediction pipeline was designed to analyse excerpts of individual cardiac cycles, whose window of interest was localized with the QRS complex&#x2019;s barycenter in the surface ECG using a DL model <xref ref-type="bibr" rid="B10">Jimenez-Perez et al. (2021a)</xref>. Cropping the iECG recordings into individual cardiac cycles allowed the model to adjust the prediction of a specific waveform according to whether the LFs (high frequency components) occurred before or during ventricular depolarization. The combination of these design decisions allowed to alleviate the main limitations found in initial approaches, producing more versatile networks.</p>
<p>Although many models and model additions were explored for pushing performance, model performance seemed to respond similarly to the explored changes (<xref ref-type="fig" rid="F8">Figure 8</xref>). Moreover, the trained models showed a high variance overall in <italic>F</italic>
<sub>1</sub> score, and neither changing model capacity (5 or 6 U-Net/W-Net levels), changing the loss functions (Dice score or new losses) nor changing the base architecture (U-Net or W-Net) seemed to significantly improve performance. The only clear improvement in both LF and FF <italic>F</italic>
<sub>1</sub> scores seemed to be starting the training from a model pretrained with an ECG delineation task <xref ref-type="bibr" rid="B10">Jimenez-Perez et al. (2021a)</xref>, which is consistent with the recent advancements in Self-Supervised pretraining of Computer Vision models (<xref ref-type="bibr" rid="B29">Caron et al., 2021</xref>). We hypothesize that one of the factors that cause this variance is the need to add more training data, which is also hinted at by the high number of runs that did not produce a model that consistently converged (i.e., <italic>F</italic>
<sub>1</sub> scores neighbouring 60%).</p>
<p>The best performing model demonstrated high sensitivity but moderate precision (around 95% and 75%, respectively, for both LF and FF activations in a held-out test set). With respect to the onset/offset localization, the models provided a good fit with respect to the reference (errors of 3.89 &#xb1; 14.56 and &#x2212;6.16 &#xb1; 20.25 ms when estimating the LF&#x2019;s onsets and offsets, respectively; and of 3.47 &#xb1; 20.03 and &#x2212;5.44 &#xb1; 22.82 ms at the FF components). Comparing the proposed approach to the existing literature gives the impression of a reduced algorithm performance: some methods reach precision and recall figures nearing 100% <xref ref-type="bibr" rid="B16">Osorio et al. (2017)</xref>; <xref ref-type="bibr" rid="B7">Felix et al. (2015)</xref> and half the SD in onset/offset localization <xref ref-type="bibr" rid="B4">Alcaine et al. (2014)</xref>. This, however, is misleading for several reasons. Firstly, existing algorithms are only concerned with locating a single LF activation for each cardiac cycle and disregard any other type of activation (e.g., LP or FF), which prevents direct comparison between methodologies. Secondly, all development datasets are private, preventing a fair comparison of methods; the dataset collected for this work consists of real clinical data, making no compromises with respect to signal quality or difficulty. Thirdly, models that are more sensitive than specific were sought, and distinguishing subtle LPs from noise is a challenging task. Finally, the larger delineation errors are to be expected given the smoothness at signal initiation and termination (see <xref ref-type="fig" rid="F9">Figure 9</xref>) and the lack of a unified criterion for their definition. 
Despite the comparatively reduced detection and delineation metrics, the overall performance at locating specific components has proved excellent for a downstream clinical application for the detection of decremental response in AP or AVRT procedures. The model, with a relatively simple post-processing, allowed for the identification of decremental response (&#x394;<italic>t</italic> &#x3e; 10 <italic>ms</italic>) with high precision and accuracy, reaching sensitivity and specificity figures of 96.77% and 95.24%, respectively.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>The smoothness of the wave complicates the definition of the local field&#x2019;s offset (red dashed line) and the far field&#x2019;s onset and offset (cyan and magenta dashed lines, respectively). Multiple possible onsets/offsets are marked.</p>
</caption>
<graphic xlink:href="fphys-15-1331852-g009.tif"/>
</fig>
<p>The proposed approach has two main advantages. Most importantly, a full delineation of all important iECG fiducials in the registry is performed, as opposed to the localization of the most salient component <xref ref-type="bibr" rid="B16">Osorio et al. (2017)</xref>; <xref ref-type="bibr" rid="B7">Felix et al. (2015)</xref>; <xref ref-type="bibr" rid="B4">Alcaine et al. (2014)</xref>. This is of capital importance for advancing processing for EP procedures and reducing intervention times, as many diagnosis procedures are performed by comparing segments or LP in subsequent cardiac cycles. Additionally, using a synthetic data generation algorithm allows better control of the conditions for predicting a local component, which is highly beneficial: the low precision reported in <xref ref-type="table" rid="T1">Table 1</xref> results from lowering the threshold at which a perturbation can be recognized as a local component (see <xref ref-type="fig" rid="F6">Figure 6B</xref>). Thus, the system is able to propose low intensity, high frequency deflections as candidate local components, which would be too costly and time-consuming to annotate while not necessarily erroneous. The difference between the ground truth and the predictions might represent a limitation of the ground truth rather than of the developed model.</p>
<p>The primary objective of the synthetic data generation was not to be used as realistic data for clinical practice, but to force the model to identify specific iEGM components such as local field, far field, and DEEP signals, including possible changes in signal acquisition (e.g., different type of catheters). The resulting intentional deviation from strict physiological replication proved beneficial, even at a slight loss of realism. This tradeoff between variability and realism in the generated signals is not significantly different from usual data augmentation strategies found in the deep learning literature, in which extreme transformations over the base image are performed but not necessarily evaluated for realism (e.g., the recent GIN-IPA data augmentation technique (<xref ref-type="bibr" rid="B30">Ouyang et al., 2023</xref>)). In consequence, it is not straightforward to make a direct comparison between real and synthetic data using similarity-based metrics (e.g., cross-correlation). However, the developed segmentation and classification models were trained exclusively on synthetic data, their accuracy on held-out datasets of real data being a very strong indirect evaluation of the usefulness of the synthetic generation pipeline.</p>
<p>The proposed approach has, however, some limitations that are unique to EGMs as opposed to other cardiac signals such as the ECG. Firstly, expressing the ground truth as a binary mask delimiting each local component, as is performed in this work, might clash with some scenarios where the individual local components should not be merged, giving rise to difficulties when analyzing highly fractionated potentials, where predicting a continuous <italic>True</italic>-valued binary mask spanning the whole fractionation might not be useful for posterior analyses. Secondly, a compromise with respect to the architectural choice might be needed, as the model prediction time is longer than the sampling period (7.88 ms per cardiac cycle and lead). This, however, might be circumvented by good implementation in an EAM platform, by multi-threading, processing the iECG while the catheter changes position or the system waits for respiration cues or by providing the outputs with a slight delay. Thirdly, the model could not be trained leveraging real data, partially due to the necessity to improve the quality of the ground truth annotations: many waves were not correctly delineated and accounted for false positives (<xref ref-type="fig" rid="F6">Figure 6D</xref>), requiring re-annotation, and more prevalence of fractionated potentials is needed to assess the generalizability of our approach. Finally, the developed rules for the synthetic DA algorithm allow for much higher complexity, requiring the inclusion of more real-world casuistry to enhance performance.</p>
</sec>
<sec sec-type="conclusion" id="s5">
<title>5 Conclusion</title>
<p>The proposed methodology for the analysis of iECG recordings has proven to be useful in other signal analysis tasks such as ECG delineation (<xref ref-type="bibr" rid="B10">Jimenez-Perez et al., 2021a</xref>), hinting at the feasibility of a good-performing, all-purpose EGM annotation tool. Current results show great promise while being, to the best of our knowledge, the first tool in the literature allowing the delineation of all local components present in a recording. The algorithm, based on an encoder-decoder DL architecture, was trained solely with synthetic data according to a rule-based algorithm that allows for controlling the generation process. The algorithm is, however, faced with several limitations in the dataset, data generation and data representation. Nevertheless, the development of an all-purpose EGM delineation model is a key tool for unlocking a wide array of downstream tasks, ranging from the automatic identification of myocardial portions of scar presenting DEEPs to the exploration of morphological indicators that might aid in diagnosis or risk stratification.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The datasets presented in this article are not readily available because patient data are not available outside of the hospital. Requests to access the datasets should be directed to <email>juan.acostamartinez@gmail.com</email>.</p>
</sec>
<sec id="s7">
<title>Ethics statement</title>
<p>The studies involving humans were approved by CEIC Hospital Universitario Virgen del Rocio. The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study.</p>
</sec>
<sec id="s8">
<title>Author contributions</title>
<p>GJ-P: Writing&#x2013;review and editing, Writing&#x2013;original draft, Visualization, Validation, Software, Methodology, Investigation, Formal Analysis, Data curation, Conceptualization. JA: Writing&#x2013;review and editing, Validation, Supervision, Resources, Project administration, Investigation, Funding acquisition, Formal Analysis, Data curation, Conceptualization. &#xc1;B-P: Writing&#x2013;review and editing, Visualization, Software, Investigation. EA-R: Writing&#x2013;review and editing, Investigation, Formal Analysis, Data curation. MF-L: Writing&#x2013;review and editing, Validation, Formal Analysis, Data curation, Conceptualization. JS-B: Writing&#x2013;review and editing, Validation, Formal Analysis, Data curation, Conceptualization. HL-L: Writing&#x2013;review and editing, Validation, Formal Analysis, Data curation, Conceptualization. RP: Writing&#x2013;review and editing, Validation, Formal Analysis, Data curation, Conceptualization. CC: Writing&#x2013;review and editing, Formal Analysis, Data curation, Conceptualization. OC: Writing&#x2013;review and editing, Writing&#x2013;original draft, Visualization, Supervision, Resources, Methodology. AP: Writing&#x2013;review and editing, Supervision, Resources, Project administration, Investigation, Funding acquisition, Formal Analysis, Data curation, Conceptualization.</p>
</sec>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This research was supported by the Secretariat for Universities and Research of the Government of Catalonia (2017 FI_B 01008) and by grants from the Consejer&#xed;a de Salud y Consumo of the Junta de Andaluc&#xed;a (PI-0500-2019).</p>
</sec>
<sec sec-type="COI-statement" id="s10">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Acosta</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Andreu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Penela</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Cabrera</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Carlosena</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Korshunov</surname>
<given-names>V.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Elucidation of hidden slow conduction by double ventricular extrastimuli: a method for further arrhythmic substrate identification in ventricular tachycardia ablation procedures</article-title>. <source>EP Eur.</source> <volume>20</volume>, <fpage>337</fpage>&#x2013;<lpage>346</lpage>. <pub-id pub-id-type="doi">10.1093/europace/euw325</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Acosta</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Soto-Iglesias</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>J&#xe1;uregui</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Armenta</surname>
<given-names>J. F.</given-names>
</name>
<name>
<surname>Penela</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Frutos-L&#xf3;pez</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Long-term outcomes of ventricular tachycardia substrate ablation incorporating hidden slow conduction analysis</article-title>. <source>Heart rhythm.</source> <volume>17</volume>, <fpage>1696</fpage>&#x2013;<lpage>1703</lpage>. <pub-id pub-id-type="doi">10.1016/j.hrthm.2020.05.017</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Alcaine</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Soto-Iglesias</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Andreu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Fernandez-Armenta</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Berruezo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Laguna</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). &#x201c;<article-title>Wavelet-based electrogram onset identification for ventricular electroanatomical mapping</article-title>,&#x201d; in <source>Computing in cardiology 2013</source>, <fpage>615</fpage>&#x2013;<lpage>618</lpage>.</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alcaine</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Soto-Iglesias</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Calvo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Guiu</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Andreu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Fern&#xe1;ndez-Armenta</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>A wavelet-based electrogram onset delineator for automatic ventricular activation mapping</article-title>. <source>IEEE Trans. Biomed. Eng.</source> <volume>61</volume>, <fpage>2830</fpage>&#x2013;<lpage>2839</lpage>. <pub-id pub-id-type="doi">10.1109/TBME.2014.2330847</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Caron</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Touvron</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Misra</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>J&#x00e9;gou</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Mairal</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bojanowski</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). &#x201c;<article-title>Emerging properties in self-supervised vision transformers</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)</source>, <fpage>9650</fpage>&#x2013;<lpage>9660</lpage>.</citation>
</ref>
<ref id="B5">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>L.-C.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Papandreou</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Schroff</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Adam</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Encoder-decoder with atrous separable convolution for semantic image segmentation</article-title>,&#x201d; in <source>Computer vision &#x2013; ECCV 2018</source>. Editors <person-group person-group-type="editor">
<name>
<surname>Ferrari</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Hebert</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sminchisescu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Weiss</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name>), <fpage>833</fpage>&#x2013;<lpage>851</lpage>.</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Faes</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Nollo</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Antolini</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Gaita</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ravelli</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>A method for quantifying atrial fibrillation organization based on wave-morphology similarity</article-title>. <source>IEEE Trans. Biomed. Eng.</source> <volume>49</volume>, <fpage>1504</fpage>&#x2013;<lpage>1513</lpage>. <pub-id pub-id-type="doi">10.1109/TBME.2002.805472</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Felix</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Alcaraz</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Rieta</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Adaptive wavelets applied to automatic local activation wave detection in fractionated atrial electrograms of atrial fibrillation</article-title>,&#x201d; in <source>2015 computing in cardiology conference (CinC)</source>, <fpage>45</fpage>&#x2013;<lpage>48</lpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hajimolahoseini</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hashemi</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gazor</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Redfearn</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Inflection point analysis: a machine learning approach for extraction of IEGM active intervals during atrial fibrillation</article-title>. <source>Artif. Intell. Med.</source> <volume>85</volume>, <fpage>7</fpage>&#x2013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1016/j.artmed.2018.02.003</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Isensee</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Jaeger</surname>
<given-names>P. F.</given-names>
</name>
<name>
<surname>Kohl</surname>
<given-names>S. A. A.</given-names>
</name>
<name>
<surname>Petersen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Maier-Hein</surname>
<given-names>K. H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation</article-title>. <source>Nat. Methods</source> <volume>18</volume>, <fpage>203</fpage>&#x2013;<lpage>211</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-020-01008-z</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Jimenez-Perez</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Acosta</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Alcaine</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Camara</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2021a</year>). <source>Generalizing electrocardiogram delineation &#x2013; training convolutional neural networks with synthetic data augmentation</source>. <comment>arXiv preprint</comment>.</citation>
</ref>
<ref id="B11">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Jimenez-Perez</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Alcaine</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Camara</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>U-net architecture for the automatic detection and delineation of the electrocardiogram</article-title>,&#x201d; in <source>CinC</source> (<publisher-name>IEEE</publisher-name>), <volume>46</volume>, <fpage>1</fpage>&#x2013;<lpage>4</lpage>. <pub-id pub-id-type="doi">10.22489/cinc.2019.284</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jimenez-Perez</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Alcaine</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Camara</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2021b</year>). <article-title>Delineation of the electrocardiogram with a mixed-quality-annotations dataset using convolutional neural networks</article-title>. <source>Sci. Rep.</source> <volume>11</volume>, <fpage>863</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-020-79512-7</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kingma</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Ba</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2014</year>). <source>Adam: a method for stochastic optimization</source>. <comment>arXiv preprint</comment>.</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>LeCun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bengio</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Deep learning</article-title>. <source>Nature</source> <volume>521</volume>, <fpage>436</fpage>&#x2013;<lpage>444</lpage>. <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Litjens</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kooi</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Bejnordi</surname>
<given-names>B. E.</given-names>
</name>
<name>
<surname>Setio</surname>
<given-names>A. A. A.</given-names>
</name>
<name>
<surname>Ciompi</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ghafoorian</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>A survey on deep learning in medical image analysis</article-title>. <source>Med. Image Anal.</source> <volume>42</volume>, <fpage>60</fpage>&#x2013;<lpage>88</lpage>. <pub-id pub-id-type="doi">10.1016/j.media.2017.07.005</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Osorio</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Alcaraz</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Rieta</surname>
<given-names>J. J.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>A fractionation-based local activation wave detector for atrial electrograms of atrial fibrillation</article-title>,&#x201d; in <source>2017 computing in cardiology (CinC)</source>, <fpage>1</fpage>&#x2013;<lpage>4</lpage>.</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ouyang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Qin</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Causality-inspired single-source domain generalization for medical image segmentation</article-title>. <source>IEEE Trans. Med. Imaging</source> <volume>42</volume>, <fpage>1095</fpage>&#x2013;<lpage>1106</lpage>.</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Porta-S&#xe1;nchez</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Jackson</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Lukac</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kristiansen</surname>
<given-names>S. B.</given-names>
</name>
<name>
<surname>Nielsen</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Gizurarson</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Multicenter study of ischemic ventricular tachycardia ablation with decrement-evoked potential (DEEP) mapping with extra stimulus</article-title>. <source>JACC Clin. Electrophysiol.</source> <volume>4</volume>, <fpage>307</fpage>&#x2013;<lpage>315</lpage>. <pub-id pub-id-type="doi">10.1016/j.jacep.2017.12.005</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Prabhakararao</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Dandapat</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Myocardial infarction severity stages classification from ECG signals using attentional recurrent neural network</article-title>. <source>IEEE Sensors J.</source> <volume>20</volume>, <fpage>8711</fpage>&#x2013;<lpage>8720</lpage>. <pub-id pub-id-type="doi">10.1109/jsen.2020.2984493</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rodrigo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rogers</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ganesan</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Krittanawong</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Alhusseini</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Narayan</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Classification of individual atrial intracardiac electrograms by Deep Learning</article-title>. <source>J. Am. Coll. Cardiol.</source> <volume>77</volume>, <fpage>3217</fpage>. <pub-id pub-id-type="doi">10.1016/s0735-1097(21)04572-1</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Ronneberger</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Fischer</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Brox</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>U-Net: convolutional networks for biomedical image segmentation</article-title>,&#x201d; in <source>MICCAI</source> (<publisher-name>Springer</publisher-name>), <fpage>234</fpage>&#x2013;<lpage>241</lpage>.</citation>
</ref>
<ref id="B21">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Vaswani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shazeer</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Parmar</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Uszkoreit</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jones</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gomez</surname>
<given-names>A. N.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). &#x201c;<article-title>Attention is all you need</article-title>,&#x201d; in <source>NIPS</source>, <fpage>5998</fpage>&#x2013;<lpage>6008</lpage>.</citation>
</ref>
<ref id="B22">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zuo</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>ECA-net: efficient channel attention for deep convolutional neural networks</article-title>,&#x201d; in <source>CVPR</source>, <fpage>11534</fpage>&#x2013;<lpage>11542</lpage>.</citation>
</ref>
<ref id="B23">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Xia</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Kulis</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2017</year>). <source>W-net: a deep model for fully unsupervised image segmentation</source>. <comment>arXiv preprint</comment>.</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Empirical evaluation of rectified activations in convolutional network</article-title>. <source>Comput. Res. Repos. (CoRR)</source>. <comment>abs/1505.00853</comment>. <pub-id pub-id-type="doi">10.48550/arXiv.1505.00853</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Convolutional-neural-network-based approach for segmentation of apical four-chamber view from fetal echocardiography</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>80437</fpage>&#x2013;<lpage>80446</lpage>. <pub-id pub-id-type="doi">10.1109/access.2020.2984630</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zeng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Dense-U-Net: densely connected convolutional network for semantic segmentation with a small number of samples</article-title>,&#x201d; in <source>10th international conference on graphics and image processing, ICGIP 2018</source>. Editors <person-group person-group-type="editor">
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Pu</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<publisher-loc>SPIE</publisher-loc>: <publisher-name>International Society for Optics and Photonics</publisher-name>), <volume>11069</volume>, <fpage>665</fpage>&#x2013;<lpage>670</lpage>. <pub-id pub-id-type="doi">10.1117/12.2524406</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zeppenfeld</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Porta-S&#xe1;nchez</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Automated functional substrate mapping: further hurdles to be cleared</article-title>. <source>JACC Clin. Electrophysiol.</source> <volume>6</volume>, <fpage>1794</fpage>&#x2013;<lpage>1796</lpage>. <pub-id pub-id-type="doi">10.1016/j.jacep.2020.06.032</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Cisse</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Dauphin</surname>
<given-names>Y. N.</given-names>
</name>
<name>
<surname>Lopez-Paz</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Mixup: beyond empirical risk minimization</article-title>,&#x201d; in <source>International conference on learning representations</source>.</citation>
</ref>
</ref-list>
</back>
</article>