<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Med.</journal-id>
<journal-title>Frontiers in Medicine</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Med.</abbrev-journal-title>
<issn pub-type="epub">2296-858X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmed.2024.1377479</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Medicine</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Deep learning based retinal vessel segmentation and hypertensive retinopathy quantification using heterogeneous features cross-attention neural network</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Liu</surname> <given-names>Xinghui</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2626150/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Tan</surname> <given-names>Hongwen</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Wu</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Chen</surname> <given-names>Zhangrong</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1771454/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>School of Clinical Medicine, Guizhou Medical University</institution>, <addr-line>Guiyang</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Cardiovascular Medicine, Guizhou Provincial People&#x00027;s Hospital</institution>, <addr-line>Guiyang</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>Electrical Engineering College, Guizhou University</institution>, <addr-line>Guiyang</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>Department of Cardiovascular Medicine, The Affiliated Hospital of Guizhou Medical University</institution>, <addr-line>Guiyang</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Yanda Meng, University of Exeter, United Kingdom</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Xu Chen, University of Cambridge, United Kingdom</p>
<p>Peng Xue, Shandong University, China</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Zhangrong Chen <email>chenzhangrong71&#x00040;163.com</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>22</day>
<month>05</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>11</volume>
<elocation-id>1377479</elocation-id>
<history>
<date date-type="received">
<day>27</day>
<month>01</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>05</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2024 Liu, Tan, Wang and Chen.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Liu, Tan, Wang and Chen</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Retinal vessels play a pivotal role as biomarkers in the detection of retinal diseases, including hypertensive retinopathy. The manual identification of these retinal vessels is both resource-intensive and time-consuming. The fidelity of vessel segmentation in automated methods directly depends on the fundus images&#x00027; quality. In instances of sub-optimal image quality, applying deep learning-based methodologies emerges as a more effective approach for precise segmentation. We propose a heterogeneous neural network combining the benefit of local semantic information extraction of convolutional neural network and long-range spatial features mining of transformer network structures. Such cross-attention network structure boosts the model&#x00027;s ability to tackle vessel structures in the retinal images. Experiments on four publicly available datasets demonstrate our model&#x00027;s superior performance on vessel segmentation and the big potential of hypertensive retinopathy quantification.</p></abstract>
<kwd-group>
<kwd>retinal vessel segmentation</kwd>
<kwd>hypertensive retinopathy quantification</kwd>
<kwd>deep learning</kwd>
<kwd>cross-attention network</kwd>
<kwd>color fundus images</kwd>
</kwd-group>
<counts>
<fig-count count="3"/>
<table-count count="6"/>
<equation-count count="4"/>
<ref-count count="52"/>
<page-count count="10"/>
<word-count count="7279"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Ophthalmology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Hypertension (HT) is a chronic ailment posing a profound menace to human wellbeing, manifesting in vascular alterations (<xref ref-type="bibr" rid="B1">1</xref>). Its substantial contribution to the global prevalence and fatality rates of cardiovascular diseases (CVD) cannot be overstated. The escalated incidence and mortality rates are not solely attributable to HT&#x00027;s correlation with CVD but also to the ramifications of hypertension-mediated organ damage (HMOD). This encompasses structural and functional modifications across pivotal organs, including arteries, heart, brain, kidneys, vessels, and the retina, signifying preclinical or asymptomatic CVD (<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B3">3</xref>). HT management&#x00027;s principal aim remains to deter CVD incidence and mortality rates. Achieving this goal mandates meticulous adherence to HT guidelines, emphasizing precise blood pressure monitoring and evaluating target organ damage (<xref ref-type="bibr" rid="B4">4</xref>). Consequently, the early identification of HT-mediated organ damage emerges as a pivotal concern.</p>
<p>The retinal vascular system shares commonalities in structural, functional, and embryological aspects with the vascular systems of the heart, brain, and kidneys (<xref ref-type="bibr" rid="B5">5</xref>&#x02013;<xref ref-type="bibr" rid="B9">9</xref>). Compared to other microvascular territories, the distinctive attributes of the retinal microcirculation enable relatively straightforward detection of localized HMOD (<xref ref-type="bibr" rid="B5">5</xref>, <xref ref-type="bibr" rid="B9">9</xref>). Its capacity to offer a non-invasive and uncomplicated diagnostic tool positions retinal visualization as the simplest means of elucidating the microcirculatory system. In hypertensive patients, retinal microvasculature gives insight into the wellbeing of the heart, kidneys, and brain (<xref ref-type="bibr" rid="B5">5</xref>, <xref ref-type="bibr" rid="B10">10</xref>, <xref ref-type="bibr" rid="B11">11</xref>). Early detection of HT-mediated retinal changes indirectly mirrors the vascular status of these organs, facilitating refined evaluation of cardiovascular risk stratification, timely interventions, and improved prognostication, thereby holding substantial clinical significance. Traditional clinical methodologies for diagnosing HT-mediated retinal alterations, while reliant on the proficiency of ophthalmic professionals, often demand considerable time and specialized expertise (<xref ref-type="bibr" rid="B12">12</xref>). <xref ref-type="fig" rid="F1">Figure 1</xref> presents a sample fundus image, demonstrating the complexity of the retinal vasculature and image intensity variation. However, integrating AI-based models in ophthalmology holds promising prospects for revolutionizing this paradigm. 
Leveraging machine learning algorithms and deep neural networks, AI-enabled diagnostic tools have demonstrated the potential to expedite and enhance the assessment of HT-related retinal vessel changes (<xref ref-type="bibr" rid="B13">13</xref>&#x02013;<xref ref-type="bibr" rid="B17">17</xref>). These AI models learn from extensive datasets of annotated medical images, swiftly recognizing subtle retinal anomalies that might elude human detection. By automating the analysis and interpretation of retinal images, AI-based systems offer the prospect of reducing diagnostic timeframes, improving accuracy, and potentially mitigating the need for extensive human oversight. In this work, we proposed a heterogeneous features cross-attention neural network to tackle the retinal vessel segmentation task with color fundus images.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Sample retinal fundus image for vessel segmentation and hypertensive retinopathy quantification. The yellow areas in Ground Truth represent the retinal vessel area that needs to be segmented for disease analysis.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmed-11-1377479-g0001.tif"/>
</fig>
</sec>
<sec id="s2">
<title>2 Related work</title>
<p>Segmenting blood vessels in retinal color fundus images plays a pivotal role in the diagnostic process of hypertensive retinopathy. Over the years, researchers have explored computer-assisted methodologies to tackle this task. For instance, Annunziata and Trucco (<xref ref-type="bibr" rid="B18">18</xref>) introduced a novel curvature segmentation technique leveraging an accelerating filter bank implemented via a speed-up convolutional sparse coding filter learning approach. Their method employs a warm initialization strategy, kickstarted by meticulously crafted filters. These filters are adept at capturing the visual characteristics of curvilinear structures, subsequently fine-tuned through convolutional sparse coding. Similarly, Mar&#x000ED;n et al. (<xref ref-type="bibr" rid="B19">19</xref>) delved into the realm of hand-crafted feature learning methods, harnessing gray-level and moment invariant-based features for vessel segmentation. However, despite the efficacy of such techniques, the manual crafting of filters is inherently time-intensive and prone to biases, necessitating a shift toward more automated and data-driven approaches in this domain.</p>
<p>Deep learning techniques based on data analysis have demonstrated superior performance to conventional retinal vessel segmentation approaches (<xref ref-type="bibr" rid="B18">18</xref>&#x02013;<xref ref-type="bibr" rid="B20">20</xref>). For instance, Maninis et al. (<xref ref-type="bibr" rid="B21">21</xref>) developed a method wherein feature maps derived from a side output layer contributed to vessel and optic disc segmentation. Along a similar line, Oliveira et al. (<xref ref-type="bibr" rid="B22">22</xref>) combined the benefits of stationary wavelet transform&#x00027;s multi-scale analysis with a multi-scale full convolutional neural network, resulting in a technique adept at accommodating variations in the width and orientation of retinal vessel structures. In terms of exploiting the advance of the U-Net structure, there are previous methods that achieved promising performance. For example, Yan et al. (<xref ref-type="bibr" rid="B23">23</xref>) implemented a joint loss function in U-Net, comprising two components responsible for pixel-wise and segment-level losses, aiming to enhance the model&#x00027;s ability to balance segmentation between thicker and thinner vessels. Mou et al. (<xref ref-type="bibr" rid="B24">24</xref>) embedded dense dilated convolutional blocks between encoder and decoder cells at corresponding levels of a U-shaped network, employing a regularized walk algorithm for post-processing model predictions. Similarly, Wang et al. (<xref ref-type="bibr" rid="B25">25</xref>) proposed a Dual U-Net with two encoders: one focused on spatial information extraction and the other on context information. They introduced a novel module to merge information from both paths.</p>
<p>Despite the proficiency of existing deep learning methodologies in segmenting thicker vessels, there remains a challenge in combining heterogeneous features from different stages of the deep learning models via Transformers and CNN models. Generally, improving deep learning-based techniques for vessel segmentation can be approached from various angles, including multi-stage feature fusion and optimization of loss functions. This work proposes a heterogeneous feature cross-attention neural network to address the above challenge.</p>
</sec>
<sec sec-type="materials and methods" id="s3">
<title>3 Materials and methods</title>
<sec>
<title>3.1 Heterogeneous features cross-attention neural network</title>
<p>A detailed model structure overview is shown in <xref ref-type="fig" rid="F2">Figure 2</xref>. In detail, two branches of feature extraction modules are proposed to extract heterogeneous features from different stages of the backbone network. Specifically, there is a CNN-based (Conv-Block) and a transformer-based (Trans-Block) branch, which focus on local semantic and long-range spatial information, respectively. Both types of feature information are important for the vessel segmentation task.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Figure of our proposed model structure. Our model contains three modules, including Trans-Block, CNN-Block and Fusion-Block. The detailed structure of each module is shown in the figure.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmed-11-1377479-g0002.tif"/>
</fig>
<p>The interaction between the two branches is used as a cross-attention module to emphasize the essential heterogeneous (semantic and spatial) features. It is used as the main structure to facilitate the interaction and integration of local and long-range global features. Drawing inspiration from the work by Peng et al. (<xref ref-type="bibr" rid="B26">26</xref>), the intersecting network architecture within our model ensures that both Conv-Block and Trans-Block can concurrently learn features derived from the preceding Conv-Block and Trans-Block, respectively.</p>
<sec>
<title>3.1.1 CNN blocks</title>
<p>In the structure depicted in <xref ref-type="fig" rid="F2">Figure 2</xref>, the CNN branch adopts a hierarchical structure, leading to a reduction in the resolution of feature maps as the network depth increases and the channel count expands. Each phase of this structure consists of several convolution blocks, each housing multiple bottlenecks. These bottlenecks, in accordance with the ResNet framework (<xref ref-type="bibr" rid="B27">27</xref>), comprise a sequence involving down-projection, spatial convolution, up-projection, and a residual connection to maintain information flow within the block. Distinctly, visual transformers (<xref ref-type="bibr" rid="B28">28</xref>) condense an image patch into a vector in one step, which unfortunately leads to the loss of localized details. Conversely, in CNNs, the convolutional kernels operate on feature maps, overlapping to retain intricate local features. Consequently, the CNN branch ensures a sequential provision of localized feature intricacies to benefit the transformer branch.</p>
</sec>
<sec>
<title>3.1.2 Transformer blocks</title>
<p>In line with the approach introduced in ViT (<xref ref-type="bibr" rid="B28">28</xref>), this segment consists of N sequential transformer blocks, as showcased in <xref ref-type="fig" rid="F2">Figure 2</xref>. Each transformer block combines a multi-head self-attention module with an MLP block, encompassing an up-projection fully connected layer and a down-projection fully connected layer. Throughout this structure, LayerNorms (<xref ref-type="bibr" rid="B29">29</xref>) are applied before each layer, and residual connections are integrated into both the self-attention layer and the MLP block. For tokenization purposes, the feature maps generated by the backbone module are compressed into 16 &#x000D7; 16 patch embeddings without overlap. This compression is achieved using a linear projection layer, implemented via a 3 &#x000D7; 3 convolution with a stride of 1. Notably, considering that the CNN branch (3 &#x000D7; 3 convolution) encodes both local features and spatial location information, the necessity for positional embeddings diminishes. This strategic adaptation results in an improved image resolution, advantageous for subsequent tasks related to vision.</p>
</sec>
<sec>
<title>3.1.3 Feature fusion blocks</title>
<p>Aligning the feature maps derived from the CNN branch with the patch embeddings within the transformer branch poses a significant challenge. To tackle this, we introduce the feature fusion block, aiming to continuously and interactively integrate local features with global representations. The substantial difference in dimensionalities between the CNN and transformer features is noteworthy. While CNN feature maps are characterized by dimensions <italic>C</italic>&#x000D7;<italic>H</italic>&#x000D7;<italic>W</italic> (representing channels, height, and width, respectively), patch embeddings assume a shape of (<italic>L</italic>&#x0002B;1) &#x000D7; <italic>J</italic>, where <italic>L</italic>, 1, and <italic>J</italic> denote the count of image patches, class token, and embedding dimensions, respectively. To reconcile these disparities, feature maps transmitted to the transformer branch undergo an initial 1 &#x000D7; 1 convolution to align their channel numbers with the patch embeddings. Subsequently, a down-sampling module (depicted in <xref ref-type="fig" rid="F2">Figure 2</xref>) aligns spatial dimensions, following which the feature maps are amalgamated with patch embeddings, as portrayed in <xref ref-type="fig" rid="F2">Figure 2</xref>. Upon feedback from the transformer to the CNN branch, the patch embeddings necessitate up-sampling (as illustrated in <xref ref-type="fig" rid="F2">Figure 2</xref>) to match the spatial scale. Following this, aligning the channel dimension with that of the CNN feature maps through a 1 &#x000D7; 1 convolution is performed, integrating these adjusted embeddings into the feature maps. Furthermore, LayerNorm and BatchNorm modules are employed to regularize the features. Moreover, a significant semantic disparity arises between feature maps and patch embeddings. While feature maps stem from local convolutional operators, patch embeddings arise from global self-attention mechanisms. 
Consequently, the feature fusion block is incorporated into each block (excluding the initial one) to bridge this semantic gap progressively.</p>
</sec>
</sec>
<sec>
<title>3.2 Experiments</title>
<sec>
<title>3.2.1 Datasets</title>
<p>Four public datasets, <italic>DRIVE</italic> (<xref ref-type="bibr" rid="B30">30</xref>), <italic>CHASEDB1</italic> (<xref ref-type="bibr" rid="B31">31</xref>), <italic>STARE</italic> (<xref ref-type="bibr" rid="B32">32</xref>), and <italic>HRF</italic> (<xref ref-type="bibr" rid="B33">33</xref>), were used in our experiments. The images of these datasets were captured by different devices and with different image sizes. A detailed description of each dataset is elaborated below:</p>
<list list-type="simple">
<list-item><p>1) <italic>DRIVE</italic> dataset: the dataset known as <italic>DRIVE</italic> comprises 40 pairs of fundus images accompanied by their respective labels for vessel segmentation. Each image within this dataset measures 565 &#x000D7; 584 pixels. Furthermore, the dataset has been partitioned into distinct training and test sets, encompassing 20 pairs of images and corresponding labels within each set. Notably, in the test set, every image has undergone labeling by two medical professionals. Typically, the initial label is considered the reference standard (ground truth), while the second label serves as a human observation used to assess accuracy.</p></list-item>
<list-item><p>2) <italic>CHASEDB1</italic> dataset: the CHASEDB1 dataset encompasses a collection of 28 images, comprising samples from both the left and right eyes, with each image possessing dimensions of 999 &#x000D7; 960 pixels. Past investigations have specifically delineated the dataset&#x00027;s utilization, designating a distinct partition for training and testing purposes. According to prior scholarly research (<xref ref-type="bibr" rid="B31">31</xref>), a selection strategy has been employed, with the final eight images demarcated for evaluation as testing samples, while the preceding images have been earmarked for utilization as training samples. This segmentation strategy in the dataset facilitates a structured approach for model training and evaluation, enabling a systematic analysis of algorithm performance on separate subsets of images to ensure robustness and generalizability in vessel segmentation tasks.</p></list-item>
<list-item><p>3) <italic>STARE</italic> dataset: each image within the <italic>STARE</italic> dataset measures 700 &#x000D7; 605 pixels. This dataset comprises 20 color fundus images without a predefined division into training and test sets. Previous studies have employed two common schemes for test set allocation to assess method performance. One approach involves assigning 10 images to the training set and the remaining 10 to the test set. Alternatively, the Leave-One-Out method has been utilized, wherein each image successively serves as the test set while the remaining images form the training set for evaluation purposes in different iterations.</p></list-item>
<list-item><p>4) <italic>HRF</italic> dataset: the HRF dataset comprises 45 fundus images with a resolution of 3,504 &#x000D7; 2,336 pixels. From this dataset, 15 images are allocated to the training set, while the remaining 30 images constitute the test set. To mitigate computational expenses, both the images and their corresponding labels are downsampled twice, as noted in (<xref ref-type="bibr" rid="B34">34</xref>).</p></list-item>
</list>
</sec>
<sec>
<title>3.2.2 Loss functions</title>
<p>Commonly utilized region-based losses, like Dice loss (<xref ref-type="bibr" rid="B35">35</xref>), often result in highly precise segmentation. However, they tend to disregard the intricate vessel shapes due to a multitude of pixels outside the target area, overshadowing the significance of those delineating the vessel (<xref ref-type="bibr" rid="B36">36</xref>&#x02013;<xref ref-type="bibr" rid="B40">40</xref>). This oversight may contribute to relatively imprecise retinal vessel segmentation and, consequently, inaccurate quantification of hypertensive retinopathy. In response, we incorporated the TopK loss (<xref ref-type="disp-formula" rid="E1">Equation 1</xref>) (<xref ref-type="bibr" rid="B41">41</xref>, <xref ref-type="bibr" rid="B42">42</xref>) to emphasize the retinal vessels during the training process specifically. When objects exhibit sizes that are not notably smaller in comparison to the convolutional neural network&#x00027;s (CNN) receptive field, the vessel emerges as the most variable component within the prediction, displaying the least certainty; thus, the loss within the vessel region tends to be the highest among the predictions (<xref ref-type="bibr" rid="B43">43</xref>). Building upon these observations and rationale, the TopK loss is formulated as follows:</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M1"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>o</mml:mi><mml:mi>p</mml:mi><mml:mi>K</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mi>K</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo class="qopname">log</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>g</italic><sub><italic>i</italic></sub> is the ground truth of pixel <italic>i</italic>, <italic>s</italic><sub><italic>i</italic></sub> is the corresponding predicted probability, and <italic>K</italic> is the set of the <italic>k%</italic> pixels with the lowest prediction accuracy. While sole vessel-focused loss often causes training instability (<xref ref-type="bibr" rid="B44">44</xref>), region-based loss, such as Dice loss (<xref ref-type="disp-formula" rid="E2">Equation 2</xref>) (<xref ref-type="bibr" rid="B35">35</xref>), is needed at the early stage of the training. We represent Dice loss as follows:</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M2"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:mn>2</mml:mn><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02229;</mml:mo><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:mo>&#x0002B;</mml:mo><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>V</italic><sub><italic>g</italic></sub> is the ground truth label and <italic>V</italic><sub><italic>s</italic></sub> is the prediction result of segmentation. We coupled TopK with region-based Dice loss as our final loss function (<xref ref-type="disp-formula" rid="E3">Equation 3</xref>) for the retinal vessel segmentation.</p>
<disp-formula id="E3"><label>(3)</label><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>o</mml:mi><mml:mi>p</mml:mi><mml:mi>K</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
</sec>
<sec>
<title>3.2.3 Experimental setting</title>
<p>To enrich the dataset, we introduce random rotations on the fly to the input images in the training dataset, applied to both segmentation tasks. Specifically, these rotations span from &#x02013;20 to 20 degrees. Additionally, 10% of the training dataset is randomly chosen to serve as the validation dataset. The proposed network was implemented utilizing the PyTorch Library and executed on the Nvidia GeForce TITAN Xp GPU. Throughout the training phase, we employed the AdamW optimizer to fine-tune the deep model. To ensure effective training, a gradually decreasing learning rate was adopted, commencing at 0.0001, alongside a momentum parameter set at 0.9. For each iteration, a random patch of size 118 &#x000D7; 118 from the image was selected for training purposes, with a specified batch size of 16. A backbone of ResNet50 (<xref ref-type="bibr" rid="B27">27</xref>) is used in this work.</p>
</sec>
<sec>
<title>3.2.4 Evaluation metrics</title>
<p>The model&#x00027;s output is represented as a probability map, assigning to each pixel the probability of being associated with the vessel class. Throughout the experiments, a probability threshold of 0.5 was employed to yield the results. To comprehensively assess the efficacy of our proposed framework during the testing phase, the subsequent metrics will be computed:</p>
<list list-type="bullet">
<list-item><p>Acc (accuracy) = (TP &#x0002B; TN) / (TP &#x0002B; TN &#x0002B; FP &#x0002B; FN),</p></list-item>
<list-item><p>SE (sensitivity) = TP / (TP &#x0002B; FN),</p></list-item>
<list-item><p>SP (specificity) = TN / (TN &#x0002B; FP),</p></list-item>
<list-item><p>F1 (F1 score) = (2 &#x000D7; TP) / (2 &#x000D7; TP &#x0002B; FP &#x0002B; FN),</p></list-item>
<list-item><p>AUROC = area under the receiver operating characteristic curve.</p></list-item>
</list>
<p>In this context, the correct classification of a vessel pixel is categorized as a true positive (TP), while misclassification is identified as a false positive (FP). Correspondingly, accurate classification of a non-vessel pixel is considered a true negative (TN), whereas misclassification is denoted as a false negative (FN).</p>
</sec>
</sec>
<sec>
<title>3.3 Compared methods</title>
<p>We compared our approach to other classic and state-of-the-art models that have achieved promising performance on different medical image segmentation tasks. All of the experiments are conducted under the same experimental setting. The compared methods are briefly introduced below:</p>
<list list-type="bullet">
<list-item><p>Unet (<xref ref-type="bibr" rid="B45">45</xref>): Unet is a CNN architecture used for image segmentation tasks. Its U-shaped design includes an encoder (contracting path) for feature extraction and a symmetric decoder (expansive path) for generating segmented outputs. The network uses skip connections to preserve fine details and context, making it effective for tasks like biomedical image segmentation.</p></list-item>
<list-item><p>Unet&#x0002B;&#x0002B; (<xref ref-type="bibr" rid="B46">46</xref>): Unet&#x0002B;&#x0002B; is an advanced version of the U-Net architecture designed for image segmentation tasks. It improves upon U-Net by introducing nested skip connections and aggregation pathways, allowing better multi-scale feature integration and context aggregation. This enhancement leads to more accurate and precise segmentation results compared to the original U-Net model.</p></list-item>
<list-item><p>Swin-Transformer (<xref ref-type="bibr" rid="B47">47</xref>): Swin-Transformer is a hierarchical vision transformer (<xref ref-type="bibr" rid="B28">28</xref>) structure. It uses shifted windows to process image patches hierarchically, allowing for improved global context understanding. This architecture has demonstrated competitive segmentation performance with efficient computation.</p></list-item>
<list-item><p>AttenUnet (<xref ref-type="bibr" rid="B48">48</xref>): The AttenUnet enhances the traditional U-Net architecture that integrates attention mechanisms. These mechanisms enable the network to focus on important image features during segmentation tasks. It improves accuracy by refining object delineation and suppressing irrelevant information. This variant is particularly effective in tasks like medical image segmentation, where precise localization of structures is essential.</p></list-item>
<list-item><p>TransUnet (<xref ref-type="bibr" rid="B49">49</xref>): TransUNet is a proposed architecture to improve medical image segmentation, addressing limitations seen in the widely used U-Net model. It combines the strengths of Transformers&#x00027; global self-attention with U-Net&#x00027;s precise localization abilities. The Transformer part encodes image patches from a CNN feature map to capture global context, while the decoder integrates this with high-resolution feature maps for accurate localization.</p></list-item>
</list>
</sec>
</sec>
<sec sec-type="results" id="s4">
<title>4 Results</title>
<sec>
<title>4.1 Vessel segmentation performance</title>
<p><xref ref-type="fig" rid="F3">Figure 3</xref> illustrates qualitative comparison with other compared methods on the test dataset. <xref ref-type="table" rid="T1">Tables 1</xref>&#x02013;<xref ref-type="table" rid="T4">4</xref> show the quantitative performance of <italic>Ours</italic> and other methods on four different datasets, respectively.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Qualitative results of the vessel segmentation. We compare our model with Unet (<xref ref-type="bibr" rid="B45">45</xref>), Unet&#x0002B;&#x0002B; (<xref ref-type="bibr" rid="B46">46</xref>), Swin-Transformer (<xref ref-type="bibr" rid="B47">47</xref>), AttenUnet (<xref ref-type="bibr" rid="B48">48</xref>), TransUnet (<xref ref-type="bibr" rid="B49">49</xref>). Our method can produce more accurate segmentation results than the other methods compared with the ground truth.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmed-11-1377479-g0003.tif"/>
</fig>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Quantitative results comparison between our methods and other compared state-of-the-art methods on <italic>DRIVE</italic> dataset.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Methods</bold></th>
<th valign="top" align="center"><bold><italic>Acc</italic></bold></th>
<th valign="top" align="center"><bold><italic>SE</italic></bold></th>
<th valign="top" align="center"><bold><italic>SP</italic></bold></th>
<th valign="top" align="center"><bold><italic>F1</italic></bold></th>
<th valign="top" align="center"><bold><italic>AUROC</italic></bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><italic>Unet</italic></td>
<td valign="top" align="center">90.1 (89.1, 90.8)</td>
<td valign="top" align="center">76.5 (74.2, 78.1)</td>
<td valign="top" align="center">97.7 (95.8, 99.1)</td>
<td valign="top" align="center">80.3 (78.3, 82.3)</td>
<td valign="top" align="center">97.2 (95.0, 98.0)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Unet&#x0002B;&#x0002B;</italic></td>
<td valign="top" align="center">91.3 (90.4, 92.7)</td>
<td valign="top" align="center">79.2 (78.0, 80.6)</td>
<td valign="top" align="center">97.9 (95.2, 99.0)</td>
<td valign="top" align="center">81.0 (79.2, 82.5)</td>
<td valign="top" align="center">97.1 (95.8, 99.0)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Swin-Transformer</italic></td>
<td valign="top" align="center">92.3 (91.5, 92.9)</td>
<td valign="top" align="center">79.0 (77.9, 80.6)</td>
<td valign="top" align="center">98.1 (96.4, 99.2)</td>
<td valign="top" align="center">82.0 (81.0, 84.0)</td>
<td valign="top" align="center">97.6 (96.1, 98.3)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>AttenUnet</italic></td>
<td valign="top" align="center">92.1 (91.3, 93.2)</td>
<td valign="top" align="center">80.0 (78.3, 82.0)</td>
<td valign="top" align="center">98.3 (96.1, 99.5)</td>
<td valign="top" align="center">80.4 (78.5, 82.1)</td>
<td valign="top" align="center">97.4 (96.2, 98.6)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>TransUnet</italic></td>
<td valign="top" align="center">91.8 (91.2, 93.0)</td>
<td valign="top" align="center">80.3 (79.1, 81.3)</td>
<td valign="top" align="center">98.3 (97.2, 99.6)</td>
<td valign="top" align="center">80.1 (78.8, 80.9)</td>
<td valign="top" align="center">97.3 (96.4, 99.0)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Ours</italic></td>
<td valign="top" align="center"><bold>93.8</bold> (92.9, 94.8)</td>
<td valign="top" align="center"><bold>81.0</bold> (80.2, 82.6)</td>
<td valign="top" align="center"><bold>98.5</bold> (96.7, 99.1)</td>
<td valign="top" align="center"><bold>83.3</bold> (78.8, 82.1)</td>
<td valign="top" align="center"><bold>97.9</bold> (96.2, 98.8)</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Performance is reported with <italic>Acc, SE, SP, F1</italic> and <italic>AUROC</italic>. 95% confidence interval is presented in the bracket. The best performance is highlighted in bold.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Quantitative results comparison between our methods and other compared state-of-the-art methods on <italic>CHASEDB1</italic> dataset.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Methods</bold></th>
<th valign="top" align="center"><bold><italic>Acc</italic></bold></th>
<th valign="top" align="center"><bold><italic>SE</italic></bold></th>
<th valign="top" align="center"><bold><italic>SP</italic></bold></th>
<th valign="top" align="center"><bold><italic>F1</italic></bold></th>
<th valign="top" align="center"><bold><italic>AUROC</italic></bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><italic>Unet</italic></td>
<td valign="top" align="center">91.2 (89.8, 92.3)</td>
<td valign="top" align="center">60.3 (58.2, 61.4)</td>
<td valign="top" align="center">97.1 (96.4, 97.9)</td>
<td valign="top" align="center">79.7 (76.9, 81.0)</td>
<td valign="top" align="center">97.7 (96.6, 98.2)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Unet&#x0002B;&#x0002B;</italic></td>
<td valign="top" align="center">91.6 (89.8, 93.2)</td>
<td valign="top" align="center">63.0 (61.2, 65.0)</td>
<td valign="top" align="center">97.3 (95.5, 98.3)</td>
<td valign="top" align="center">80.1 (78.5, 82.1)</td>
<td valign="top" align="center">97.7 (96.2, 98.3)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Swin-Transformer</italic></td>
<td valign="top" align="center">92.3 (91.0, 94.1)</td>
<td valign="top" align="center">62.9 (61.4, 64.0)</td>
<td valign="top" align="center">97.8 (96.2, 98.5)</td>
<td valign="top" align="center">80.3 (78.7, 81.7)</td>
<td valign="top" align="center">97.9 (96.2, 98.8)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>AttenUnet</italic></td>
<td valign="top" align="center">92.4 (91.0, 94.2)</td>
<td valign="top" align="center">67.7 (65.5, 68.3)</td>
<td valign="top" align="center">97.7 (96.2, 98.4)</td>
<td valign="top" align="center">79.9 (77.4, 80.6)</td>
<td valign="top" align="center">97.8 (97.0, 98.5)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>TransUnet</italic></td>
<td valign="top" align="center">92.6 (90.2, 94.4)</td>
<td valign="top" align="center">66.1 (64.6, 67.7)</td>
<td valign="top" align="center">98.0 (96.7, 99.0)</td>
<td valign="top" align="center">80.4 (78.9, 82.1)</td>
<td valign="top" align="center">98.2 (96.3, 99.9)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Ours</italic></td>
<td valign="top" align="center"><bold>93.7</bold> (91.7, 95.2)</td>
<td valign="top" align="center"><bold>69.0</bold> (67.4, 70.5)</td>
<td valign="top" align="center"><bold>98.9</bold> (97.2, 99.3)</td>
<td valign="top" align="center"><bold>81.6</bold> (81.0, 93.0)</td>
<td valign="top" align="center"><bold>98.9</bold> (98.1, 99.3)</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Performance is reported with <italic>Acc, SE, SP, F1</italic> and <italic>AUROC</italic>. 95% confidence interval is presented in the bracket. The best performance is highlighted in bold.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Quantitative results comparison between our methods and other compared state-of-the-art methods on <italic>STARE</italic> dataset.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Methods</bold></th>
<th valign="top" align="center"><italic><bold>Acc</bold></italic></th>
<th valign="top" align="center"><italic><bold>SE</bold></italic></th>
<th valign="top" align="center"><italic><bold>SP</bold></italic></th>
<th valign="top" align="center"><italic><bold>F1</bold></italic></th>
<th valign="top" align="center"><italic><bold>AUROC</bold></italic></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><italic>Unet</italic></td>
<td valign="top" align="center">93.3 (91.7, 95.2)</td>
<td valign="top" align="center">80.8 (78.7, 81.8)</td>
<td valign="top" align="center">98.1 (97.1, 99.0)</td>
<td valign="top" align="center">84.3 (82.2, 86.3)</td>
<td valign="top" align="center">98.1 (97.0, 99.0)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Unet&#x0002B;&#x0002B;</italic></td>
<td valign="top" align="center">94.2 (92.5, 96.0)</td>
<td valign="top" align="center">82.6 (81.6, 83.1)</td>
<td valign="top" align="center">98.0 (96.4, 99.0)</td>
<td valign="top" align="center">84.5 (83.7, 85.2)</td>
<td valign="top" align="center">98.3 (97.1, 99.2)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Swin-Transformer</italic></td>
<td valign="top" align="center">93.9 (92.8, 94.7)</td>
<td valign="top" align="center">83.0 (82.0, 84.2)</td>
<td valign="top" align="center">98.2 (96.9, 99.1)</td>
<td valign="top" align="center">84.1 (82.5, 86.2)</td>
<td valign="top" align="center">98.5 (97.4, 99.3)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>AttenUnet</italic></td>
<td valign="top" align="center">93.6 (92.7, 94.7)</td>
<td valign="top" align="center">82.9 (81.7, 84.2)</td>
<td valign="top" align="center">98.6 (96.2, 99.3)</td>
<td valign="top" align="center">84.6 (82.9, 86.3)</td>
<td valign="top" align="center">98.6 (96.7, 99.5)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>TransUnet</italic></td>
<td valign="top" align="center">93.4 (91.9, 94.7)</td>
<td valign="top" align="center">83.2 (81.6, 85.0)</td>
<td valign="top" align="center">98.7 (96.6, 99.4)</td>
<td valign="top" align="center">85.2 (83.7, 86.9)</td>
<td valign="top" align="center">98.1 (97.2, 99.1)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Ours</italic></td>
<td valign="top" align="center"><bold>94.8</bold> (92.9, 95.6)</td>
<td valign="top" align="center"><bold>84.2</bold> (82.6, 86.1)</td>
<td valign="top" align="center"><bold>99.2</bold> (97.7, 99.4)</td>
<td valign="top" align="center"><bold>86.6</bold> (85.9, 87.4)</td>
<td valign="top" align="center"><bold>99.3</bold> (98.4, 99.7)</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Performance is reported with <italic>Acc, SE, SP, F1</italic> and <italic>AUROC</italic>. 95% confidence interval is presented in the bracket. The best performance is highlighted in bold.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Quantitative results comparison between our methods and other compared state-of-the-art methods on <italic>HRF</italic> dataset.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Methods</bold></th>
<th valign="top" align="center"><italic><bold>Acc</bold></italic></th>
<th valign="top" align="center"><italic><bold>SE</bold></italic></th>
<th valign="top" align="center"><italic><bold>SP</bold></italic></th>
<th valign="top" align="center"><italic><bold>F1</bold></italic></th>
<th valign="top" align="center"><italic><bold>AUROC</bold></italic></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><italic>Unet</italic></td>
<td valign="top" align="center">94.4 (92.3, 96.0)</td>
<td valign="top" align="center">77.7 (75.8, 79.0)</td>
<td valign="top" align="center">95.1 (93.8, 96.7)</td>
<td valign="top" align="center">78.6 (76.9, 79.1)</td>
<td valign="top" align="center">97.2 (96.0, 98.0)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Unet&#x0002B;&#x0002B;</italic></td>
<td valign="top" align="center">94.8 (92.8, 96.2)</td>
<td valign="top" align="center">78.9 (78.0, 79.6)</td>
<td valign="top" align="center">95.1 (93.8, 96.4)</td>
<td valign="top" align="center">79.3 (78.7, 80.5)</td>
<td valign="top" align="center">97.3 (96.1, 98.3)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Swin-Transformer</italic></td>
<td valign="top" align="center">94.6 (92.9, 96.0)</td>
<td valign="top" align="center">79.1 (77.9, 80.5)</td>
<td valign="top" align="center">94.4 (92.7, 96.0)</td>
<td valign="top" align="center">79.5 (77.7, 80.6)</td>
<td valign="top" align="center">97.8 (96.2, 98.6)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>AttenUnet</italic></td>
<td valign="top" align="center">95.8 (93.9, 96.9)</td>
<td valign="top" align="center">77.6 (75.8, 79.1)</td>
<td valign="top" align="center">94.6 (93.9, 95.4)</td>
<td valign="top" align="center">78.8 (76.9, 79.5)</td>
<td valign="top" align="center">98.2 (97.0, 99.0)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>TransUnet</italic></td>
<td valign="top" align="center">95.3 (94.2, 96.3)</td>
<td valign="top" align="center">78.6 (77.4, 79.8)</td>
<td valign="top" align="center">94.7 (92.9, 96.3)</td>
<td valign="top" align="center">78.9 (77.0, 79.9)</td>
<td valign="top" align="center">98.3 (97.2, 99.1)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Ours</italic></td>
<td valign="top" align="center"><bold>96.2</bold> (95.0, 97.1)</td>
<td valign="top" align="center"><bold>79.9</bold> (78.0, 81.0)</td>
<td valign="top" align="center"><bold>94.9</bold> (92.8, 96.0)</td>
<td valign="top" align="center"><bold>79.9</bold> (77.9, 81.2)</td>
<td valign="top" align="center"><bold>98.8</bold> (97.9, 99.3)</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Performance is reported with <italic>Acc, SE, SP, F1</italic> and <italic>AUROC</italic>. 95% confidence interval is presented in the bracket. The best performance is highlighted in bold.</p>
</table-wrap-foot>
</table-wrap>
<p>Our proposed method can outperform other compared methods on <italic>DRIVE, CHASEDB1, STARE</italic>, and <italic>HRF</italic> datasets, respectively. In detail, <italic>Ours</italic> achieved 83.3% <italic>F1</italic> on <italic>DRIVE</italic> dataset, which outperformed <italic>Unet</italic> (<xref ref-type="bibr" rid="B45">45</xref>) by 3.6%, outperformed <italic>Swin-Transformer</italic> (<xref ref-type="bibr" rid="B47">47</xref>) by 1.6% and outperformed <italic>TransUnet</italic> (<xref ref-type="bibr" rid="B49">49</xref>) by 4.0%. <italic>Ours</italic> achieved 81.6% <italic>F1</italic> on <italic>CHASEDB1</italic> dataset, which outperformed <italic>Unet&#x0002B;&#x0002B;</italic> (<xref ref-type="bibr" rid="B46">46</xref>) by 1.9%, outperformed <italic>AttenUnet</italic> (<xref ref-type="bibr" rid="B48">48</xref>) by 2.1% and outperformed <italic>TransUnet</italic> (<xref ref-type="bibr" rid="B49">49</xref>) by 1.5%. <italic>Ours</italic> achieved 86.6% <italic>F1</italic> on <italic>STARE</italic> dataset, which outperformed <italic>Unet</italic> (<xref ref-type="bibr" rid="B45">45</xref>) by 2.7%, outperformed <italic>AttenUnet</italic> (<xref ref-type="bibr" rid="B48">48</xref>) by 2.4% and outperformed <italic>TransUnet</italic> (<xref ref-type="bibr" rid="B49">49</xref>) by 1.6%. <italic>Ours</italic> achieved 79.9% <italic>F1</italic> on <italic>HRF</italic> dataset, which outperformed <italic>Unet&#x0002B;&#x0002B;</italic> (<xref ref-type="bibr" rid="B46">46</xref>) by 0.8%, outperformed <italic>Swin-Transformer</italic> (<xref ref-type="bibr" rid="B47">47</xref>) by 0.5% and outperformed <italic>TransUnet</italic> (<xref ref-type="bibr" rid="B49">49</xref>) by 1.3%. Notably, <italic>Swin-Transformer</italic> (<xref ref-type="bibr" rid="B47">47</xref>) and <italic>TransUnet</italic> (<xref ref-type="bibr" rid="B49">49</xref>) belong to the transformer-based model structure, which demonstrates a superior performance on many tasks. 
However, in this work, the limited data size is one of the leading reasons for the relatively low performance on those datasets. Another reason could be the task&#x00027;s own nature of vessel segmentation, where more local information is needed rather than the long-range relationship between pixels. Thus, given two branches with transformer and CNN structures and fusion modules, our proposed model can simultaneously tackle both the local semantic information and long-range spatial information for the segmentation task.</p>
<p><xref ref-type="fig" rid="F3">Figure 3</xref> shows the qualitative comparison between ours and other compared methods. It demonstrated that our proposed methods can segment the vessels more accurately. This is important for vessel segmentation tasks and hypertensive retinopathy quantification with more accurate vessel area calculation.</p>
</sec>
<sec>
<title>4.2 Ablation study</title>
<sec>
<title>4.2.1 Ablation study on loss functions</title>
<p>We did ablation study experiments on loss functions. We maintain the same model structure and only change the loss functions. In detail, we remove Dice loss and TopK loss, respectively, to evaluate their respective contribution to the performance of the proposed models. Furthermore, we replace TopK loss with a cross-entropy loss to validate the effectiveness of TopK loss in the segmentation task. <xref ref-type="table" rid="T5">Table 5</xref> demonstrates that Dice Loss can lead to a 6.2% <italic>F1</italic> and <italic>TopK</italic> loss can lead to a 2.9% <italic>F1</italic> performance. On the other hand, Dice loss can lead to 15.5% <italic>SE</italic> performance, and <italic>TopK</italic> loss can lead to a 2.8% <italic>SE</italic> performance on <italic>Drive</italic> dataset. Additionally, compared with cross-entropy loss, the TopK loss could lead to a 1.5% <italic>F1</italic> improvement and 2.3% <italic>SE</italic> improvement. Each loss function can boost the model&#x00027;s performance in different evaluation metrics. This demonstrated that the adopted loss function can both contribute to the learning process and benefit the vessel segmentation performance.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Quantitative ablation study results of the loss function on DRIVE dataset.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Methods</bold></th>
<th valign="top" align="center"><italic><bold>Acc</bold></italic></th>
<th valign="top" align="center"><italic><bold>SE</bold></italic></th>
<th valign="top" align="center"><italic><bold>SP</bold></italic></th>
<th valign="top" align="center"><italic><bold>F1</bold></italic></th>
<th valign="top" align="center"><italic><bold>AUROC</bold></italic></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><italic>w/o Dice loss</italic></td>
<td valign="top" align="center">86.4 (85.0, 88.0)</td>
<td valign="top" align="center">70.1 (68.2, 72.5)</td>
<td valign="top" align="center">94.4 (92.3, 96.0)</td>
<td valign="top" align="center">75.6 (74.1, 76.2)</td>
<td valign="top" align="center">94.5 (92.8, 95.6)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>w/o TopK loss</italic></td>
<td valign="top" align="center">88.9 (87.3, 89.6)</td>
<td valign="top" align="center">78.8 (76.9, 80.3)</td>
<td valign="top" align="center">96.0 (94.2, 97.2)</td>
<td valign="top" align="center">78.0 (77.0, 79.2)</td>
<td valign="top" align="center">96.3 (94.8, 97.7)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>w/ Cross-entropy loss</italic></td>
<td valign="top" align="center">90.3 (89.6, 91.0)</td>
<td valign="top" align="center">79.2 (78.5, 80.0)</td>
<td valign="top" align="center">96.9 (95.8, 97.4)</td>
<td valign="top" align="center">79.1 (78.0, 80.2)</td>
<td valign="top" align="center">96.9 (95.8, 97.5)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Ours</italic></td>
<td valign="top" align="center"><bold>93.8</bold> (92.9, 94.8)</td>
<td valign="top" align="center"><bold>81.0</bold> (80.2, 82.6)</td>
<td valign="top" align="center"><bold>98.5</bold> (96.7, 99.1)</td>
<td valign="top" align="center"><bold>80.3</bold> (78.8, 82.1)</td>
<td valign="top" align="center"><bold>97.9</bold> (96.2, 98.8)</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Performance is reported with <italic>Acc, SE, SP, F1</italic> and <italic>AUROC</italic>. 95% confidence interval is presented in the bracket. The best performance is highlighted in bold.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<title>4.2.2 Ablation study on the models&#x00027; components</title>
<p>We did ablation study experiments on the model&#x00027;s components. In detail, we maintain the same model structure and only change the models&#x00027; structure by removing different modules, including <italic>Trans-Block, CNN-Block</italic> and <italic>Fusion-Block</italic>, respectively. In detail, we remove each of those three modules, respectively, to evaluate their respective contribution to the performance of the proposed models. <xref ref-type="table" rid="T6">Table 6</xref> demonstrates that <italic>Trans-Block</italic> can lead to a 10% <italic>F1, CNN-Block</italic> can lead to a 10.3% <italic>F1</italic> performance and <italic>Fusion-Block</italic> can lead to a 7.9% <italic>F1</italic> performance boost. On the other hand, <italic>Trans-Block</italic> can lead to a 3.3% <italic>SE</italic> performance, <italic>CNN-Block</italic> can lead to a 2.3% <italic>SE</italic> performance, and <italic>Fusion-Block</italic> can lead to a 0.9% <italic>SE</italic> performance on <italic>Drive</italic> dataset. Each module can boost the model&#x00027;s performance in different evaluation metrics. This demonstrated that the proposed modules can all contribute to the learning process and benefit the vessel segmentation performance.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>Quantitative ablation study results of the model&#x00027;s components on DRIVE dataset.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Methods</bold></th>
<th valign="top" align="center"><italic><bold>Acc</bold></italic></th>
<th valign="top" align="center"><italic><bold>SE</bold></italic></th>
<th valign="top" align="center"><italic><bold>SP</bold></italic></th>
<th valign="top" align="center"><italic><bold>F1</bold></italic></th>
<th valign="top" align="center"><italic><bold>AUROC</bold></italic></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><italic>w/o Trans-Block</italic></td>
<td valign="top" align="center">88.9 (87.6, 89.5)</td>
<td valign="top" align="center">78.4 (76.8, 79.3)</td>
<td valign="top" align="center">92.1 (91.2, 92.9)</td>
<td valign="top" align="center">73.0 (71.5, 74.6)</td>
<td valign="top" align="center">95.2 (93.7, 96.6)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>w/o CNN-Block</italic></td>
<td valign="top" align="center">89.1 (87.9, 90.8)</td>
<td valign="top" align="center">79.2 (78.2, 80.6)</td>
<td valign="top" align="center">92.3 (91.4, 92.9)</td>
<td valign="top" align="center">72.8 (71.6, 73.5)</td>
<td valign="top" align="center">95.3 (93.8, 96.6)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>w/o Fusion-Block</italic></td>
<td valign="top" align="center">91.2 (89.9, 92.3)</td>
<td valign="top" align="center">80.3 (78.8, 81.6)</td>
<td valign="top" align="center">93.1 (92.1, 94.4)</td>
<td valign="top" align="center">74.4 (72.6, 76.6)</td>
<td valign="top" align="center">96.3 (95.8, 96.7)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Ours</italic></td>
<td valign="top" align="center"><bold>93.8</bold> (92.9, 94.8)</td>
<td valign="top" align="center"><bold>81.0</bold> (80.2, 82.6)</td>
<td valign="top" align="center"><bold>98.5</bold> (96.7, 99.1)</td>
<td valign="top" align="center"><bold>80.3</bold> (78.8, 82.1)</td>
<td valign="top" align="center"><bold>97.9</bold> (96.2, 98.8)</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Performance is reported with <italic>Acc, SE, SP, F1</italic> and <italic>AUROC</italic>. 95% confidence interval is presented in the bracket. The best performance is highlighted in bold.</p>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
</sec>
<sec id="s5">
<title>5 Hypertensive retinopathy quantification</title>
<p>The proposed method has demonstrated a promising retinal vessel segmentation performance on different datasets and benchmarks. Additionally, precise segmentation of retinal vessels plays a vital role in hypertensive retinopathy detection, whereas manual segmentation tends to be cumbersome and time-consuming (<xref ref-type="bibr" rid="B50">50</xref>). The model proposed can generate a binary mask distinguishing vessel pixels as one and background pixels as zero. This mask effectively quantifies the total count of vessel pixels within each mask. The ratio (<italic>R</italic><sub><italic>vessel</italic></sub>) between the count of vessel pixels and non-vessel pixels is defined as follows:</p>
<disp-formula id="E4"><label>(4)</label><mml:math id="M4"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>v</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>v</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>N</italic><sub><italic>v</italic></sub> represents the count of vessel pixels, and <italic>N</italic><sub><italic>non</italic></sub> denotes the count of non-vessel pixels. The ratio <italic>R</italic><sub><italic>vessel</italic></sub> (<xref ref-type="disp-formula" rid="E4">Equation 4</xref>) serves as a valuable metric in identifying hypertensive retinopathy within fundus images. Hypertensive retinopathy leads to vascular constriction (<xref ref-type="bibr" rid="B51">51</xref>, <xref ref-type="bibr" rid="B52">52</xref>), resulting in a decrease in the count of vessel pixels (<italic>R</italic><sub><italic>vessel</italic></sub>).</p>
<p>Detection of hypertensive retinopathy, characterized by vascular constriction, involves assessing changes in <italic>R</italic><sub><italic>vessel</italic></sub> across sequential examinations. A decrease in <italic>R</italic><sub><italic>vessel</italic></sub> between examinations indicates the occurrence or progression of hypertensive retinopathy. Hence, our proposed methods offer a straightforward approach for detecting hypertensive retinopathy.</p>
<p>In the future, with increased datasets comprising fundus images from hypertensive and healthy patients, we can further analyze vessel changes within these images. In real-world clinical practice, comparing the <italic>R</italic><sub><italic>vessel</italic></sub> obtained from consecutive visits can serve as a diagnostic tool. Additionally, the detection of newly formed vessels can be achieved by subtracting images from successive visits post-segmentation. This approach enables the identification and tracking of changes in vasculature over time, offering potential insights for clinical assessment and monitoring.</p>
</sec>
<sec id="s6">
<title>6 Limitation and future works</title>
<p>While our deep learning method has shown promising results in the challenging tasks of retinal vessel segmentation and hypertensive retinopathy quantification, it&#x00027;s important to acknowledge the nuanced landscape of limitations accompanying such endeavors. One notable factor is the inherent variability present in medical imaging datasets. Our model&#x00027;s performance could be influenced by factors such as variations in image quality and disease severity across different datasets. Moreover, despite achieving commendable results overall, there are instances where the model might struggle to accurately delineate intricate vascular structures or detect subtle manifestations of hypertensive retinopathy. This suggests the need for further exploration and refinement of our approach.</p>
<p>In future research, attention could be directed toward enhancing the model&#x00027;s robustness and adaptability to diverse imaging conditions and patient populations. Techniques such as advanced data augmentation and domain adaptation strategies could prove instrumental in achieving this goal. Additionally, integrating complementary sources of information, such as clinical metadata or genetic markers, holds promise for enriching the predictive capabilities of our model and enhancing its clinical relevance. Furthermore, the pursuit of interpretability and explainability remains paramount. Providing clinicians with insights into how the model arrives at its predictions can foster trust and facilitate its integration into real-world clinical workflows. However, this pursuit must be balanced with ethical considerations, particularly concerning patient privacy, algorithmic bias, and the potential consequences of automated decision-making in healthcare settings. By addressing these multifaceted challenges, we can pave the way for more effective and responsible deployment of deep learning technologies in ophthalmology and beyond.</p>
</sec>
<sec sec-type="conclusions" id="s7">
<title>7 Conclusion</title>
<p>We have proposed a novel and comprehensive framework for retinal vessel segmentation and hypertensive retinopathy quantification. It takes advantage of heterogeneous feature cross-attention with the help of local emphasis CNN and long-range emphasis transformer structure with a fusion module to aggregate the information. Our experiments on four large-scale datasets have demonstrated that our framework can simultaneously conduct accurate segmentation and potential hypertensive retinopathy quantification performance.</p>
</sec>
<sec sec-type="data-availability" id="s8">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s9">
<title>Author contributions</title>
<p>XL: Writing &#x02013; review &#x00026; editing, Writing &#x02013; original draft, Visualization, Validation, Software, Methodology, Formal analysis, Data curation. HT: Writing &#x02013; review &#x00026; editing, Writing &#x02013; original draft, Visualization, Validation, Resources, Formal analysis, Conceptualization. WW: Writing &#x02013; review &#x00026; editing, Writing &#x02013; original draft, Visualization, Validation, Software, Methodology. ZC: Writing &#x02013; review &#x00026; editing, Writing &#x02013; original draft, Supervision, Resources, Project administration, Funding acquisition, Conceptualization.</p>
</sec>
</body>
<back>
<sec sec-type="funding-information" id="s10">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This research was supported by the Clinical special of Science and Technology Department of Guizhou Province (No. Qiankehechengguo-LC[2021]023) and the Youth Foundation of Guizhou Provincial People&#x00027;s Hospital (No. GZSYQN[2019]06). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Houben</surname> <given-names>AJ</given-names></name> <name><surname>Martens</surname> <given-names>RJ</given-names></name> <name><surname>Stehouwer</surname> <given-names>CD</given-names></name></person-group>. <article-title>Assessing microvascular function in humans from a chronic disease perspective</article-title>. <source>J Am Soc Nephrol</source>. (<year>2017</year>) <volume>28</volume>:<fpage>3461</fpage>. <pub-id pub-id-type="doi">10.1681/ASN.2017020157</pub-id><pub-id pub-id-type="pmid">28904002</pub-id></citation></ref>
<ref id="B2">
<label>2.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rizzoni</surname> <given-names>D</given-names></name> <name><surname>Agabiti-Rosei</surname> <given-names>C</given-names></name> <name><surname>De Ciuceis</surname> <given-names>C</given-names></name> <name><surname>Boari</surname> <given-names>GEM</given-names></name></person-group>. <article-title>Subclinical hypertension-mediated organ damage (HMOD) in hypertension: atherosclerotic cardiovascular disease (ASCVD) and calcium score</article-title>. <source>High Blood Press Cardiovasc Prev</source>. (<year>2023</year>) <volume>30</volume>:<fpage>17</fpage>&#x02013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.1007/s40292-022-00551-4</pub-id><pub-id pub-id-type="pmid">36376777</pub-id></citation></ref>
<ref id="B3">
<label>3.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Meng</surname> <given-names>Y</given-names></name> <name><surname>Bridge</surname> <given-names>J</given-names></name> <name><surname>Addison</surname> <given-names>C</given-names></name> <name><surname>Wang</surname> <given-names>M</given-names></name> <name><surname>Merritt</surname> <given-names>C</given-names></name> <name><surname>Franks</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>Bilateral adaptive graph convolutional network on CT based Covid-19 diagnosis with uncertainty-aware consensus-assisted multiple instance learning</article-title>. <source>Med Image Anal</source>. (<year>2023</year>) <volume>84</volume>:<fpage>102722</fpage>. <pub-id pub-id-type="doi">10.1016/j.media.2022.102722</pub-id><pub-id pub-id-type="pmid">36574737</pub-id></citation></ref>
<ref id="B4">
<label>4.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mancia</surname> <given-names>G</given-names></name> <name><surname>De Backer</surname> <given-names>G</given-names></name> <name><surname>Dominiczak</surname> <given-names>A</given-names></name> <name><surname>Cifkova</surname> <given-names>R</given-names></name> <name><surname>Fagard</surname> <given-names>R</given-names></name> <name><surname>Germano</surname> <given-names>G</given-names></name> <etal/></person-group>. <article-title>2007 Guidelines for the management of arterial hypertension: the Task Force for the Management of Arterial Hypertension of the European Society of Hypertension (ESH) and of the European Society of Cardiology (ESC)</article-title>. <source>Eur Heart J</source>. (<year>2007</year>) <volume>28</volume>:<fpage>1462</fpage>&#x02013;<lpage>536</lpage>. <pub-id pub-id-type="doi">10.1093/eurheartj/ehm236</pub-id><pub-id pub-id-type="pmid">17562668</pub-id></citation></ref>
<ref id="B5">
<label>5.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Flammer</surname> <given-names>J</given-names></name> <name><surname>Konieczka</surname> <given-names>K</given-names></name> <name><surname>Bruno</surname> <given-names>RM</given-names></name> <name><surname>Virdis</surname> <given-names>A</given-names></name> <name><surname>Flammer</surname> <given-names>AJ</given-names></name> <name><surname>Taddei</surname> <given-names>S</given-names></name></person-group>. <article-title>The eye and the heart</article-title>. <source>Eur Heart J</source>. (<year>2013</year>) <volume>34</volume>:<fpage>1270</fpage>&#x02013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1093/eurheartj/eht023</pub-id><pub-id pub-id-type="pmid">23401492</pub-id></citation></ref>
<ref id="B6">
<label>6.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wong</surname> <given-names>TY</given-names></name> <name><surname>Mitchell</surname> <given-names>P</given-names></name></person-group>. <article-title>Hypertensive retinopathy</article-title>. <source>N Engl J Med</source>. (<year>2004</year>) <volume>351</volume>:<fpage>2310</fpage>&#x02013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMra032865</pub-id><pub-id pub-id-type="pmid">15564546</pub-id></citation></ref>
<ref id="B7">
<label>7.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bidani</surname> <given-names>AK</given-names></name> <name><surname>Griffin</surname> <given-names>KA</given-names></name></person-group>. <article-title>Pathophysiology of hypertensive renal damage: implications for therapy</article-title>. <source>Hypertension</source>. (<year>2004</year>) <volume>44</volume>:<fpage>595</fpage>&#x02013;<lpage>601</lpage>. <pub-id pub-id-type="doi">10.1161/01.HYP.0000145180.38707.84</pub-id><pub-id pub-id-type="pmid">15452024</pub-id></citation></ref>
<ref id="B8">
<label>8.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Del Pinto</surname> <given-names>R</given-names></name> <name><surname>Mul&#x000E8;</surname> <given-names>G</given-names></name> <name><surname>Vadal&#x000E0;</surname> <given-names>M</given-names></name> <name><surname>Carollo</surname> <given-names>C</given-names></name> <name><surname>Cottone</surname> <given-names>S</given-names></name> <name><surname>Agabiti Rosei</surname> <given-names>C</given-names></name> <etal/></person-group>. <article-title>Arterial hypertension and the hidden disease of the eye: diagnostic tools and therapeutic strategies</article-title>. <source>Nutrients</source>. (<year>2022</year>) <volume>14</volume>:<fpage>2200</fpage>. <pub-id pub-id-type="doi">10.3390/nu14112200</pub-id><pub-id pub-id-type="pmid">35683999</pub-id></citation></ref>
<ref id="B9">
<label>9.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rizzoni</surname> <given-names>D</given-names></name> <name><surname>Agabiti Rosei</surname> <given-names>C</given-names></name> <name><surname>De Ciuceis</surname> <given-names>C</given-names></name> <name><surname>Semeraro</surname> <given-names>F</given-names></name> <name><surname>Rizzoni</surname> <given-names>M</given-names></name> <name><surname>Docchio</surname> <given-names>F</given-names></name></person-group>. <article-title>New methods to study the microcirculation</article-title>. <source>Am J Hypertens</source>. (<year>2018</year>) <volume>31</volume>:<fpage>265</fpage>&#x02013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.1093/ajh/hpx211</pub-id><pub-id pub-id-type="pmid">29228086</pub-id></citation></ref>
<ref id="B10">
<label>10.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Peng</surname> <given-names>SY</given-names></name> <name><surname>Lee</surname> <given-names>YC</given-names></name> <name><surname>Wu</surname> <given-names>IW</given-names></name> <name><surname>Lee</surname> <given-names>CC</given-names></name> <name><surname>Sun</surname> <given-names>CC</given-names></name> <name><surname>Ding</surname> <given-names>JJ</given-names></name> <etal/></person-group>. <article-title>Impact of blood pressure control on retinal microvasculature in patients with chronic kidney disease</article-title>. <source>Sci Rep</source>. (<year>2020</year>) <volume>10</volume>:<fpage>14275</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-020-71251-z</pub-id><pub-id pub-id-type="pmid">32868805</pub-id></citation></ref>
<ref id="B11">
<label>11.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rizzoni</surname> <given-names>D</given-names></name> <name><surname>De Ciuceis</surname> <given-names>C</given-names></name> <name><surname>Porteri</surname> <given-names>E</given-names></name> <name><surname>Paiardi</surname> <given-names>S</given-names></name> <name><surname>Boari</surname> <given-names>GE</given-names></name> <name><surname>Mortini</surname> <given-names>P</given-names></name> <etal/></person-group>. <article-title>Altered structure of small cerebral arteries in patients with essential hypertension</article-title>. <source>J Hypertens</source>. (<year>2009</year>) <volume>27</volume>:<fpage>838</fpage>&#x02013;<lpage>45</lpage>. <pub-id pub-id-type="doi">10.1097/HJH.0b013e32832401ea</pub-id><pub-id pub-id-type="pmid">19300112</pub-id></citation></ref>
<ref id="B12">
<label>12.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Arsalan</surname> <given-names>M</given-names></name> <name><surname>Haider</surname> <given-names>A</given-names></name> <name><surname>Lee</surname> <given-names>YW</given-names></name> <name><surname>Park</surname> <given-names>KR</given-names></name></person-group>. <article-title>Detecting retinal vasculature as a key biomarker for deep learning-based intelligent screening and analysis of diabetic and hypertensive retinopathy</article-title>. <source>Expert Syst Appl</source>. (<year>2022</year>) <volume>200</volume>:<fpage>117009</fpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2022.117009</pub-id></citation>
</ref>
<ref id="B13">
<label>13.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>H</given-names></name> <name><surname>Wang</surname> <given-names>W</given-names></name> <name><surname>Zhong</surname> <given-names>J</given-names></name> <name><surname>Lei</surname> <given-names>B</given-names></name> <name><surname>Wen</surname> <given-names>Z</given-names></name> <name><surname>Qin</surname> <given-names>J</given-names></name></person-group>. <article-title>Scs-net: a scale and context sensitive network for retinal vessel segmentation</article-title>. <source>Med Image Anal</source>. (<year>2021</year>) <volume>70</volume>:<fpage>102025</fpage>. <pub-id pub-id-type="doi">10.1016/j.media.2021.102025</pub-id><pub-id pub-id-type="pmid">33721692</pub-id></citation></ref>
<ref id="B14">
<label>14.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>J</given-names></name> <name><surname>Huang</surname> <given-names>X</given-names></name> <name><surname>Zhou</surname> <given-names>H</given-names></name> <name><surname>Wang</surname> <given-names>Y</given-names></name> <name><surname>Zhang</surname> <given-names>Q</given-names></name></person-group>. <article-title>Stimulus-guided adaptive transformer network for retinal blood vessel segmentation in fundus images</article-title>. <source>Med Image Anal</source>. (<year>2023</year>) <volume>89</volume>:<fpage>102929</fpage>. <pub-id pub-id-type="doi">10.1016/j.media.2023.102929</pub-id><pub-id pub-id-type="pmid">37598606</pub-id></citation></ref>
<ref id="B15">
<label>15.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wei</surname> <given-names>J</given-names></name> <name><surname>Zhu</surname> <given-names>G</given-names></name> <name><surname>Fan</surname> <given-names>Z</given-names></name> <name><surname>Liu</surname> <given-names>J</given-names></name> <name><surname>Rong</surname> <given-names>Y</given-names></name> <name><surname>Mo</surname> <given-names>J</given-names></name> <etal/></person-group>. <article-title>Genetic U-Net: automatically designed deep networks for retinal vessel segmentation using a genetic algorithm</article-title>. <source>IEEE Trans Med Imaging</source>. (<year>2021</year>) <volume>41</volume>:<fpage>292</fpage>&#x02013;<lpage>307</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2021.3111679</pub-id><pub-id pub-id-type="pmid">34506278</pub-id></citation></ref>
<ref id="B16">
<label>16.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tan</surname> <given-names>Y</given-names></name> <name><surname>Yang</surname> <given-names>KF</given-names></name> <name><surname>Zhao</surname> <given-names>SX</given-names></name> <name><surname>Li</surname> <given-names>YJ</given-names></name></person-group>. <article-title>Retinal vessel segmentation with skeletal prior and contrastive loss</article-title>. <source>IEEE Trans Med Imaging</source>. (<year>2022</year>) <volume>41</volume>:<fpage>2238</fpage>&#x02013;<lpage>51</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2022.3161681</pub-id><pub-id pub-id-type="pmid">35320091</pub-id></citation></ref>
<ref id="B17">
<label>17.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Y</given-names></name> <name><surname>Zhang</surname> <given-names>Y</given-names></name> <name><surname>Cui</surname> <given-names>W</given-names></name> <name><surname>Lei</surname> <given-names>B</given-names></name> <name><surname>Kuang</surname> <given-names>X</given-names></name> <name><surname>Zhang</surname> <given-names>T</given-names></name></person-group>. <article-title>Dual encoder-based dynamic-channel graph convolutional network with edge enhancement for retinal vessel segmentation</article-title>. <source>IEEE Trans Med Imaging</source>. (<year>2022</year>) <volume>41</volume>:<fpage>1975</fpage>&#x02013;<lpage>89</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2022.3151666</pub-id><pub-id pub-id-type="pmid">35167444</pub-id></citation></ref>
<ref id="B18">
<label>18.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Annunziata</surname> <given-names>R</given-names></name> <name><surname>Trucco</surname> <given-names>E</given-names></name></person-group>. <article-title>Accelerating convolutional sparse coding for curvilinear structures segmentation by refining SCIRD-TS filter banks</article-title>. <source>IEEE Trans Med Imaging</source>. (<year>2016</year>) <volume>35</volume>:<fpage>2381</fpage>&#x02013;<lpage>92</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2016.2570123</pub-id><pub-id pub-id-type="pmid">27214893</pub-id></citation></ref>
<ref id="B19">
<label>19.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mar&#x000ED;n</surname> <given-names>D</given-names></name> <name><surname>Aquino</surname> <given-names>A</given-names></name> <name><surname>Geg&#x000FA;ndez-Arias</surname> <given-names>ME</given-names></name> <name><surname>Bravo</surname> <given-names>JM</given-names></name></person-group>. <article-title>A new supervised method for blood vessel segmentation in retinal images by using gray-level and moment invariants-based features</article-title>. <source>IEEE Trans Med Imaging</source>. (<year>2010</year>) <volume>30</volume>:<fpage>146</fpage>&#x02013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2010.2064333</pub-id><pub-id pub-id-type="pmid">20699207</pub-id></citation></ref>
<ref id="B20">
<label>20.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Soares</surname> <given-names>JV</given-names></name> <name><surname>Leandro</surname> <given-names>JJ</given-names></name> <name><surname>Cesar</surname> <given-names>RM</given-names></name> <name><surname>Jelinek</surname> <given-names>HF</given-names></name> <name><surname>Cree</surname> <given-names>MJ</given-names></name></person-group>. <article-title>Retinal vessel segmentation using the 2-D Gabor wavelet and supervised classification</article-title>. <source>IEEE Trans Med Imaging</source>. (<year>2006</year>) <volume>25</volume>:<fpage>1214</fpage>&#x02013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2006.879967</pub-id><pub-id pub-id-type="pmid">16967806</pub-id></citation></ref>
<ref id="B21">
<label>21.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Maninis</surname> <given-names>KK</given-names></name> <name><surname>Pont-Tuset</surname> <given-names>J</given-names></name> <name><surname>Arbel&#x000E1;ez</surname> <given-names>P</given-names></name> <name><surname>Van Gool</surname> <given-names>L</given-names></name></person-group>. <article-title>Deep retinal image understanding</article-title>. In: <source>Medical Image Computing and Computer-Assisted Intervention-MICCAI 2016: 19th International Conference, Athens, Greece, October 17-21, 2016, Proceedings, Part II 19</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2016</year>), p. <fpage>140</fpage>&#x02013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-46723-8_17</pub-id></citation>
</ref>
<ref id="B22">
<label>22.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Oliveira</surname> <given-names>A</given-names></name> <name><surname>Pereira</surname> <given-names>S</given-names></name> <name><surname>Silva</surname> <given-names>CA</given-names></name></person-group>. <article-title>Retinal vessel segmentation based on fully convolutional neural networks</article-title>. <source>Expert Syst Appl</source>. (<year>2018</year>) <volume>112</volume>:<fpage>229</fpage>&#x02013;<lpage>42</lpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2018.06.034</pub-id></citation>
</ref>
<ref id="B23">
<label>23.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yan</surname> <given-names>Z</given-names></name> <name><surname>Yang</surname> <given-names>X</given-names></name> <name><surname>Cheng</surname> <given-names>KT</given-names></name></person-group>. <article-title>Joint segment-level and pixel-wise losses for deep learning based retinal vessel segmentation</article-title>. <source>IEEE Trans Biomed Eng</source>. (<year>2018</year>) <volume>65</volume>:<fpage>1912</fpage>&#x02013;<lpage>23</lpage>. <pub-id pub-id-type="doi">10.1109/TBME.2018.2828137</pub-id><pub-id pub-id-type="pmid">29993396</pub-id></citation></ref>
<ref id="B24">
<label>24.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mou</surname> <given-names>L</given-names></name> <name><surname>Chen</surname> <given-names>L</given-names></name> <name><surname>Cheng</surname> <given-names>J</given-names></name> <name><surname>Gu</surname> <given-names>Z</given-names></name> <name><surname>Zhao</surname> <given-names>Y</given-names></name> <name><surname>Liu</surname> <given-names>J</given-names></name></person-group>. <article-title>Dense dilated network with probability regularized walk for vessel detection</article-title>. <source>IEEE Trans Med Imaging</source>. (<year>2019</year>) <volume>39</volume>:<fpage>1392</fpage>&#x02013;<lpage>403</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2019.2950051</pub-id><pub-id pub-id-type="pmid">31675323</pub-id></citation></ref>
<ref id="B25">
<label>25.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>B</given-names></name> <name><surname>Qiu</surname> <given-names>S</given-names></name> <name><surname>He</surname> <given-names>H</given-names></name></person-group>. <article-title>Dual encoding u-net for retinal vessel segmentation</article-title>. In: <source>Medical Image Computing and Computer Assisted Intervention-MICCAI 2019: 22nd International Conference, Shenzhen, China, October 13-17, 2019, Proceedings, Part I 22</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2019</year>), p. <fpage>84</fpage>&#x02013;<lpage>92</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-32239-7_10</pub-id></citation>
</ref>
<ref id="B26">
<label>26.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Peng</surname> <given-names>Z</given-names></name> <name><surname>Huang</surname> <given-names>W</given-names></name> <name><surname>Gu</surname> <given-names>S</given-names></name> <name><surname>Xie</surname> <given-names>L</given-names></name> <name><surname>Wang</surname> <given-names>Y</given-names></name> <name><surname>Jiao</surname> <given-names>J</given-names></name> <etal/></person-group>. <article-title>Conformer: local features coupling global representations for visual recognition</article-title>. In: <source>Proceedings of the IEEE/CVF International Conference on Computer Vision</source>. <publisher-loc>Montreal, QC</publisher-loc>: <publisher-name>IEEE</publisher-name> (<year>2021</year>), p. <fpage>367</fpage>&#x02013;<lpage>76</lpage>. <pub-id pub-id-type="doi">10.1109/ICCV48922.2021.00042</pub-id><pub-id pub-id-type="pmid">37022836</pub-id></citation></ref>
<ref id="B27">
<label>27.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>He</surname> <given-names>K</given-names></name> <name><surname>Zhang</surname> <given-names>X</given-names></name> <name><surname>Ren</surname> <given-names>S</given-names></name> <name><surname>Sun</surname> <given-names>J</given-names></name></person-group>. <article-title>Deep residual learning for image recognition</article-title>. In: <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source>. <publisher-loc>Las Vegas, NV</publisher-loc>: <publisher-name>IEEE</publisher-name> (<year>2016</year>), p. <fpage>770</fpage>&#x02013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2016.90</pub-id></citation>
</ref>
<ref id="B28">
<label>28.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dosovitskiy</surname> <given-names>A</given-names></name> <name><surname>Beyer</surname> <given-names>L</given-names></name> <name><surname>Kolesnikov</surname> <given-names>A</given-names></name> <name><surname>Weissenborn</surname> <given-names>D</given-names></name> <name><surname>Zhai</surname> <given-names>X</given-names></name> <name><surname>Unterthiner</surname> <given-names>T</given-names></name> <etal/></person-group>. <article-title>An image is worth 16x16 words: transformers for image recognition at scale</article-title>. <source>arXiv</source>. (<year>2020</year>) [Preprint]. arXiv:2010.11929. <pub-id pub-id-type="doi">10.48550/arXiv.2010.11929</pub-id></citation>
</ref>
<ref id="B29">
<label>29.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ba</surname> <given-names>JL</given-names></name> <name><surname>Kiros</surname> <given-names>JR</given-names></name> <name><surname>Hinton</surname> <given-names>GE</given-names></name></person-group>. <article-title>Layer normalization</article-title>. <source>arXiv</source>. (<year>2016</year>) [Preprint]. arXiv:1607.06450. <pub-id pub-id-type="doi">10.48550/arXiv.1607.06450</pub-id></citation>
</ref>
<ref id="B30">
<label>30.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Staal</surname> <given-names>J</given-names></name> <name><surname>Abr&#x000E0;moff</surname> <given-names>MD</given-names></name> <name><surname>Niemeijer</surname> <given-names>M</given-names></name> <name><surname>Viergever</surname> <given-names>MA</given-names></name> <name><surname>Van Ginneken</surname> <given-names>B</given-names></name></person-group>. <article-title>Ridge-based vessel segmentation in color images of the retina</article-title>. <source>IEEE Trans Med Imaging</source>. (<year>2004</year>) <volume>23</volume>:<fpage>501</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2004.825627</pub-id><pub-id pub-id-type="pmid">15084075</pub-id></citation></ref>
<ref id="B31">
<label>31.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fraz</surname> <given-names>MM</given-names></name> <name><surname>Remagnino</surname> <given-names>P</given-names></name> <name><surname>Hoppe</surname> <given-names>A</given-names></name> <name><surname>Uyyanonvara</surname> <given-names>B</given-names></name> <name><surname>Rudnicka</surname> <given-names>AR</given-names></name> <name><surname>Owen</surname> <given-names>CG</given-names></name> <etal/></person-group>. <article-title>An ensemble classification-based approach applied to retinal blood vessel segmentation</article-title>. <source>IEEE Trans Biomed Eng</source>. (<year>2012</year>) <volume>59</volume>:<fpage>2538</fpage>&#x02013;<lpage>48</lpage>. <pub-id pub-id-type="doi">10.1109/TBME.2012.2205687</pub-id><pub-id pub-id-type="pmid">22736688</pub-id></citation></ref>
<ref id="B32">
<label>32.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hoover</surname> <given-names>A</given-names></name> <name><surname>Kouznetsova</surname> <given-names>V</given-names></name> <name><surname>Goldbaum</surname> <given-names>M</given-names></name></person-group>. <article-title>Locating blood vessels in retinal images by piecewise threshold probing of a matched filter response</article-title>. <source>IEEE Trans Med Imaging</source>. (<year>2000</year>) <volume>19</volume>:<fpage>203</fpage>&#x02013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1109/42.845178</pub-id><pub-id pub-id-type="pmid">10875704</pub-id></citation></ref>
<ref id="B33">
<label>33.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Odstrcilik</surname> <given-names>J</given-names></name> <name><surname>Kolar</surname> <given-names>R</given-names></name> <name><surname>Budai</surname> <given-names>A</given-names></name> <name><surname>Hornegger</surname> <given-names>J</given-names></name> <name><surname>Jan</surname> <given-names>J</given-names></name> <name><surname>Gazarek</surname> <given-names>J</given-names></name> <etal/></person-group>. <article-title>Retinal vessel segmentation by improved matched filtering: evaluation on a new high-resolution fundus image database</article-title>. <source>IET Image Process</source>. (<year>2013</year>) <volume>7</volume>:<fpage>373</fpage>&#x02013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.1049/iet-ipr.2012.0455</pub-id></citation>
</ref>
<ref id="B34">
<label>34.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cherukuri</surname> <given-names>V</given-names></name> <name><surname>Bg</surname> <given-names>VK</given-names></name> <name><surname>Bala</surname> <given-names>R</given-names></name> <name><surname>Monga</surname> <given-names>V</given-names></name></person-group>. <article-title>Deep retinal image segmentation with regularization under geometric priors</article-title>. <source>IEEE Trans Image Process</source>. (<year>2019</year>) <volume>29</volume>:<fpage>2552</fpage>&#x02013;<lpage>67</lpage>. <pub-id pub-id-type="doi">10.1109/TIP.2019.2946078</pub-id><pub-id pub-id-type="pmid">31613766</pub-id></citation></ref>
<ref id="B35">
<label>35.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Drozdzal</surname> <given-names>M</given-names></name> <name><surname>Vorontsov</surname> <given-names>E</given-names></name> <name><surname>Chartrand</surname> <given-names>G</given-names></name> <name><surname>Kadoury</surname> <given-names>S</given-names></name> <name><surname>Pal</surname> <given-names>C</given-names></name></person-group>. <article-title>The importance of skip connections in biomedical image segmentation</article-title>. In: <source>Deep Learning and Data Labeling for Medical Applications</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2016</year>), p. <fpage>179</fpage>&#x02013;<lpage>87</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-46976-8_19</pub-id></citation>
</ref>
<ref id="B36">
<label>36.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Meng</surname> <given-names>Y</given-names></name> <name><surname>Zhang</surname> <given-names>H</given-names></name> <name><surname>Zhao</surname> <given-names>Y</given-names></name> <name><surname>Gao</surname> <given-names>D</given-names></name> <name><surname>Hamill</surname> <given-names>B</given-names></name> <name><surname>Patri</surname> <given-names>G</given-names></name> <etal/></person-group>. <article-title>Dual consistency enabled weakly and semi-supervised optic disc and cup segmentation with dual adaptive graph convolutional networks</article-title>. <source>IEEE Trans Med Imaging</source>. (<year>2022</year>) <volume>42</volume>:<fpage>416</fpage>&#x02013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2022.3203318</pub-id><pub-id pub-id-type="pmid">36044486</pub-id></citation></ref>
<ref id="B37">
<label>37.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Meng</surname> <given-names>Y</given-names></name> <name><surname>Chen</surname> <given-names>X</given-names></name> <name><surname>Zhang</surname> <given-names>H</given-names></name> <name><surname>Zhao</surname> <given-names>Y</given-names></name> <name><surname>Gao</surname> <given-names>D</given-names></name> <name><surname>Hamill</surname> <given-names>B</given-names></name> <etal/></person-group>. <article-title>Shape-aware weakly/semi-supervised optic disc and cup segmentation with regional/marginal consistency</article-title>. In: <source>International Conference on Medical Image Computing and Computer-Assisted Intervention</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2022</year>), p. <fpage>524</fpage>&#x02013;<lpage>34</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-031-16440-8_50</pub-id></citation>
</ref>
<ref id="B38">
<label>38.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Meng</surname> <given-names>Y</given-names></name> <name><surname>Wei</surname> <given-names>M</given-names></name> <name><surname>Gao</surname> <given-names>D</given-names></name> <name><surname>Zhao</surname> <given-names>Y</given-names></name> <name><surname>Yang</surname> <given-names>X</given-names></name> <name><surname>Huang</surname> <given-names>X</given-names></name> <etal/></person-group>. <article-title>CNN-GCN aggregation enabled boundary regression for biomedical image segmentation</article-title>. In: <source>Medical Image Computing and Computer Assisted Intervention-MICCAI 2020: 23rd International Conference, Lima, Peru, October 4-8, 2020, Proceedings, Part IV 23</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2020</year>), p. <fpage>352</fpage>&#x02013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-59719-1_35</pub-id></citation>
</ref>
<ref id="B39">
<label>39.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Meng</surname> <given-names>Y</given-names></name> <name><surname>Meng</surname> <given-names>W</given-names></name> <name><surname>Gao</surname> <given-names>D</given-names></name> <name><surname>Zhao</surname> <given-names>Y</given-names></name> <name><surname>Yang</surname> <given-names>X</given-names></name> <name><surname>Huang</surname> <given-names>X</given-names></name> <etal/></person-group>. <article-title>Regression of instance boundary by aggregated CNN and GCN</article-title>. In: <source>Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23-28, 2020, Proceedings, Part VIII 16</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2020</year>), p. <fpage>190</fpage>&#x02013;<lpage>207</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-58598-3_12</pub-id></citation>
</ref>
<ref id="B40">
<label>40.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Meng</surname> <given-names>Y</given-names></name> <name><surname>Zhang</surname> <given-names>Y</given-names></name> <name><surname>Xie</surname> <given-names>J</given-names></name> <name><surname>Duan</surname> <given-names>J</given-names></name> <name><surname>Joddrell</surname> <given-names>M</given-names></name> <name><surname>Madhusudhan</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>Multi-granularity learning of explicit geometric constraint and contrast for label-efficient medical image segmentation and differentiable clinical function assessment</article-title>. <source>Med Image Anal</source>. (<year>2024</year>) <volume>95</volume>:<fpage>103183</fpage>. <pub-id pub-id-type="doi">10.1016/j.media.2024.103183</pub-id><pub-id pub-id-type="pmid">38692098</pub-id></citation></ref>
<ref id="B41">
<label>41.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>Z</given-names></name> <name><surname>Shen</surname> <given-names>C</given-names></name> <name><surname>Hengel</surname> <given-names>AD</given-names></name></person-group>. <article-title>Bridging category-level and instance-level semantic image segmentation</article-title>. <source>arXiv</source>. (<year>2016</year>) [Preprint]. arXiv:1605.06885. <pub-id pub-id-type="doi">10.48550/arXiv.1605.06885</pub-id></citation>
</ref>
<ref id="B42">
<label>42.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Y</given-names></name> <name><surname>Meng</surname> <given-names>Y</given-names></name> <name><surname>Zheng</surname> <given-names>Y</given-names></name></person-group>. <article-title>Automatically segment the left atrium and scars from LGE-MRIs using a boundary-focused nnU-Net</article-title>. In: <source>Challenge on Left Atrial and Scar Quantification and Segmentation</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2022</year>), p. <fpage>49</fpage>&#x02013;<lpage>59</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-031-31778-1_5</pub-id></citation>
</ref>
<ref id="B43">
<label>43.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>X</given-names></name> <name><surname>Wang</surname> <given-names>N</given-names></name> <name><surname>Wang</surname> <given-names>Y</given-names></name> <name><surname>Wang</surname> <given-names>X</given-names></name> <name><surname>Nezafat</surname> <given-names>R</given-names></name> <name><surname>Ni</surname> <given-names>D</given-names></name> <etal/></person-group>. <article-title>Combating uncertainty with novel losses for automatic left atrium segmentation</article-title>. In: <source>International Workshop on Statistical Atlases and Computational Models of the Heart</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2018</year>), p. <fpage>246</fpage>&#x02013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-12029-0_27</pub-id></citation>
</ref>
<ref id="B44">
<label>44.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Meng</surname> <given-names>Y</given-names></name> <name><surname>Zhang</surname> <given-names>H</given-names></name> <name><surname>Zhao</surname> <given-names>Y</given-names></name> <name><surname>Yang</surname> <given-names>X</given-names></name> <name><surname>Qiao</surname> <given-names>Y</given-names></name> <name><surname>MacCormick</surname> <given-names>IJ</given-names></name> <etal/></person-group>. <article-title>Graph-based region and boundary aggregation for biomedical image segmentation</article-title>. <source>IEEE Trans Med Imaging</source>. (<year>2021</year>) <volume>41</volume>:<fpage>690</fpage>&#x02013;<lpage>701</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2021.3123567</pub-id><pub-id pub-id-type="pmid">34714742</pub-id></citation></ref>
<ref id="B45">
<label>45.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ronneberger</surname> <given-names>O</given-names></name> <name><surname>Fischer</surname> <given-names>P</given-names></name> <name><surname>Brox</surname> <given-names>T</given-names></name></person-group>. <article-title>U-net: convolutional networks for biomedical image segmentation</article-title>. In: <source>International Conference on Medical Image Computing and Computer-Assisted Intervention</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2015</year>), p. <fpage>234</fpage>&#x02013;<lpage>41</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-24574-4_28</pub-id></citation>
</ref>
<ref id="B46">
<label>46.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>Z</given-names></name> <name><surname>Siddiquee</surname> <given-names>MMR</given-names></name> <name><surname>Tajbakhsh</surname> <given-names>N</given-names></name> <name><surname>Liang</surname> <given-names>J</given-names></name></person-group>. <article-title>UNet&#x0002B;&#x0002B;: a nested U-net architecture for medical image segmentation</article-title>. In: <source>Deep Learning in Medical Image Analysis and Multimodal Learning for Clinical Decision Support</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2018</year>), p. <fpage>3</fpage>&#x02013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-00889-5_1</pub-id><pub-id pub-id-type="pmid">32613207</pub-id></citation></ref>
<ref id="B47">
<label>47.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>Z</given-names></name> <name><surname>Lin</surname> <given-names>Y</given-names></name> <name><surname>Cao</surname> <given-names>Y</given-names></name> <name><surname>Hu</surname> <given-names>H</given-names></name> <name><surname>Wei</surname> <given-names>Y</given-names></name> <name><surname>Zhang</surname> <given-names>Z</given-names></name> <etal/></person-group>. <article-title>Swin transformer: hierarchical vision transformer using shifted windows</article-title>. In: <source>Proceedings of the IEEE/CVF International Conference on Computer Vision</source>. <publisher-loc>Montreal, QC</publisher-loc>: <publisher-name>IEEE</publisher-name> (<year>2021</year>), p. <fpage>10012</fpage>&#x02013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1109/ICCV48922.2021.00986</pub-id></citation>
</ref>
<ref id="B48">
<label>48.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Oktay</surname> <given-names>O</given-names></name> <name><surname>Schlemper</surname> <given-names>J</given-names></name> <name><surname>Folgoc</surname> <given-names>LL</given-names></name> <name><surname>Lee</surname> <given-names>M</given-names></name> <name><surname>Heinrich</surname> <given-names>M</given-names></name> <name><surname>Misawa</surname> <given-names>K</given-names></name> <etal/></person-group>. <article-title>Attention U-net: learning where to look for the pancreas</article-title>. <source>arXiv [Preprint].</source> arXiv:1804.03999 (<year>2018</year>).<pub-id pub-id-type="pmid">35474556</pub-id></citation></ref>
<ref id="B49">
<label>49.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>J</given-names></name> <name><surname>Lu</surname> <given-names>Y</given-names></name> <name><surname>Yu</surname> <given-names>Q</given-names></name> <name><surname>Luo</surname> <given-names>X</given-names></name> <name><surname>Adeli</surname> <given-names>E</given-names></name> <name><surname>Wang</surname> <given-names>Y</given-names></name> <etal/></person-group>. <article-title>Transunet: transformers make strong encoders for medical image segmentation</article-title>. <source>arXiv [Preprint]</source>. arXiv:2102.04306 (<year>2021</year>). <pub-id pub-id-type="doi">10.48550/arXiv.2102.04306</pub-id><pub-id pub-id-type="pmid">37109505</pub-id></citation></ref>
<ref id="B50">
<label>50.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Laibacher</surname> <given-names>T</given-names></name> <name><surname>Weyde</surname> <given-names>T</given-names></name> <name><surname>Jalali</surname> <given-names>S</given-names></name></person-group>. <article-title>M2u-net: effective and efficient retinal vessel segmentation for real-world applications</article-title>. In: <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops</source>. <publisher-loc>Long Beach, CA</publisher-loc>: <publisher-name>IEEE</publisher-name> (<year>2019</year>). <pub-id pub-id-type="doi">10.1109/CVPRW.2019.00020</pub-id></citation>
</ref>
<ref id="B51">
<label>51.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hua</surname> <given-names>D</given-names></name> <name><surname>Xu</surname> <given-names>Y</given-names></name> <name><surname>Zeng</surname> <given-names>X</given-names></name> <name><surname>Yang</surname> <given-names>N</given-names></name> <name><surname>Jiang</surname> <given-names>M</given-names></name> <name><surname>Zhang</surname> <given-names>X</given-names></name> <etal/></person-group>. <article-title>Use of optical coherence tomography angiography for assessment of microvascular changes in the macula and optic nerve head in hypertensive patients without hypertensive retinopathy</article-title>. <source>Microvasc Res</source>. (<year>2020</year>) <volume>129</volume>:<fpage>103969</fpage>. <pub-id pub-id-type="doi">10.1016/j.mvr.2019.103969</pub-id><pub-id pub-id-type="pmid">31874131</pub-id></citation></ref>
<ref id="B52">
<label>52.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Irshad</surname> <given-names>S</given-names></name> <name><surname>Akram</surname> <given-names>MU</given-names></name></person-group>. <article-title>Classification of retinal vessels into arteries and veins for detection of hypertensive retinopathy</article-title>. In: <source>2014 Cairo International Biomedical Engineering Conference (CIBEC)</source>. <publisher-loc>Giza</publisher-loc>: <publisher-name>IEEE</publisher-name> (<year>2014</year>), p. <fpage>133</fpage>&#x02013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1109/CIBEC.2014.7020937</pub-id></citation>
</ref>
</ref-list>
</back>
</article>