<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Neurosci.</journal-id>
<journal-title>Frontiers in Neuroscience</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Neurosci.</abbrev-journal-title>
<issn pub-type="epub">1662-453X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fnins.2021.662005</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Neuroscience</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Harmonized Segmentation of Neonatal Brain MRI</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Grigorescu</surname> <given-names>Irina</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1079163/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Vanes</surname> <given-names>Lucy</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/594512/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Uus</surname> <given-names>Alena</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1070339/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Batalle</surname> <given-names>Dafnis</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/348239/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Cordero-Grande</surname> <given-names>Lucilio</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Nosarti</surname> <given-names>Chiara</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/50967/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Edwards</surname> <given-names>A. David</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/624337/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Hajnal</surname> <given-names>Joseph V.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1330228/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Modat</surname> <given-names>Marc</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1329601/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Deprez</surname> <given-names>Maria</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1329761/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Centre for the Developing Brain, School of Biomedical Engineering and Imaging Sciences, King&#x00027;s College London</institution>, <addr-line>London</addr-line>, <country>United Kingdom</country></aff>
<aff id="aff2"><sup>2</sup><institution>Biomedical Engineering Department, School of Biomedical Engineering and Imaging Sciences, King&#x00027;s College London</institution>, <addr-line>London</addr-line>, <country>United Kingdom</country></aff>
<aff id="aff3"><sup>3</sup><institution>Department of Child and Adolescent Psychiatry, Institute of Psychiatry, Psychology and Neuroscience, King&#x00027;s College London</institution>, <addr-line>London</addr-line>, <country>United Kingdom</country></aff>
<aff id="aff4"><sup>4</sup><institution>Department of Forensic and Neurodevelopmental Science</institution>, <addr-line>Institute of Psychiatry, Psychology and Neuroscience, King&#x00027;s College London</addr-line>, <addr-line>London</addr-line>, <country>United Kingdom</country></aff>
<aff id="aff5"><sup>5</sup><institution>Biomedical Image Technologies, ETSI Telecomunicaci&#x000F3;n, Universidad Polit&#x000E9;cnica de Madrid &#x00026; CIBER-BBN</institution>, <addr-line>Madrid</addr-line>, <country>Spain</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Diana M. Sima, Icometrix, Belgium</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Ashok Panigrahy, University of Pittsburgh, United States; Adil Bashir, Auburn University, United States</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Irina Grigorescu <email>irina.grigorescu&#x00040;kcl.ac.uk</email></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Brain Imaging Methods, a section of the journal Frontiers in Neuroscience</p></fn></author-notes>
<pub-date pub-type="epub">
<day>25</day>
<month>05</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>15</volume>
<elocation-id>662005</elocation-id>
<history>
<date date-type="received">
<day>31</day>
<month>01</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>21</day>
<month>04</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2021 Grigorescu, Vanes, Uus, Batalle, Cordero-Grande, Nosarti, Edwards, Hajnal, Modat and Deprez.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Grigorescu, Vanes, Uus, Batalle, Cordero-Grande, Nosarti, Edwards, Hajnal, Modat and Deprez</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract><p>Deep learning based medical image segmentation has shown great potential in becoming a key part of the clinical analysis pipeline. However, many of these models rely on the assumption that the train and test data come from the same distribution. This means that such methods cannot guarantee high quality predictions when the source and target domains are dissimilar due to different acquisition protocols, or biases in patient cohorts. Recently, unsupervised domain adaptation techniques have shown great potential in alleviating this problem by minimizing the shift between the source and target distributions, without requiring the use of labeled data in the target domain. In this work, we aim to predict tissue segmentation maps on <italic>T</italic><sub>2</sub>-weighted magnetic resonance imaging data of an unseen preterm-born neonatal population, which has both different acquisition parameters and population bias when compared to our training data. We achieve this by investigating two unsupervised domain adaptation techniques with the objective of finding the best solution for our problem. We compare the two methods with a baseline fully-supervised segmentation network and report our results in terms of Dice scores obtained on our source test dataset. Moreover, we analyse tissue volumes and cortical thickness measures of the harmonized data on a subset of the population matched for gestational age at birth and postmenstrual age at scan. Finally, we demonstrate the applicability of the harmonized cortical gray matter maps with an analysis comparing term and preterm-born neonates and a proof-of-principle investigation of the association between cortical thickness and a language outcome measure.</p></abstract>
<kwd-group>
<kwd>deep learning</kwd>
<kwd>segmentation</kwd>
<kwd>neonatal brain</kwd>
<kwd>unsupervised domain adaptation</kwd>
<kwd>cortical thickness</kwd>
</kwd-group>
<counts>
<fig-count count="13"/>
<table-count count="3"/>
<equation-count count="3"/>
<ref-count count="37"/>
<page-count count="17"/>
<word-count count="9599"/>
</counts>
</article-meta>
<notes notes-type="disclaimer"><p>The views expressed are those of the authors and not necessarily those of the NHS, the NIHR or the Department of Health.</p>
</notes>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1. Introduction</title>
<p>Medical image deep learning has made incredible advances in solving a wide range of scientific problems, including tissue segmentation or image classification (Miotto et al., <xref ref-type="bibr" rid="B21">2018</xref>). However, one major drawback of these methods is their applicability in a clinical setting, as many models rely on the assumption that the source and target domains are drawn from the same distribution. As a result, the efficiency of these models may drop drastically when applied to images which were acquired with acquisition protocols different than the ones used to train the models (Kamnitsas et al., <xref ref-type="bibr" rid="B11">2017</xref>; Orbes-Arteaga et al., <xref ref-type="bibr" rid="B23">2019</xref>).</p>
<p>At the same time, combining imaging data from multiple studies and sites is necessary to increase the sample size and thereby the statistical power of neuroimaging studies. However, one major challenge is the lack of standardization in image acquisition protocols, scanner hardware, and software. Inter-scanner variability has been demonstrated to affect measurements obtained for downstream analysis such as voxel-based morphometry (Takao et al., <xref ref-type="bibr" rid="B32">2011</xref>), and lesion volumes (Shinohara et al., <xref ref-type="bibr" rid="B29">2017</xref>). Therefore, the purpose of harmonizing magnetic resonance imaging (MRI) datasets is to make sure that the differences arising from different image acquisition protocols do not affect the analysis performed on the combined data. For example, volumetric and cortical thickness measures should only be affected by brain anatomy and not the acquisition protocol or scanners.</p>
<p>A class of deep learning methods called domain adaptation (DA) techniques aims to address this issue by suppressing the domain shift between the training and test distributions. In general, DA approaches are either semi-supervised, which assume the existence of labels in the target dataset, or unsupervised, which assume the target dataset has no labels. For example, a common approach is to train a model on source domain images and fine-tune it on target domain data (Ghafoorian et al., <xref ref-type="bibr" rid="B6">2017</xref>; Kushibar et al., <xref ref-type="bibr" rid="B15">2019</xref>). Although these methods can give good results, they can become impractical as more often than not the existence of labels in the target dataset is limited or of poor quality. Unsupervised domain adaptation techniques (Ganin and Lempitsky, <xref ref-type="bibr" rid="B5">2015</xref>; Kerfoot et al., <xref ref-type="bibr" rid="B12">2019</xref>) offer a solution to this problem by minimizing the disparity between a source and a target domain, without requiring the use of labeled data in the target domain.</p>
<p>In our previous work (Grigorescu et al., <xref ref-type="bibr" rid="B7">2020</xref>), we investigated two unsupervised DA methods with the aim of predicting brain tissue segmentations on 2D axial slices of <italic>T</italic>2-weighted (<italic>T</italic>2w) MRI data of an unseen neonatal population. We proposed an additional loss term in one of the methods, in order to constrain the network to more realistic reconstructions. Our models were trained using as source domain a dataset with majority of term-born neonates and as target domain a preterm-only population acquired with a different protocol. We calculated mean cortical thickness measures for every subject in the two datasets and we performed an ANCOVA analysis in order to find group differences between the predicted source and target domains. This analysis showed that our proposed method achieved harmonization of our two datasets in terms of cortical gray matter tissue segmentation maps. In this paper, we build on the aforementioned framework, which we expanded in three main ways. First, we build and train 3D neural networks in order to capture more information about the neonatal brain. Second, we extend the validation of our trained models to subsets of the two cohorts matched for gestational age (GA) at birth and postmenstrual age (PMA) at scan, for which we analyse tissue volumes and global and local cortical thickness (CT) measures. Finally, we perform an analysis comparing term and preterm-born neonates on the harmonized cortical gray matter maps and we show the importance of harmonizing the data by a proof-of-principle investigation of the association between cortical thickness and a language outcome measure.</p></sec>
<sec sec-type="materials and methods" id="s2">
<title>2. Materials and Methods</title>
<sec>
<title>2.1. Data Acquisition and Preprocessing</title>
<p>The <italic>T</italic>2w MRI data used in this study was collected as part of two independent projects: the developing Human Connectome Project (dHCP<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref>, approved by the National Research Ethics Committee REC: 14/LO/1169), and the Evaluation of Preterm Imaging (ePrime<xref ref-type="fn" rid="fn0002"><sup>2</sup></xref>, REC: 09/H0707/98) study. The dHCP neonates were scanned during natural unsedated sleep at the Evelina London Children&#x00027;s Hospital between 2015 and 2019. The ePrime neonates were scanned at the neonatal intensive care unit in Hammersmith Hospital between 2010 and 2013 (Edwards et al., <xref ref-type="bibr" rid="B4">2018</xref>). Infants with major congenital malformations were excluded from both cohorts.</p>
<p>The dHCP data was acquired using a Philips Achieva 3T scanner and a 32-channel neonatal head coil (Hughes et al., <xref ref-type="bibr" rid="B8">2017</xref>), using a <italic>T</italic>2w turbo spin echo (TSE) sequence with fat suppression, and using the following parameters: repetition time <italic>T</italic><sub><italic>R</italic></sub> &#x0003D; 12 s, echo time <italic>T</italic><sub><italic>E</italic></sub> &#x0003D; 156 ms, TSE factor 12, and SENSE factors of 2.11 for the axial plane and 2.58 for the sagittal plane. Images were acquired with an in-plane resolution of 0.8 &#x000D7; 0.8 mm, slice thickness of 1.6 mm and overlap of 0.8 mm. For each volume, there was an acquisition of 125 slices in the transverse plane and 134 slices in the sagittal plane. All data was motion corrected (Kuklisova-Murgasova et al., <xref ref-type="bibr" rid="B14">2012</xref>; Cordero-Grande et al., <xref ref-type="bibr" rid="B3">2018</xref>) and super-resolution reconstructed to a 0.5 mm isotropic resolution (Makropoulos et al., <xref ref-type="bibr" rid="B18">2018</xref>).</p>
<p>The ePrime dataset was acquired with a Philips Intera 3T system and an 8-channel phased array head coil, using a <italic>T</italic>2w TSE sequence with parameters: repetition time <italic>T</italic><sub><italic>R</italic></sub> &#x0003D; 8.67 s, echo time <italic>T</italic><sub><italic>E</italic></sub> &#x0003D; 160 ms, and TSE factor 16. Images were acquired with an in-plane resolution of 0.86 &#x000D7; 0.86 mm, slice thickness of 2 mm and overlap of 1 mm. For each volume, the acquisition ranged between 92 and 106 slices in the transverse plane.</p>
<p>Our two datasets comprise 403 MRI scans of infants (184 females and 219 males) born between 23 and 42 weeks GA at birth and scanned at term-equivalent age (after 37 weeks PMA) as part of the dHCP pipeline, and a dataset of 486 MRI scans of infants (245 females and 241 males) born between 23 and 33 weeks GA and scanned at term-equivalent age as part of the ePrime project. <xref ref-type="fig" rid="F1">Figure 1</xref> shows their age distribution.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Age distribution of the subjects in our datasets, showing both their GA at birth, as well as their PMA at scan.</p></caption>
<graphic xlink:href="fnins-15-662005-g0001.tif"/>
</fig>
<p>Both datasets were pre-processed prior to being used by the deep learning algorithms. The ePrime volumes were linearly upsampled to 0.5 mm isotropic resolution to match the resolution of our source (dHCP) dataset. Both dHCP and ePrime datasets were rigidly aligned to a common 40 weeks gestational age atlas space (Schuh et al., <xref ref-type="bibr" rid="B28">2018</xref>) using the MIRTK (Rueckert et al., <xref ref-type="bibr" rid="B26">1999</xref>) software toolbox. Then, skull-stripping was performed on all of our data using the brain masks obtained with the Draw-EM pipeline for automatic brain MRI segmentation of the developing neonatal brain (Makropoulos et al., <xref ref-type="bibr" rid="B18">2018</xref>). Tissue segmentation maps were obtained using the same pipeline (Draw-EM) for both (dHCP and ePrime) cohorts.</p>
<p>To train our networks, we split our datasets into 80% training, 10% validation, and 10% test (see <xref ref-type="table" rid="T1">Table 1</xref>), keeping both the distribution of ages at scan and the male-to-female ratio as close to the original as possible. We used the validation sets to keep track of our models&#x00027; performance during training, and the test sets to report our final models&#x00027; results and showcase their capability to generalize.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Number of scans in different datasets used for training, validation and testing the models, together with their mean GA and PMA.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Dataset</bold></th>
<th valign="top" align="center"><bold>&#x00023;Subjects</bold></th>
<th valign="top" align="center"><bold>GA at birth [weeks]</bold></th>
<th valign="top" align="center"><bold>PMA at scan [weeks]</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Train dHCP</td>
<td valign="top" align="center">340 (160&#x02640; &#x0002B; 180&#x02642;)</td>
<td valign="top" align="center">39.1 (&#x000B1;2.7)</td>
<td valign="top" align="center">40.7 (&#x000B1;1.7)</td>
</tr>
<tr>
<td valign="top" align="left">Validate dHCP</td>
<td valign="top" align="center">32 (12&#x02640; &#x0002B; 20&#x02642;)</td>
<td valign="top" align="center">39.3 (&#x000B1;1.6)</td>
<td valign="top" align="center">40.7 (&#x000B1;1.8)</td>
</tr>
<tr>
<td valign="top" align="left">Test dHCP</td>
<td valign="top" align="center">31 (12&#x02640; &#x0002B; 19&#x02642;)</td>
<td valign="top" align="center">30 (&#x000B1;2.4)</td>
<td valign="top" align="center">41.4 (&#x000B1;1.7)</td>
</tr>
<tr>
<td valign="top" align="left">Train ePrime</td>
<td valign="top" align="center">417 (214&#x02640; &#x0002B; 203&#x02642;)</td>
<td valign="top" align="center">29.6 (&#x000B1;2.3)</td>
<td valign="top" align="center">42.9 (&#x000B1;2.6)</td>
</tr>
<tr>
<td valign="top" align="left">Validate ePrime</td>
<td valign="top" align="center">38 (18&#x02640; &#x0002B; 20&#x02642;)</td>
<td valign="top" align="center">29.8 (&#x000B1;2.3)</td>
<td valign="top" align="center">43 (&#x000B1;2.6)</td>
</tr>
<tr>
<td valign="top" align="left">Test ePrime</td>
<td valign="top" align="center">31 (13&#x02640; &#x0002B; 18&#x02642;)</td>
<td valign="top" align="center">30 (&#x000B1;2.4)</td>
<td valign="top" align="center">41.4 (&#x000B1;1.7)</td>
</tr>
</tbody>
</table>
</table-wrap></sec>
<sec>
<title>2.2. Unsupervised Domain Adaptation Models</title>
<p>To investigate the best solution for segmenting our target dataset (ePrime), we compared three independently trained deep learning models:</p>
<list list-type="bullet">
<list-item><p><bold>Baseline</bold>. A 3D U-Net (&#x000C7;i&#x000E7;ek et al., <xref ref-type="bibr" rid="B2">2016</xref>) trained on the source dataset (dHCP) only and used as a baseline segmentation network (see <xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
</list-item>
<list-item><p><bold>Adversarial domain adaptation in the latent space</bold>. A 3D U-Net segmentation network trained on source (dHCP) volumes, coupled with a discriminator trained on both source (dHCP) and target (ePrime) datasets (see <xref ref-type="fig" rid="F3">Figure 3</xref>). This solution is similar to the one proposed by Kamnitsas et al. (<xref ref-type="bibr" rid="B11">2017</xref>) where the aim was to train the segmentation network such that it becomes agnostic to the data domain.</p></list-item>
<list-item><p><bold>Adversarial domain adaptation in the image space</bold>. Two 3D U-Nets, one acting as a generator, and a second one acting as a segmentation network, coupled with a discriminator trained on both real and synthesized ePrime volumes. The segmentation network is trained to produce tissue maps of the synthesized ePrime volumes created by the generator (see <xref ref-type="fig" rid="F4">Figure 4</xref>). The normalized cross correlation (NCC) loss is added to the generator network to enforce image similarity between real and synthesized images, a solution which was previously proposed by Grigorescu et al. (<xref ref-type="bibr" rid="B7">2020</xref>).</p>
</list-item>
</list>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>The baseline model consists of a 3D U-Net trained to segment source (dHCP) volumes. The input <italic>T</italic>2w MRI images, the predicted segmentation and the Draw-EM output segmentations are marked with S as they all belong to the source (dHCP) dataset.</p></caption>
<graphic xlink:href="fnins-15-662005-g0002.tif"/>
</fig>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>The latent space domain adaptation setup consists of a 3D U-Net trained to segment the source (dHCP) <italic>T</italic>2w MRI volumes, coupled with a discriminator network which forces the segmentation network to learn domain-invariant features. Both source (dHCP) and target (ePrime) images are fed to the segmentation network, but only source (dHCP) Draw-EM output labels are used to compute the segmentation loss. Source domain images are marked with S, while target domain images are marked with T, respectively.</p></caption>
<graphic xlink:href="fnins-15-662005-g0003.tif"/>
</fig>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>The image space domain adaptation setup uses a generator network to produce ePrime-like <italic>T</italic>2w MRI images (marked with <inline-formula><mml:math id="M1"><mml:mover accent="false"><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">T</mml:mtext></mml:mstyle></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover></mml:math></inline-formula>), which are then used as input into the segmentation network. The discriminator is trained to distinguish between real (ePrime) and synthesized (ePrime-like) volumes, while the generator is trained to produce realistic images in order to fool the discriminator. The normalized cross correlation (NCC) loss enforces image similarity between real and synthesized volumes.</p></caption>
<graphic xlink:href="fnins-15-662005-g0004.tif"/>
</fig>
<p>To further validate the harmonized tissue maps, we trained an additional network (a 3D U-Net) to segment binary cortical tissue maps into 11 cortical substructures (see <xref ref-type="table" rid="T2">Table 2</xref>) based on anatomical groupings of cortical regions derived from the Draw-EM pipeline. The key reasons for training an extra network are: first, we avoid the time consuming task of label propagation between our available dHCP Draw-EM output segmentations and predicted ePrime maps, and second, we can train this network using Draw-EM cortical segmentations, and apply it on any brain cortical gray matter maps as in this case there will be no intensity shift between target and source distributions.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Grouping of cortical substructures showing their original tissue name obtained from Draw-EM (Makropoulos et al., <xref ref-type="bibr" rid="B18">2018</xref>) on the first column and their corresponding cortical subregion on the second column.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Tissue name</bold></th>
<th valign="top" align="left"><bold>Cortical subregion</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Anterior temporal lobe, medial part left</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Anterior temporal lobe, lateral part left</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Gyri parahippocampalis et ambiens anterior part left</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Superior temporal gyrus, middle part left</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Medial and inferior temporal gyri anterior part left</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Lateral occipitotemporal gyrus, gyrus fusiformis anterior part left</td>
<td valign="top" align="left">Temporal (left)</td>
</tr>
<tr>
<td valign="top" align="left">Gyri parahippocampalis et ambiens posterior part left</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Lateral occipitotemporal gyrus, gyrus fusiformis posterior part left</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Medial and inferior temporal gyri posterior part left</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Superior temporal gyrus, posterior part left</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Anterior temporal lobe, medial part right</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Anterior temporal lobe, lateral part right</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Gyri parahippocampalis et ambiens anterior part right</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Superior temporal gyrus, middle part right</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Medial and inferior temporal gyri anterior part right</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Lateral occipitotemporal gyrus, gyrus fusiformis anterior part right</td>
<td valign="top" align="left">Temporal (right)</td>
</tr>
<tr>
<td valign="top" align="left">Gyri parahippocampalis et ambiens posterior part right</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Lateral occipitotemporal gyrus, gyrus fusiformis posterior part right</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Medial and inferior temporal gyri posterior part right</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Superior temporal gyrus, posterior part right</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Insula left</td>
<td valign="top" align="left">Insula (left)</td>
</tr>
<tr>
<td valign="top" align="left">Insula right</td>
<td valign="top" align="left">Insula (right)</td>
</tr>
<tr>
<td valign="top" align="left">Occipital lobe left</td>
<td valign="top" align="left">Occipital (left)</td>
</tr>
<tr>
<td valign="top" align="left">Occipital lobe right</td>
<td valign="top" align="left">Occipital (right)</td>
</tr>
<tr>
<td valign="top" align="left">Cingulate gyrus, anterior part right</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Cingulate gyrus, anterior part left</td>
<td valign="top" align="left">Cingulate</td>
</tr>
<tr>
<td valign="top" align="left">Cingulate gyrus, posterior part right</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Cingulate gyrus, posterior part left</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Frontal lobe left</td>
<td valign="top" align="left">Frontal (left)</td>
</tr>
<tr>
<td valign="top" align="left">Frontal lobe right</td>
<td valign="top" align="left">Frontal (right)</td>
</tr>
<tr>
<td valign="top" align="left">Parietal lobe left</td>
<td valign="top" align="left">Parietal (left)</td>
</tr>
<tr>
<td valign="top" align="left">Parietal lobe right</td>
<td valign="top" align="left">Parietal (right)</td>
</tr>
</tbody>
</table>
</table-wrap></sec>
<sec>
<title>2.3. Network Architectures</title>
<p>The segmentation networks in all three setups and the generator used in the adversarial domain adaptation in the image space model have the same architecture, consisting of 5 encoding-decoding branches with 16, 32, 64, 128, and 256 channels, respectively. The encoder blocks use 3<sup>3</sup> convolutions (with a stride of 1), instance normalization (Ulyanov et al., <xref ref-type="bibr" rid="B34">2016</xref>) and LeakyReLU activations. A 2<sup>3</sup> average pooling layer is used after the first down-sampling block, while the others use 2<sup>3</sup> max pooling layers. The decoder blocks consist of 3<sup>3</sup> convolutions (with a stride of 1), instance normalization (Ulyanov et al., <xref ref-type="bibr" rid="B34">2016</xref>), LeakyReLU activations, and, additionally, 3<sup>3</sup> transposed convolutions. The number of encoding-decoding blocks, as well as the use of LeakyReLU activations and instance normalization layers, were chosen based on the best practices described in Isensee et al. (<xref ref-type="bibr" rid="B9">2018</xref>). At the same time, the network configurations that we have chosen allowed us to work with the hardware we have at hand (Titan XP 12 GB). The segmentation network outputs a 7-channel 3D volume (of the same size as the input image), corresponding to our 7 classes: background, cerebrospinal fluid (CSF), cortical gray matter (cGM), white matter (WM), deep gray matter (dGM), cerebellum and brainstem. The generator network&#x00027;s last convolutional layer is followed by a Tanh activation and outputs a single channel image.</p>
<p>For our unsupervised domain adaptation models (<xref ref-type="fig" rid="F3">Figures 3</xref>, <xref ref-type="fig" rid="F4">4</xref>) we used a PatchGAN discriminator as proposed in Isola et al. (<xref ref-type="bibr" rid="B10">2017</xref>). Its architecture consists of 5 blocks of 4<sup>3</sup> convolutions (with a stride of 2) with 64, 128, 256, 512, and 1 channels, respectively, instance normalization and LeakyReLU activations.</p>
<p>The cortical parcellation network has the same architecture as the tissue segmentation network, but outputs a 12-channel 3D volume corresponding to the following cortical substructures: frontal left, frontal right, cingulate, temporal left, temporal right, insula left, insula right, parietal left, parietal right, occipital left, and occipital right, respectively. The last class represents the background.</p></sec>
<sec>
<title>2.4. Training</title>
<p>The baseline segmentation network (<xref ref-type="fig" rid="F2">Figure 2</xref>) was trained by minimizing the generalized Dice loss (Sudre et al., <xref ref-type="bibr" rid="B31">2017</xref>) between the predicted and the Draw-EM segmentation maps (Equation 1).</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M2"><mml:mtable class="eqnarray" columnalign="right center left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="-tex-caligraphic">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mi>o</mml:mi><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="-tex-caligraphic">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:mn>2</mml:mn><mml:mfrac><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>M</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mstyle displaystyle="true"><mml:msub><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mstyle><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>M</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mstyle 
displaystyle="true"><mml:msub><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mstyle><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <inline-formula><mml:math id="M3"><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munder><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> is the weight of the lth tissue type, <italic>p</italic><sub><italic>ln</italic></sub> is the predicted probabilistic map of the lth tissue type at voxel n, <italic>t</italic><sub><italic>ln</italic></sub> is the target label map of the lth tissue type at voxel n, and M is the number of tissue classes. While training, we used the Adam optimizer (Kingma and Ba, <xref ref-type="bibr" rid="B13">2014</xref>) with its default parameters and a decaying cyclical learning rate scheduler (Smith, <xref ref-type="bibr" rid="B30">2017</xref>) with a base learning rate of 2&#x000B7;10<sup>&#x02212;6</sup> and a maximum learning rate of 2&#x000B7;10<sup>&#x02212;3</sup>. The choice of optimizer was based on knowledge of previous image translation literature (Isola et al., <xref ref-type="bibr" rid="B10">2017</xref>; Zhu et al., <xref ref-type="bibr" rid="B37">2017</xref>; Liao et al., <xref ref-type="bibr" rid="B16">2019</xref>; Ranzini et al., <xref ref-type="bibr" rid="B25">2020</xref>) where it yielded good results. At the same time, a varying learning rate during training was shown to improve results in fewer iterations when compared to using a fixed value (Smith, <xref ref-type="bibr" rid="B30">2017</xref>).</p>
<p>The segmentation network from the adversarial domain adaptation in the latent space model was trained to produce tissue maps on the source (dHCP) volumes. In addition, both target (ePrime) and source (dHCP) volumes were fed to the segmentation network, while the feature maps obtained from every level of its decoder arm were passed to the discriminator network which acted as a domain classifier. This was done after either up-sampling or down-sampling the feature maps to match the volume size of the second deepest layer. This model was trained by minimizing a Cross-Entropy loss between predicted and assigned target labels representing our two domains. The final loss function for our second model was therefore made up of the generalized Dice loss and an adversarial loss:</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M4"><mml:mtable class="eqnarray" columnalign="right center left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="-tex-caligraphic">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mi>o</mml:mi><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="-tex-caligraphic">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mi>&#x003B1;</mml:mi><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="-tex-caligraphic">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>v</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>&#x003B1;</italic> was a hyperparameter increased linearly from 0 to 0.05 starting at epoch 20, and which remained equal to 0.05 from epoch 50 onward. Similar to Kamnitsas et al. (<xref ref-type="bibr" rid="B11">2017</xref>) we looked at the behavior of our discriminator and segmentation network when training with different values of <italic>&#x003B1;</italic> &#x02208; [0.02, 0.05, 0.1, 0.2, 0.5]. We found the discriminator&#x00027;s accuracy during training stable for all investigated values, while the segmentation network achieved the lowest loss when <italic>&#x003B1;</italic> &#x0003D; 0.05. The segmentation network was trained similarly to the baseline model, while the discriminator network was trained using the Adam optimizer with <italic>&#x003B2;</italic><sub>1</sub> &#x0003D; 0.5 and <italic>&#x003B2;</italic><sub>2</sub> &#x0003D; 0.999, and a linearly decaying learning rate scheduler starting from 2&#x000B7;10<sup>&#x02212;3</sup>.</p>
<p>The generator network used in the image space domain adaptation approach was trained to produce synthesized ePrime volumes, while the segmentation network was trained using the same loss function, optimizer and learning rate scheduler as in the other two methods. In the previous model (adversarial domain adaptation in the latent space) we fed both dHCP and ePrime volumes to the segmentation network to obtain data agnostic feature maps. For this reason, and to allow for a fair comparison between the two unsupervised domain adaptation models, we trained the segmentation network from the image space model on both real dHCP and synthesized ePrime volumes. For both the discriminator and the generator networks the Adam optimizer with <italic>&#x003B2;</italic><sub>1</sub> &#x0003D; 0.5 and <italic>&#x003B2;</italic><sub>2</sub> &#x0003D; 0.999 was used, together with a linearly decaying learning rate scheduler starting from 2&#x000B7;10<sup>&#x02212;3</sup>. The loss function of the discriminator was similar to that of the Least Squares GAN (Mao et al., <xref ref-type="bibr" rid="B20">2017</xref>): <inline-formula><mml:math id="M5"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="-tex-caligraphic">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>D</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>D</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:mi>S</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>D</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> where <italic>a</italic> signified the label for synthesized volumes and <italic>b</italic> was the label for real volumes. The generator and the segmentation network were trained together using the following loss:</p>
<disp-formula id="E3"><label>(3)</label><mml:math id="M6"><mml:mtable class="eqnarray" columnalign="right center left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="-tex-caligraphic">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mi>o</mml:mi><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="-tex-caligraphic">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="-tex-caligraphic">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>v</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <inline-formula><mml:math id="M7"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="-tex-caligraphic">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>v</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:mi>S</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>D</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula>. An additional NCC loss was used between the real and the generated volumes in order to constrain the generator to produce realistic looking ePrime-like images. Without the additional NCC loss, the generator tends to produce images with an enlarged CSF boundary in order to match the preterm-only distribution found in the ePrime dataset, as was previously shown in Grigorescu et al. (<xref ref-type="bibr" rid="B7">2020</xref>).</p>
<p>These three methods were trained with and without data augmentation for 100 epochs, during which we used the validation sets to inform us about our models&#x00027; performance and to decide on the best performing models. For data augmentation we applied: random affine transformations [with rotation angles <inline-formula><mml:math id="M8"><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0007E;</mml:mo><mml:mrow><mml:mi mathvariant="-tex-caligraphic">U</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>-</mml:mo><mml:mn>1</mml:mn><mml:msup><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">o</mml:mtext></mml:mstyle></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:msup><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">o</mml:mtext></mml:mstyle></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> and/or scaling values <inline-formula><mml:math id="M9"><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0007E;</mml:mo><mml:mrow><mml:mi mathvariant="-tex-caligraphic">U</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>8</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo>.</mml:mo><mml:mn>2</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>], random motion artifacts [corresponding to rotations of <inline-formula><mml:math id="M10"><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0007E;</mml:mo><mml:mrow><mml:mi mathvariant="-tex-caligraphic">U</mml:mi></mml:mrow><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:mo>-</mml:mo><mml:msup><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">o</mml:mtext></mml:mstyle></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">o</mml:mtext></mml:mstyle></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> and translations of <inline-formula><mml:math id="M11"><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0007E;</mml:mo><mml:mrow><mml:mi mathvariant="-tex-caligraphic">U</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>-</mml:mo><mml:mn>2</mml:mn><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">mm</mml:mtext></mml:mstyle><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">mm</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>], and random MRI spike and bias field artifacts (P&#x000E9;rez-Garc&#x000ED;a et al., <xref ref-type="bibr" rid="B24">2020</xref>). The cortical parcellation network was trained in a similar fashion as the baseline tissue segmentation network, with data augmentation in the form of random affine transformations (with the same parameters as above).</p>
<p>The test set was used to report our final models&#x00027; results and to showcase their capability to generalize on the source domain. Finally, we produced tissue segmentation maps for all the subjects in our datasets, and used them as input into ANT&#x00027;s DiReCT algorithm (Tustison et al., <xref ref-type="bibr" rid="B33">2013</xref>) to compute cortical thickness measures. To validate our results, we compared cortical thickness measures between subsets of the two cohorts matched for GA and PMA, for which we expect no significant difference in cortical thickness if the harmonization was successful. We also assessed the association between PMA and cortical thickness in the two cohorts.</p></sec></sec>
<sec sec-type="results" id="s3">
<title>3. Results</title>
<sec>
<title>3.1. dHCP Test Dataset</title>
<sec>
<title>3.1.1. Baseline and Domain Adaptation Models</title>
<p>In our first experiment we looked at the performance of the six trained models when applied to the source (dHCP) test dataset. The aim was to assess whether our trained models were able to generalize to unseen source domain (dHCP) data for which we have reliable Draw-EM outputs. <xref ref-type="fig" rid="F5">Figure 5</xref> summarizes the results of our trained models, showing mean Dice scores, mean Hausdorff distance calculated using SimpleITK (Lowekamp et al., <xref ref-type="bibr" rid="B17">2013</xref>; Yaniv et al., <xref ref-type="bibr" rid="B36">2018</xref>), precision and recall. These metrics were computed between the predicted tissue segmentation maps and the Draw-EM output labels for each of the six trained models. The model that obtained the best score is highlighted with the yellow diamond for each metric and tissue type. In terms of Dice scores, out of the six models, the <italic>baseline with augmentation</italic> and <italic>image with augmentation</italic> methods performed best on the source domain test dataset for CSF, dGM, cerebellum and brainstem, with no significant difference between them. For cGM and WM, the best performance was obtained by the <italic>baseline with augmentation</italic> model, while the domain adaptation methods showed a slight decrease in performance. The three models trained without augmentation always performed significantly worse than their augmented counterparts.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>The results on our dHCP test dataset for all six methods. The yellow diamond highlights the model which obtained the best mean score for its respective tissue type and metric. Models which obtained non-significant differences when compared to the best performing method are shown above each pair.</p></caption>
<graphic xlink:href="fnins-15-662005-g0005.tif"/>
</fig>
<p>In terms of average Hausdorff distance, both the <italic>baseline with augmentation</italic> and <italic>image with augmentation</italic> models performed well, while the <italic>latent without augmentation</italic> model performed worse than all the other models for all tissue types. Highest precision scores were obtained by the <italic>baseline with augmentation</italic> model for both CSF and WM, the <italic>image without augmentation</italic> method for both cGM and brainstem, the <italic>baseline without augmentation</italic> for dGM, and the <italic>latent with augmentation</italic> model for cerebellum. Highest recall scores were obtained by the <italic>baseline with augmentation</italic> model for cGM and cerebellum, the <italic>latent with augmentation</italic> model for WM, dGM and brainstem, and the <italic>latent without augmentation</italic> model for CSF. These results show that our trained models were able to generalize to unseen source domain data, and that the performance on the dHCP dataset was not compromised by using domain adaptation techniques.</p></sec>
<sec>
<title>3.1.2. Cortical Parcellation Network</title>
<p>To assess the performance of our trained cortical parcellation network, we applied it on the source (dHCP) test dataset, where the inputs were binary Draw-EM cortical gray matter tissue maps. For each subject in our test dataset, the network produced a 12-channel output, consisting of: frontal left, frontal right, cingulate, temporal left, temporal right, insula left, insula right, parietal left, parietal right, occipital left, occipital right, and background, respectively. <xref ref-type="table" rid="T3">Table 3</xref> summarizes these results in terms of minimum, maximum and mean Dice scores for each of the 11 cortical substructures. When compared with the Draw-EM outputs (Makropoulos et al., <xref ref-type="bibr" rid="B18">2018</xref>), the network obtained an overall mean Dice score of 0.97.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Dice Scores obtained on the dHCP test set for the trained cortical parcellation network.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Tissue</bold></th>
<th valign="top" align="center"><bold>Min</bold></th>
<th valign="top" align="center"><bold>Max</bold></th>
<th valign="top" align="center"><bold>Mean</bold></th>
<th valign="top" align="left"><bold>Tissue</bold></th>
<th valign="top" align="center"><bold>Min</bold></th>
<th valign="top" align="center"><bold>Max</bold></th>
<th valign="top" align="center"><bold>Mean</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Frontal (left)</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">0.99</td>
<td valign="top" align="center">0.99</td>
<td valign="top" align="left">Frontal (right)</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">0.99</td>
<td valign="top" align="center">0.99</td>
</tr>
<tr>
<td valign="top" align="left">Temporal (left)</td>
<td valign="top" align="center">0.96</td>
<td valign="top" align="center">0.99</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="left">Temporal (right)</td>
<td valign="top" align="center">0.97</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">0.98</td>
</tr>
<tr>
<td valign="top" align="left">Insula (left)</td>
<td valign="top" align="center">0.95</td>
<td valign="top" align="center">0.97</td>
<td valign="top" align="center">0.96</td>
<td valign="top" align="left">Insula (right)</td>
<td valign="top" align="center">0.95</td>
<td valign="top" align="center">0.97</td>
<td valign="top" align="center">0.96</td>
</tr>
<tr>
<td valign="top" align="left">Parietal (left)</td>
<td valign="top" align="center">0.96</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">0.97</td>
<td valign="top" align="left">Parietal (right)</td>
<td valign="top" align="center">0.96</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">0.97</td>
</tr>
<tr>
<td valign="top" align="left">Occipital (left)</td>
<td valign="top" align="center">0.94</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">0.97</td>
<td valign="top" align="left">Occipital (right)</td>
<td valign="top" align="center">0.95</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">0.97</td>
</tr>
<tr>
<td valign="top" align="left">Cingulate</td>
<td valign="top" align="center">0.93</td>
<td valign="top" align="center">0.97</td>
<td valign="top" align="center">0.96</td>
</tr>
</tbody>
</table>
</table-wrap></sec></sec>
<sec>
<title>3.2. Validation of Data Harmonization</title>
<p>In order to evaluate the extent to which each of the trained models managed to harmonize the segmentation maps of the two cohorts, we looked at tissue volumes and mean cortical thickness measures between subsamples of the dHCP (<italic>N</italic> &#x0003D; 30; median GA  &#x0003D; 30.50 weeks; median PMA  &#x0003D; 41.29 weeks) and ePrime (<italic>N</italic> &#x0003D; 30; median GA  &#x0003D; 30.64 weeks; median PMA  &#x0003D; 41.29 weeks) cohort which showed comparable GA at birth and PMA at time of scan (see <xref ref-type="table" rid="T1">Table 1</xref>). A direct comparison between the two cohort subsets shows that the dHCP and ePrime neonates did not differ significantly in terms of sex [&#x003C7;<sup>2</sup>(1) &#x0003C; 0.001, <italic>p</italic> &#x0003E; 0.05], or maternal ethnicity [&#x003C7;<sup>2</sup>(4) &#x0003D; 4.32, <italic>p</italic> &#x0003E; 0.05], coded as &#x0201C;white or white British,&#x0201D; &#x0201C;black or black British,&#x0201D; &#x0201C;asian or asian British,&#x0201D; &#x0201C;mixed race,&#x0201D; and &#x0201C;other.&#x0201D; As a proxy for socio-economic status, we derived an Index of Multiple Deprivation (IMD) score based on parental postcode at the time of infant birth (Department for Communities and Local Government, 2011<xref ref-type="fn" rid="fn0003"><sup>3</sup></xref>). This measure is based on seven domains of deprivation within each neighborhood compared to all others in the country: income, employment, education, skills and training, health and disability, barriers to housing and services, living environment and crime. Higher IMD values therefore indicate higher deprivation. IMD score did not differ significantly between dHCP (<italic>M</italic> &#x0003D; 21.4, <italic>SD</italic> &#x0003D; 10.7) and ePrime (<italic>M</italic> &#x0003D; 18.0, <italic>SD</italic> &#x0003D; 11.6) subsets, suggesting that these two groups are comparable in terms of environmental background.</p>
<p>For these two cohort subsamples with similar GA and PMA, we expected both volumes and cortical thickness measures not to differ after applying the harmonization procedures. We also investigated the relationship between PMA and volumes and cortical thickness respectively, before and after applying the harmonization. Linear regressions were performed in the comparable data subsets testing the effects of PMA and cohort on volumes (or cortical thickness), controlling for GA and sex.</p>
<sec>
<title>3.2.1. Volumes</title>
<p><xref ref-type="fig" rid="F6">Figure 6</xref> shows the tissue volumes for both the original and the predicted segmentations. Significant volume differences between the two subsamples (i.e., significant effect of cohort in the regression model) are reported above each tested model. To summarize, the <italic>image with augmentation</italic> model performed best, by showing no significant differences in the two cohorts for cortical gray matter, white matter, deep gray matter, cerebellum and brainstem. The cerebrospinal fluid volumes were significantly different between the two cohorts for all our trained models, as well as for the original ePrime segmentation masks.</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>Comparison of volume measures for our six tissue types (CSF, cGM, WM, dGM, cerebellum, and brainstem) between original Draw-EM dHCP segmentations and original Draw-EM ePrime segmentations (first column), or between original Draw-EM dHCP segmentations and ePrime segmentations obtained with the six trained models (columns 2&#x02013;7). Linear regressions were performed in the comparable data subsets testing the effects of cohort on volumes, controlling for PMA, GA, and sex (volume &#x0007E; cohort &#x0002B; PMA &#x0002B; GA &#x0002B; sex). The asterisks indicate a statistically significant effect of cohort in the linear regression.</p></caption>
<graphic xlink:href="fnins-15-662005-g0006.tif"/>
</fig></sec>
<sec>
<title>3.2.2. Cortical Thickness</title>
<p><xref ref-type="fig" rid="F7">Figure 7</xref> summarizes the results of applying the cortical thickness algorithm on the predicted segmentation maps for all six methods. Before harmonization, the matched subsets from the dHCP and ePrime cohorts showed a significant difference in mean cortical thickness [dHCP: <italic>M</italic> &#x0003D; 1.73, <italic>SD</italic> &#x0003D; 0.12; ePrime: <italic>M</italic> &#x0003D; 1.93, <italic>SD</italic> &#x0003D; 0.13; <italic>t</italic>(58) &#x0003D; 6.33, <italic>p</italic> &#x0003C; 0.001]. After applying the harmonization to the ePrime sample, mean cortical thickness no longer differed between the two subsamples for four of our methods. These results are summarized in panel H from <xref ref-type="fig" rid="F7">Figure 7</xref>, where the models which obtained harmonized values in terms of mean cortical thickness measures are shown in bold. <xref ref-type="fig" rid="F7">Figure 7</xref> also shows the association between PMA and mean cortical thickness before (<xref ref-type="fig" rid="F7">Figure 7A</xref>) and after applying the models (<xref ref-type="fig" rid="F7">Figures 7B&#x02013;G</xref>) on the matched dHCP and ePrime subsets. A linear model regressing unharmonized mean cortical thickness on PMA, GA, sex, and cohort revealed a significant effect of cohort (<italic>&#x003B2;</italic> &#x0003D; 0.20; <italic>p</italic> &#x0003C; 0.001), consistent with a group difference in mean cortical thickness reported above, as well as a significant effect of PMA (<italic>&#x003B2;</italic> &#x0003D; 0.04; <italic>p</italic> &#x0003C; 0.001), consistent with an increase in cortical thickness with increasing PMA. After applying the methods, the effect of cohort was rendered non-significant for four of the methods (see highlighted panels C, E, F, G from <xref ref-type="fig" rid="F7">Figure 7</xref>), while the effect of PMA remained stable across all six methods.</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>The association between PMA and mean cortical thickness before <bold>(A)</bold> and after <bold>(B&#x02013;G)</bold> applying the data harmonization models on the matched dHCP and ePrime subsets. A linear model regressing mean cortical thickness measures on PMA, GA, sex, and cohort revealed a significant effect of cohort for the original segmentations <bold>(A)</bold>, and the predicted maps (<bold>B</bold> - <italic>baseline without augmentation</italic> and <bold>D</bold> - <italic>latent without augmentation</italic>). The effect of cohort was rendered non-significant for four of the methods (<bold>C</bold> - <italic>baseline with augmentation</italic>, <bold>E</bold> - <italic>latent with augmentation</italic>, <bold>F</bold> - <italic>image without augmentation</italic>, and <bold>G</bold> - <italic>image with augmentation</italic>). <bold>(H)</bold> summarizes cortical thickness measures before and after applying the models.</p></caption>
<graphic xlink:href="fnins-15-662005-g0007.tif"/>
</fig>
<p>We performed a similar analysis on thickness measures of the cortical substructures. To obtain these measures, we used the original and the predicted cortical gray matter segmentation maps (obtained by applying each of our six methods) as input to the trained cortical parcellation network to predict cortical substructure masks. We then used these masks to calculate local cortical thickness measures. Our results are summarized in <xref ref-type="fig" rid="F8">Figure 8</xref>.</p>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>Comparison of local mean cortical thickness measures between original Draw-EM dHCP segmentations and original Draw-EM ePrime segmentations (first column), or between original Draw-EM dHCP segmentations and ePrime segmentations obtained with the six trained models (columns 2&#x02013;7). Linear regressions were performed in the comparable data subsets testing the effects of cohort on local cortical thickness measures, controlling for PMA, GA, and sex (CT &#x0007E; cohort &#x0002B; PMA &#x0002B; GA &#x0002B; sex). The asterisks indicate a statistically significant effect of cohort in the linear regression.</p></caption>
<graphic xlink:href="fnins-15-662005-g0008.tif"/>
</fig></sec>
<sec>
<title>3.2.3. Example Predictions</title>
<p>To further narrow down which of the four remaining methods was best at harmonizing our ePrime neonatal dataset, we looked at the predicted segmentations. <xref ref-type="fig" rid="F9">Figure 9</xref> shows two example neonates from the ePrime dataset with GA  &#x0003D; 32.9 w, PMA  &#x0003D; 43.6 w, and with GA  &#x0003D; 28.7 w, PMA  &#x0003D; 44.7 w, respectively. The first column shows <italic>T</italic>2w sagittal and axial slices, respectively, while the following four columns show example tissue prediction maps produced by the four models: <italic>baseline with augmentation, latent with augmentation, image</italic>, and <italic>image with augmentation</italic>, respectively. Although all four methods performed well in terms of harmonizing tissue segmentation volumes and global mean cortical thickness values for the two subsamples with similar GA and PMA, previously presented quantitative results as well as the example above suggest that the <italic>image with augmentation</italic> method was more robust.</p>
<fig id="F9" position="float">
<label>Figure 9</label>
<caption><p>Example predicted segmentation maps for the best performing models. On the first row we show an example where three of the models (<italic>baseline with augmentation, latent with augmentation</italic>, and <italic>image</italic>) misclassified a part of the cortex as being deep gray matter. This is more pronounced in the <italic>baseline with augmentation</italic> model, while the <italic>latent with augmentation</italic> and <italic>image</italic> models show a slight improvement. The <italic>image with augmentation</italic> model corrected the problem entirely. On the second row the yellow arrow points to an area of CSF where the <italic>baseline with augmentation</italic> model misclassified it as dGM, while the other three models did not have this problem. The red arrow on the other hand points to an area where the <italic>latent with augmentation</italic> model misclassified cGM as deep gray matter. This problem does not appear in the other models.</p></caption>
<graphic xlink:href="fnins-15-662005-g0009.tif"/>
</fig>
<p>Finally, <xref ref-type="fig" rid="F10">Figure 10</xref> shows the axial, sagittal and coronal slices of an ePrime neonate (GA  &#x0003D; 32.86 w and PMA  &#x0003D; 39.86 w). The first line shows the <italic>T</italic>2w MR image, while the second and third lines show the CSF boundary of both the Draw-EM algorithm and the <italic>image with augmentation</italic> method. The green arrows point to a WM region which was misclassified by the Draw-EM pipeline as CSF. This problem was then corrected by the <italic>image with augmentation</italic> method.</p>
<fig id="F10" position="float">
<label>Figure 10</label>
<caption><p>Example of a neonate from the ePrime dataset with GA  &#x0003D; 32.86 w and PMA  &#x0003D; 39.86 w where the Draw-EM algorithm performed worse than our proposed <italic>image with augmentation</italic> model. The green arrow points at a region which was segmented as CSF by Draw-EM, but then corrected by our model.</p></caption>
<graphic xlink:href="fnins-15-662005-g0010.tif"/>
</fig></sec></sec>
<sec>
<title>3.3. Analysis of Harmonized Cortical Substructures</title>
<p>In this section we analyze the harmonized cortical gray matter segmentation maps using the <italic>image with augmentation</italic> model. We produce tissue segmentation maps for the entire ePrime dataset and calculate cortical thickness measures on the predicted and Draw-EM cortical gray matter tissue maps of both cohorts. In addition, we use the trained cortical parcellation network to produce cortical substructure masks. We perform a term <italic>vs</italic> preterm analysis on the harmonized cortical gray matter maps and we show the importance of harmonizing the data with a proof-of-principle application setting where we investigate the association between cortical thickness and a language outcome measure.</p>
<sec>
<title>3.3.1. Comparison of Term and Preterm Cortical Maps</title>
<p>Associations between cortical thickness and GA or PMA in the full dHCP and ePrime datasets (excluding subjects with PMA &#x0003E;45 weeks) for the whole cortex are depicted in <xref ref-type="fig" rid="F11">Figure 11</xref>, where we show individual regression lines for preterm-born and term-born neonates. The first column consists of dHCP-only subjects, while the following two columns showcase both cohorts together, before and after harmonizing the cortical gray matter tissue maps.</p>
<fig id="F11" position="float">
<label>Figure 11</label>
<caption><p>Mean cortical thickness measures in our dHCP dataset (first column), and in both cohorts before (second column) and after (third column) harmonizing the tissue segmentation maps. The first row plots the cortical thickness measures against GA, while the second row plots the cortical thickness measures against PMA, with individual regression lines on top.</p></caption>
<graphic xlink:href="fnins-15-662005-g0011.tif"/>
</fig>
<p>A linear model regressing dHCP-only mean cortical thickness on PMA, GA, sex, birth weight and the interaction between PMA and GA revealed a significant effect of PMA (<italic>&#x003B2;</italic> &#x0003D; 0.19; <italic>p</italic> &#x0003C; 0.001), a significant effect of GA (<italic>&#x003B2;</italic> &#x0003D; 0.16; <italic>p</italic> &#x0003D; 0.002), and a significant effect of the interaction between PMA and GA (<italic>&#x003B2;</italic> &#x0003D; &#x02212;0.004; <italic>p</italic> &#x0003D; 0.002), indicating that infants born at a lower GA showed a stronger relationship between PMA and CT. When performing the same analysis in the pooled ePrime and dHCP data before harmonizing the maps, the effect of GA and the effect of the interaction were rendered not significant (GA: <italic>&#x003B2;</italic> &#x0003D; 0.009; <italic>p</italic> &#x0003D; 0.7 and PMA&#x0002A;GA: <italic>&#x003B2;</italic> &#x0003D; &#x02212;0.0006; <italic>p</italic> &#x0003D; 0.5, respectively). This is corrected after harmonizing the tissue maps, where the effects of GA (<italic>&#x003B2;</italic> &#x0003D; 0.06; <italic>p</italic> &#x0003D; 0.02) and the effects of the GA and PMA interaction (<italic>&#x003B2;</italic> &#x0003D; &#x02212;0.001; <italic>p</italic> &#x0003D; 0.02) are, again, significant.</p>
<p>The second and third columns of <xref ref-type="fig" rid="F11">Figure 11</xref> show that after harmonizing the tissue segmentation maps, the ePrime preterm-born neonates (green dots) are brought downwards into a comparable range of values to the dHCP preterms (red dots). Moreover, when plotting the cortical thickness measures against PMA, after harmonizing the tissue maps, the intersection between the two individual regression lines (term and preterm-born neonates) happens at roughly the same age (PMA &#x0003D; 38.5 weeks) as in the dHCP-only dataset.</p>
<p>We extended the term <italic>vs</italic> preterm analysis to the cortical substructures. <xref ref-type="fig" rid="F12">Figure 12</xref> shows the results of applying a linear model regressing mean cortical thickness measures on PMA, GA, sex, birth weight and prematurity, where significant differences (<italic>p</italic> &#x0003C; 0.05) between the two cohorts (term and preterm-born neonates) are highlighted in the image.</p>
<fig id="F12" position="float">
<label>Figure 12</label>
<caption><p>Comparison of cortical thickness measures for the whole cortex and for each of the 11 cortical subregions between term and preterm-born neonates. The results of the linear regression are reported in the table in terms of differences between term and preterm-born neonates.</p></caption>
<graphic xlink:href="fnins-15-662005-g0012.tif"/>
</fig></sec>
<sec>
<title>3.3.2. Behavioral Outcome Association</title>
<p>As a final proof-of-principle, we demonstrate the importance of data harmonization in an application setting investigating the association between neonatal cortical thickness and a behavioral outcome measure. For this, we consider language abilities as assessed between 18 and 24 months in both dHCP and ePrime cohorts using the Bayley Scales of Infant and Toddler Development (Bayley, <xref ref-type="bibr" rid="B1">2006</xref>). Age-normed composite language scores were available for 203 toddlers from the dHCP cohort (M = 96.43; SD = 14.89) and 136 toddlers from the ePrime cohort (M = 91.25; SD = 17.37). For the neonatal cortical thickness measure, we focus on the left and right frontal cortex for illustration.</p>
<p>Regressing composite language score against left or right frontal cortical thickness in each cohort separately, controlling for PMA, GA, sex and intracranial volume showed that there was no significant association between neonatal left/right frontal cortical thickness and language abilities at toddler age in either of the cohorts. However, when pooling data from both cohorts together and rerunning the same analysis (using un-harmonized cortical thickness measures), a significant association between left/right frontal cortical thickness and language abilities is seen (left: <italic>&#x003B2;</italic> &#x0003D; &#x02212;17.56, <italic>p</italic> &#x0003C; 0.05, right: <italic>&#x003B2;</italic> &#x0003D; &#x02212;18.76, <italic>p</italic> &#x0003C; 0.05), suggesting that greater frontal cortical thickness at term-equivalent age is associated with reduced language abilities at toddler age.</p>
<p>However, as can be seen in <xref ref-type="fig" rid="F13">Figure 13</xref>, this is likely a spurious effect due to (artifactually) heightened cortical thickness values in un-harmonized ePrime data combined with lower language composite scores in the ePrime cohort (consistent with effects typically observed in preterm cohorts). Indeed, when rerunning the same analysis on harmonized data pooled across both cohorts, the effect of cortical thickness on language ability is rendered non-significant in both left (<italic>&#x003B2;</italic> &#x0003D; &#x02212;13.99, <italic>p</italic> &#x0003D; 0.15) and right (<italic>&#x003B2;</italic> &#x0003D; &#x02212;16.69, <italic>p</italic> &#x0003D; 0.068) frontal cortex, consistent with the ground-truth findings in each individual cohort.</p>
<fig id="F13" position="float">
<label>Figure 13</label>
<caption><p>Language composite score against predicted left and right frontal cortical thickness measures before and after harmonizing the tissue segmentation maps. Without harmonization (columns 1 and 3) there appears to be a significant association between left or right frontal cortical thickness and language abilities, but after harmonization (columns 2 and 4) the effect of cortical thickness on language ability is rendered non-significant in both left and right frontal cortex. This demonstrates the importance of data harmonization without which pooling images from separate datasets can lead to spurious findings that are driven by differences in acquisitions rather than by true underlying effects.</p></caption>
<graphic xlink:href="fnins-15-662005-g0013.tif"/>
</fig></sec></sec></sec>
<sec id="s4">
<title>4. Discussion and Future Work</title>
<p>In this paper we studied the application and viability of unsupervised domain adaptation methods for harmonizing tissue segmentation maps of two neonatal datasets (dHCP and ePrime). Our aim was to obtain volumetric and cortical thickness measures that are only affected by brain anatomy and not by the acquisition protocol or scanner, in order to improve the statistical power of imaging or imaging-genetic studies. We proposed an image-based domain adaptation model where a tissue segmentation network was trained with real dHCP and synthesized ePrime <italic>T</italic>2w 3D MRI volumes. The generator network was trained to produce realistic images in order to fool a domain discriminator, while also minimizing an NCC loss which aimed to enforce image similarity between real and synthesized images (Grigorescu et al., <xref ref-type="bibr" rid="B7">2020</xref>). We trained this model using dHCP Draw-EM segmentation maps, and we compared it with a baseline 3D U-Net (&#x000C7;i&#x000E7;ek et al., <xref ref-type="bibr" rid="B2">2016</xref>), and a latent space domain adaptation method (Kamnitsas et al., <xref ref-type="bibr" rid="B11">2017</xref>). The three methods were trained with and without data augmentation (P&#x000E9;rez-Garc&#x000ED;a et al., <xref ref-type="bibr" rid="B24">2020</xref>).</p>
<p>First, we looked at the performance of each of the six trained models on the source (dHCP) test dataset, by comparing predicted tissue segmentation maps with the Draw-EM output labels, with the aim of measuring fidelity of our trained segmentation methods for the original dHCP domain. Our results on the source (dHCP) test dataset suggest that our trained models were able to generalize to unseen source domain data. At the same time, Dice score results on the test set for the proposed <italic>image with augmentation</italic> model are high and are similar in performance when compared with the <italic>baseline with augmentation</italic> method. This suggests that adding the contrast transfer step does not diminish the quality of the segmentations.</p>
<p>We then analyzed the extent to which each of the 6 trained models managed to harmonize the tissue segmentation maps of our two cohorts, by looking at tissue volumes and mean cortical thickness measures between subsamples of the dHCP and ePrime cohorts which showed comparable GA at birth and PMA at time of scan, as well as similar gender and maternal ethnicity. Our results showed that our proposed model (<italic>image with augmentation</italic>) harmonized the predicted tissue segmentation maps in terms of cortical gray matter, white matter, deep gray matter, cerebellum and brainstem volumes (<xref ref-type="fig" rid="F6">Figure 6</xref>). In terms of mean global cortical thickness measures, four of the trained methods (<italic>baseline with augmentation, latent with augmentation, image</italic>, and <italic>image with augmentation</italic>) achieved comparable values when compared to the dHCP subset. In fact, we hypothesize that these four methods provided the best overall results because either they were trained using data augmentation or they acted as a deep learning-based augmentation technique (Sandfort et al., <xref ref-type="bibr" rid="B27">2019</xref>), which made the segmentation network more robust to the different contrast, population bias and acquisition protocol of the ePrime dataset.</p>
<p>Using the cortical parcellation network, we also produced cortical thickness measures for the 11 cortical subregions (see <xref ref-type="table" rid="T2">Table 2</xref>). Again, the models trained with augmentation performed better than their no augmentation counterparts (see <xref ref-type="fig" rid="F8">Figure 8</xref>). However, our proposed <italic>image with augmentation</italic> model performed best, whereby ePrime values, tending toward higher values before harmonization, were brought downwards into a comparable range of values to dHCP, for 10 out of 11 cortical subregions (see <xref ref-type="fig" rid="F8">Figure 8</xref> last column). For the right parietal lobe, our proposed method outperformed the original segmentations and the other 5 models, but did not manage to bring the values down to a non-significant range. One potential reason for this is that, on a visual inspection, the ePrime cohort appears to suffer from more partial volume artifacts than its dHCP counterpart, which can confuse the segmentation network and can lead to overestimation of the cortical gray matter/cerebrospinal fluid boundary.</p>
<p>A close inspection of the predicted tissue segmentation maps (see <xref ref-type="fig" rid="F9">Figure 9</xref>) also showed that our proposed model (<italic>image with augmentation</italic>) corrected misclassified voxels which were prevalent in the other 3 methods. At the same time, the proposed <italic>image with augmentation</italic> method outperformed the original Draw-EM segmentation by correcting a region of WM which was wrongly classified as CSF (see <xref ref-type="fig" rid="F10">Figure 10</xref>). Our results suggest that, in terms of consistency of volumes and regional cortical thickness measures derived from dHCP and ePrime neonates (<xref ref-type="fig" rid="F6">Figures 6</xref>, <xref ref-type="fig" rid="F8">8</xref>), as well as the qualitative examples (<xref ref-type="fig" rid="F9">Figures 9</xref>, <xref ref-type="fig" rid="F10">10</xref>), our proposed <italic>image with augmentation</italic> model resulted in more consistent outputs than the other methods.</p>
<p>We used the harmonized cortical segmentation maps to look at differences in both global and local cortical thickness measures between term and preterm-born neonates. We showed in <xref ref-type="fig" rid="F12">Figure 12</xref> that our harmonized cortical gray matter maps resulted in global thickness measures which were comparable with the dHCP-only neonates, while also revealing a significant effect of GA and the interaction between age at scan and at birth. We performed a similar analysis on the local cortical thickness measures and highlighted three regions of interest (frontal left, frontal right, and parietal left) which showed significant differences between the two cohorts (see <xref ref-type="fig" rid="F12">Figure 12</xref>). These regions are consistent with previous studies (Nagy et al., <xref ref-type="bibr" rid="B22">2011</xref>) where cortical thickness measures were shown to differ in preterm-born neonates when compared to term-born neonates in an adolescent cohort.</p>
<p>Finally, we showed the importance of harmonizing the cortical tissue maps by investigating the association between neonatal cortical thickness and a language outcome measure. After harmonization, regressing language composite score against predicted left or right frontal cortical thickness in the two pooled datasets, showed no significant effect of cortical thickness (second column of <xref ref-type="fig" rid="F13">Figure 13</xref>), consistent with the ground-truth results seen in each cohort individually. This analysis demonstrates that without data harmonization, pooling images from separate datasets can lead to spurious findings that are driven by systematic differences in acquisitions rather than by true underlying effects. Our harmonization allows for our two datasets to be combined into joint analyses while preserving the underlying structure of associations with real-world outcomes.</p>
<p>Our study was focused on single-source unsupervised domain adaptation approaches, which might limit the method's applicability to other neonatal datasets. However, by utilizing reliable tissue segmentation maps from multiple neonatal databases, the proposed model can be extended to a multi-source domain adaptation pipeline (Mansour et al., <xref ref-type="bibr" rid="B19">2008</xref>; Xu et al., <xref ref-type="bibr" rid="B35">2018</xref>). Additionally, the latent based domain adaptation method was trained using the features at every layer of the decoding branch, without analyzing different combinations of the encoding-decoding layers. Future work will therefore aim to systematically evaluate our design choices via ablation studies. At the same time, we focused our work on investigating structural (<italic>T</italic>2w) datasets only, and in future we aim to extend this study to harmonize diffusion data as well.</p></sec>
<sec sec-type="data-availability-statement" id="s5">
<title>Data Availability Statement</title>
<p>The code developed for this study is available online on GitHub (<ext-link ext-link-type="uri" xlink:href="https://github.com/irinagrigorescu">https://github.com/irinagrigorescu</ext-link>). Imaging data collected for the dHCP are available in early 2021 at <ext-link ext-link-type="uri" xlink:href="http://developingconnectome.org/">http://developingconnectome.org/</ext-link>. Requests for Data Sharing for the ePrime dataset should be made to the Chief Investigator of (Edwards et al., <xref ref-type="bibr" rid="B4">2018</xref>) doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/archdischild-2017-313102">10.1136/archdischild-2017-313102</ext-link>.</p></sec>
<sec id="s6">
<title>Ethics Statement</title>
<p>The studies involving human participants were approved by the National Research Ethics Committee (dHCP, REC: 14/Lo/1169; ePrime, REC: 09/H0707/98). Informed written consent was given by parents prior to scanning.</p></sec>
<sec id="s7">
<title>Author Contributions</title>
<p>IG prepared the manuscript, implemented the code for the domain adaptation models and the analysis. LV participated in the implementation of the analysis code, the study design and interpretation of the results. AU assisted with data preprocessing, design of the study and interpretation of the results. DB performed preprocessing of the dHCP and ePrime datasets. LC-G developed MRI acquisition protocols for the neonatal dHCP datasets. CN participated in the study design and interpretation of the results. ADE and JVH are coordinators of the dHCP project. MM supervised all stages of the current research. MD conceptualized the study, supervised all stages of the current research and preparation of the manuscript. All authors gave final approval for publication and agree to be held accountable for the work performed therein.</p></sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
</body>
<back>
<ack><p>We thank everyone who was involved in acquisition and analysis of the datasets. We thank all participants and their families. This paper is an extension of our previous work (Grigorescu et al., <xref ref-type="bibr" rid="B7">2020</xref>).</p>
</ack>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Bayley</surname> <given-names>N.</given-names></name></person-group> (<year>2006</year>). <source>Bayley Scales of Infant and Toddler Development</source>. <publisher-loc>PsychCorp; Pearson. San Antonio, TX</publisher-loc>: <publisher-name>The Psychological Corporation</publisher-name>.</citation></ref>
<ref id="B2">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>&#x000C7;i&#x000E7;ek</surname> <given-names>&#x000D6;.</given-names></name> <name><surname>Abdulkadir</surname> <given-names>A.</given-names></name> <name><surname>Lienkamp</surname> <given-names>S. S.</given-names></name> <name><surname>Brox</surname> <given-names>T.</given-names></name> <name><surname>Ronneberger</surname> <given-names>O.</given-names></name></person-group> (<year>2016</year>). <article-title>3d u-net: learning dense volumetric segmentation from sparse annotation,</article-title> in <source>International Conference on Medical Image Computing and Computer-Assisted Intervention</source> (<publisher-loc>Athens; Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>424</fpage>&#x02013;<lpage>432</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-46723-8_49</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cordero-Grande</surname> <given-names>L.</given-names></name> <name><surname>Hughes</surname> <given-names>E. J.</given-names></name> <name><surname>Hutter</surname> <given-names>J.</given-names></name> <name><surname>Price</surname> <given-names>A. N.</given-names></name> <name><surname>Hajnal</surname> <given-names>J. V.</given-names></name></person-group> (<year>2018</year>). <article-title>Three-dimensional motion corrected sensitivity encoding reconstruction for multi-shot multi-slice MRI: application to neonatal brain imaging</article-title>. <source>Magn. Reson. Med</source>. <volume>79</volume>, <fpage>1365</fpage>&#x02013;<lpage>1376</lpage>. <pub-id pub-id-type="doi">10.1002/mrm.26796</pub-id><pub-id pub-id-type="pmid">28626962</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Edwards</surname> <given-names>A. D.</given-names></name> <name><surname>Redshaw</surname> <given-names>M. E.</given-names></name> <name><surname>Kennea</surname> <given-names>N.</given-names></name> <name><surname>Rivero-Arias</surname> <given-names>O.</given-names></name> <name><surname>Gonzales-Cinca</surname> <given-names>N.</given-names></name> <name><surname>Nongena</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Effect of mri on preterm infants and their families: a randomised trial with nested diagnostic and economic evaluation</article-title>. <source>Arch. Dis. Childhood Fetal Neonatal Ed.</source> <volume>103</volume>, <fpage>F15</fpage>&#x02013;<lpage>F21</lpage>. <pub-id pub-id-type="doi">10.1136/archdischild-2017-313102</pub-id><pub-id pub-id-type="pmid">28988160</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ganin</surname> <given-names>Y.</given-names></name> <name><surname>Lempitsky</surname> <given-names>V.</given-names></name></person-group> (<year>2015</year>). <article-title>Unsupervised domain adaptation by backpropagation,</article-title> in <source>International Conference on machine learning. Proceedings of the 32nd International Conference on Machine Learning</source>, Vol. <volume>37</volume>, <fpage>1180</fpage>&#x02013;<lpage>1189</lpage>.</citation></ref>
<ref id="B6">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ghafoorian</surname> <given-names>M.</given-names></name> <name><surname>Mehrtash</surname> <given-names>A.</given-names></name> <name><surname>Kapur</surname> <given-names>T.</given-names></name> <name><surname>Karssemeijer</surname> <given-names>N.</given-names></name> <name><surname>Marchiori</surname> <given-names>E.</given-names></name> <name><surname>Pesteie</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Transfer learning for domain adaptation in MRI: application in brain lesion segmentation,</article-title> in <source>International Conference on Medical Image Computing and Computer-Assisted Intervention</source> (<publisher-loc>Quebec City, QC; Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>516</fpage>&#x02013;<lpage>524</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-66179-7_59</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Grigorescu</surname> <given-names>I.</given-names></name> <name><surname>Cordero-Grande</surname> <given-names>L.</given-names></name> <name><surname>Batalle</surname> <given-names>D.</given-names></name> <name><surname>Edwards</surname> <given-names>A. D.</given-names></name> <name><surname>Hajnal</surname> <given-names>J. V.</given-names></name> <name><surname>Modat</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Harmonised segmentation of neonatal brain MRI: a domain adaptation approach,</article-title> in <source>Medical Ultrasound, and Preterm, Perinatal and Paediatric Image Analysis</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name>), <fpage>253</fpage>&#x02013;<lpage>263</lpage>.</citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hughes</surname> <given-names>E. J.</given-names></name> <name><surname>Winchman</surname> <given-names>T.</given-names></name> <name><surname>Padormo</surname> <given-names>F.</given-names></name> <name><surname>Teixeira</surname> <given-names>R.</given-names></name> <name><surname>Wurie</surname> <given-names>J.</given-names></name> <name><surname>Sharma</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>A dedicated neonatal brain imaging system</article-title>. <source>Magn. Reson. Med</source>. <volume>78</volume>, <fpage>794</fpage>&#x02013;<lpage>804</lpage>. <pub-id pub-id-type="doi">10.1002/mrm.26462</pub-id><pub-id pub-id-type="pmid">27643791</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Isensee</surname> <given-names>F.</given-names></name> <name><surname>Petersen</surname> <given-names>J.</given-names></name> <name><surname>Klein</surname> <given-names>A.</given-names></name> <name><surname>Zimmerer</surname> <given-names>D.</given-names></name> <name><surname>Jaeger</surname> <given-names>P. F.</given-names></name> <name><surname>Kohl</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>nnu-net: self-adapting framework for u-net-based medical image segmentation</article-title>. <source>arXiv preprint</source> arXiv:1809.10486. <pub-id pub-id-type="doi">10.1007/978-3-658-25326-4_7</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Isola</surname> <given-names>P.</given-names></name> <name><surname>Zhu</surname> <given-names>J. Y.</given-names></name> <name><surname>Zhou</surname> <given-names>T.</given-names></name> <name><surname>Efros</surname> <given-names>A. A.</given-names></name></person-group> (<year>2017</year>). <article-title>Image-to-image translation with conditional adversarial networks,</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source>, <fpage>1125</fpage>&#x02013;<lpage>1134</lpage>.</citation></ref>
<ref id="B11">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kamnitsas</surname> <given-names>K.</given-names></name> <name><surname>Baumgartner</surname> <given-names>C.</given-names></name> <name><surname>Ledig</surname> <given-names>C.</given-names></name> <name><surname>Newcombe</surname> <given-names>V.</given-names></name> <name><surname>Simpson</surname> <given-names>J.</given-names></name> <name><surname>Kane</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Unsupervised domain adaptation in brain lesion segmentation with adversarial networks,</article-title> in <source>Information Processing in Medical Imaging</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name>).</citation></ref>
<ref id="B12">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kerfoot</surname> <given-names>E.</given-names></name> <name><surname>Puyol-Ant&#x000F3;n</surname> <given-names>E.</given-names></name> <name><surname>Ruijsink</surname> <given-names>B.</given-names></name> <name><surname>Ariga</surname> <given-names>R.</given-names></name> <name><surname>Zacur</surname> <given-names>E.</given-names></name> <name><surname>Lamata</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Synthesising images and labels between MR sequence types with CycleGAN,</article-title> in <source>Domain Adaptation and Representation Transfer and Medical Image Learning With Less Labels and Imperfect Data</source> (<publisher-loc>Shenzhen; Cham</publisher-loc>: <publisher-name>Springer</publisher-name>). <pub-id pub-id-type="doi">10.1007/978-3-030-33391-1_6</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kingma</surname> <given-names>D. P.</given-names></name> <name><surname>Ba</surname> <given-names>J.</given-names></name></person-group> (<year>2014</year>). <article-title>Adam: a method for stochastic optimization</article-title>. <source>arXiv [Preprint]</source> arXiv:1412.6980.</citation></ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kuklisova-Murgasova</surname> <given-names>M.</given-names></name> <name><surname>Quaghebeur</surname> <given-names>G.</given-names></name> <name><surname>Rutherford</surname> <given-names>M. A.</given-names></name> <name><surname>Hajnal</surname> <given-names>J. V.</given-names></name> <name><surname>Schnabel</surname> <given-names>J. A.</given-names></name></person-group> (<year>2012</year>). <article-title>Reconstruction of fetal brain MRI with intensity matching and complete outlier removal</article-title>. <source>Med. Image Anal</source>. <volume>16</volume>, <fpage>1550</fpage>&#x02013;<lpage>1564</lpage>. <pub-id pub-id-type="doi">10.1016/j.media.2012.07.004</pub-id><pub-id pub-id-type="pmid">22939612</pub-id></citation></ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kushibar</surname> <given-names>K.</given-names></name> <name><surname>Valverde</surname> <given-names>S.</given-names></name> <name><surname>Gonz&#x000E1;lez-Vill&#x000E0;</surname> <given-names>S.</given-names></name> <name><surname>Bernal</surname> <given-names>J.</given-names></name> <name><surname>Cabezas</surname> <given-names>M.</given-names></name> <name><surname>Oliver</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Supervised domain adaptation for automatic sub-cortical brain structure segmentation with minimal user interaction</article-title>. <source>Sci. Rep.</source> <volume>9</volume>, <fpage>1</fpage>&#x02013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-019-43299-z</pub-id><pub-id pub-id-type="pmid">31043688</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liao</surname> <given-names>H.</given-names></name> <name><surname>Lin</surname> <given-names>W.-A.</given-names></name> <name><surname>Zhou</surname> <given-names>S. K.</given-names></name> <name><surname>Luo</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>Adn: artifact disentanglement network for unsupervised metal artifact reduction</article-title>. <source>IEEE Trans. Med. Imaging</source> <volume>39</volume>, <fpage>634</fpage>&#x02013;<lpage>643</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2019.2933425</pub-id><pub-id pub-id-type="pmid">31395543</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lowekamp</surname> <given-names>B.</given-names></name> <name><surname>Chen</surname> <given-names>D.</given-names></name> <name><surname>Ib&#x000E1;&#x000F1;ez</surname> <given-names>L.</given-names></name> <name><surname>Blezek</surname> <given-names>D.</given-names></name></person-group> (<year>2013</year>). <article-title>The design of SimpleITK</article-title>. <source>Front. Neuroinform.</source> <volume>7</volume>:<fpage>45</fpage>. <pub-id pub-id-type="doi">10.3389/fninf.2013.00045</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Makropoulos</surname> <given-names>A.</given-names></name> <name><surname>Robinson</surname> <given-names>E. C.</given-names></name> <name><surname>Schuh</surname> <given-names>A.</given-names></name> <name><surname>Wright</surname> <given-names>R.</given-names></name> <name><surname>Fitzgibbon</surname> <given-names>S.</given-names></name> <name><surname>Bozek</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>The developing human connectome project: a minimal processing pipeline for neonatal cortical surface reconstruction</article-title>. <source>Neuroimage</source> <volume>173</volume>, <fpage>88</fpage>&#x02013;<lpage>112</lpage>. <pub-id pub-id-type="doi">10.1016/j.neuroimage.2018.01.054</pub-id><pub-id pub-id-type="pmid">29409960</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mansour</surname> <given-names>Y.</given-names></name> <name><surname>Mohri</surname> <given-names>M.</given-names></name> <name><surname>Rostamizadeh</surname> <given-names>A.</given-names></name></person-group> (<year>2008</year>). <article-title>Domain adaptation with multiple sources</article-title>. <source>Adv. Neural Inform. Process. Syst.</source> <volume>21</volume>, <fpage>1041</fpage>&#x02013;<lpage>1048</lpage>.</citation></ref>
<ref id="B20">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Mao</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>Q.</given-names></name> <name><surname>Xie</surname> <given-names>H.</given-names></name> <name><surname>Lau</surname> <given-names>R. Y.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Paul Smolley</surname> <given-names>S.</given-names></name></person-group> (<year>2017</year>). <article-title>Least squares generative adversarial networks,</article-title> in <source>Proceedings of the IEEE International Conference on Computer Vision</source>, <fpage>2794</fpage>&#x02013;<lpage>2802</lpage>.<pub-id pub-id-type="pmid">30273144</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Miotto</surname> <given-names>R.</given-names></name> <name><surname>Wang</surname> <given-names>F.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Jiang</surname> <given-names>X.</given-names></name> <name><surname>Dudley</surname> <given-names>J. T.</given-names></name></person-group> (<year>2018</year>). <article-title>Deep learning for healthcare: review, opportunities and challenges</article-title>. <source>Brief. Bioinform.</source> <volume>19</volume>, <fpage>1236</fpage>&#x02013;<lpage>1246</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbx044</pub-id><pub-id pub-id-type="pmid">28481991</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nagy</surname> <given-names>Z.</given-names></name> <name><surname>Lagercrantz</surname> <given-names>H.</given-names></name> <name><surname>Hutton</surname> <given-names>C.</given-names></name></person-group> (<year>2011</year>). <article-title>Effects of preterm birth on cortical thickness measured in adolescence</article-title>. <source>Cereb. Cortex</source> <volume>21</volume>, <fpage>300</fpage>&#x02013;<lpage>306</lpage>. <pub-id pub-id-type="doi">10.1093/cercor/bhq095</pub-id><pub-id pub-id-type="pmid">20522538</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Orbes-Arteaga</surname> <given-names>M.</given-names></name> <name><surname>Varsavsky</surname> <given-names>T.</given-names></name> <name><surname>Sudre</surname> <given-names>C. H.</given-names></name> <name><surname>Eaton-Rosen</surname> <given-names>Z.</given-names></name> <name><surname>Haddow</surname> <given-names>L. J.</given-names></name> <name><surname>S&#x000F8;rensen</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Multi-domain adaptation in brain MRI through paired consistency and adversarial learning,</article-title> in <source>Domain Adaptation and Representation Transfer and Medical Image Learning With Less Labels and Imperfect Data</source>, eds <person-group person-group-type="editor"><name><surname>Wang</surname> <given-names>Q.</given-names></name> <name><surname>Milletari</surname> <given-names>F.</given-names></name> <name><surname>Nguyen</surname> <given-names>H. V.</given-names></name> <name><surname>Albarqouni</surname> <given-names>S.</given-names></name> <name><surname>Cardoso</surname> <given-names>M. J.</given-names></name>
<name><surname>Rieke</surname> <given-names>N.</given-names></name> <name><surname>Xu</surname> <given-names>Z.</given-names></name> <name><surname>Kamnitsas</surname> <given-names>K.</given-names></name> <name><surname>Patel</surname> <given-names>V.</given-names></name> <name><surname>Roysam</surname> <given-names>B.</given-names></name> <name><surname>Jiang</surname> <given-names>S.</given-names></name> <name><surname>Zhou</surname> <given-names>K.</given-names></name> <name><surname>Luu</surname> <given-names>K.</given-names></name> <name><surname>Le</surname> <given-names>N.</given-names></name></person-group> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name>), <fpage>54</fpage>&#x02013;<lpage>62</lpage>.</citation></ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>P&#x000E9;rez-Garc&#x000ED;a</surname> <given-names>F.</given-names></name> <name><surname>Sparks</surname> <given-names>R.</given-names></name> <name><surname>Ourselin</surname> <given-names>S.</given-names></name></person-group> (<year>2020</year>). <article-title>TorchIO: a Python library for efficient loading, preprocessing, augmentation and patch-based sampling of medical images in deep learning</article-title>. <source>arXiv [Preprint]</source> arXiv:2003.04696.</citation></ref>
<ref id="B25">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ranzini</surname> <given-names>M. B. M.</given-names></name> <name><surname>Groothuis</surname> <given-names>I.</given-names></name> <name><surname>Kl&#x000E4;ser</surname> <given-names>K.</given-names></name> <name><surname>Cardoso</surname> <given-names>M. J.</given-names></name> <name><surname>Henckel</surname> <given-names>J.</given-names></name> <name><surname>Ourselin</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Combining multimodal information for metal artefact reduction: an unsupervised deep learning framework,</article-title> in <source>2020 IEEE 17th International Symposium on Biomedical Imaging (ISBI)</source> (<publisher-loc>Iowa City, IA</publisher-loc>), <fpage>600</fpage>&#x02013;<lpage>604</lpage>. <pub-id pub-id-type="doi">10.1109/ISBI45749.2020.9098633</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rueckert</surname> <given-names>D.</given-names></name> <name><surname>Sonoda</surname> <given-names>L. I.</given-names></name> <name><surname>Hayes</surname> <given-names>C.</given-names></name> <name><surname>Hill</surname> <given-names>D. L. G.</given-names></name> <name><surname>Leach</surname> <given-names>M. O.</given-names></name> <name><surname>Hawkes</surname> <given-names>D. J.</given-names></name></person-group> (<year>1999</year>). <article-title>Nonrigid registration using free-form deformations: application to breast MR images</article-title>. <source>IEEE Trans. Med. Imaging</source> <volume>18</volume>, <fpage>712</fpage>&#x02013;<lpage>721</lpage>. <pub-id pub-id-type="doi">10.1109/42.796284</pub-id><pub-id pub-id-type="pmid">10534053</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sandfort</surname> <given-names>V.</given-names></name> <name><surname>Yan</surname> <given-names>K.</given-names></name> <name><surname>Pickhardt</surname> <given-names>P. J.</given-names></name> <name><surname>Summers</surname> <given-names>R. M.</given-names></name></person-group> (<year>2019</year>). <article-title>Data augmentation using generative adversarial networks (cyclegan) to improve generalizability in ct segmentation tasks</article-title>. <source>Sci. Rep.</source> <volume>9</volume>, <fpage>1</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-019-52737-x</pub-id><pub-id pub-id-type="pmid">31729403</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schuh</surname> <given-names>A.</given-names></name> <name><surname>Makropoulos</surname> <given-names>A.</given-names></name> <name><surname>Robinson</surname> <given-names>E. C.</given-names></name> <name><surname>Cordero-Grande</surname> <given-names>L.</given-names></name> <name><surname>Hughes</surname> <given-names>E.</given-names></name> <name><surname>Hutter</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Unbiased construction of a temporally consistent morphological atlas of neonatal brain development</article-title>. <source>bioRxiv.</source> <fpage>251512</fpage>. <pub-id pub-id-type="doi">10.1101/251512</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shinohara</surname> <given-names>R. T.</given-names></name> <name><surname>Oh</surname> <given-names>J.</given-names></name> <name><surname>Nair</surname> <given-names>G.</given-names></name> <name><surname>Calabresi</surname> <given-names>P. A.</given-names></name> <name><surname>Davatzikos</surname> <given-names>C.</given-names></name> <name><surname>Doshi</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Volumetric analysis from a harmonized multisite brain mri study of a single subject with multiple sclerosis</article-title>. <source>Am. J. Neuroradiol.</source> <volume>38</volume>, <fpage>1501</fpage>&#x02013;<lpage>1509</lpage>. <pub-id pub-id-type="doi">10.3174/ajnr.A5254</pub-id><pub-id pub-id-type="pmid">28642263</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Smith</surname> <given-names>L. N.</given-names></name></person-group> (<year>2017</year>). <article-title>Cyclical learning rates for training neural networks,</article-title> in <source>2017 IEEE Winter Conference on Applications of Computer Vision (WACV)</source> (<publisher-name>IEEE</publisher-name>), <fpage>464</fpage>&#x02013;<lpage>472</lpage>.</citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sudre</surname> <given-names>C. H.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name> <name><surname>Vercauteren</surname> <given-names>T.</given-names></name> <name><surname>Ourselin</surname> <given-names>S.</given-names></name> <name><surname>Jorge Cardoso</surname> <given-names>M.</given-names></name></person-group> (<year>2017</year>). <article-title>Generalised dice overlap as a deep learning loss function for highly unbalanced segmentations</article-title>. <source>Lecture Notes Comput. Sci</source>. <pub-id pub-id-type="doi">10.1007/978-3-319-67558-9_28</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Takao</surname> <given-names>H.</given-names></name> <name><surname>Hayashi</surname> <given-names>N.</given-names></name> <name><surname>Ohtomo</surname> <given-names>K.</given-names></name></person-group> (<year>2011</year>). <article-title>Effect of scanner in longitudinal studies of brain volume changes</article-title>. <source>J. Magn. Reson. Imaging</source> <volume>34</volume>, <fpage>438</fpage>&#x02013;<lpage>444</lpage>. <pub-id pub-id-type="doi">10.1002/jmri.22636</pub-id><pub-id pub-id-type="pmid">21692137</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Tustison</surname> <given-names>N. J.</given-names></name> <name><surname>Avants</surname> <given-names>B. B.</given-names></name> <name><surname>Cook</surname> <given-names>P. A.</given-names></name> <name><surname>Song</surname> <given-names>G.</given-names></name> <name><surname>Das</surname> <given-names>S.</given-names></name> <name><surname>van Strien</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>The ANTs cortical thickness processing pipeline,</article-title> in <source>Medical Imaging 2013: Biomedical Applications in Molecular, Structural, and Functional Imaging</source> (<publisher-loc>Lake Buena Vista, FL</publisher-loc>: <publisher-name>SPIE Medical Imaging</publisher-name>). <pub-id pub-id-type="doi">10.1117/12.2007128</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ulyanov</surname> <given-names>D.</given-names></name> <name><surname>Vedaldi</surname> <given-names>A.</given-names></name> <name><surname>Lempitsky</surname> <given-names>V.</given-names></name></person-group> (<year>2016</year>). <article-title>Instance normalization: the missing ingredient for fast stylization</article-title>. <source>arXiv [Preprint]</source> arXiv:1607.08022.</citation></ref>
<ref id="B35">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>R.</given-names></name> <name><surname>Chen</surname> <given-names>Z.</given-names></name> <name><surname>Zuo</surname> <given-names>W.</given-names></name> <name><surname>Yan</surname> <given-names>J.</given-names></name> <name><surname>Lin</surname> <given-names>L.</given-names></name></person-group> (<year>2018</year>). <article-title>Deep cocktail network: multi-source unsupervised domain adaptation with category shift,</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source> (<publisher-loc>Salt Lake City, UT</publisher-loc>), <fpage>3964</fpage>&#x02013;<lpage>3973</lpage>.</citation></ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yaniv</surname> <given-names>Z.</given-names></name> <name><surname>Lowekamp</surname> <given-names>B. C.</given-names></name> <name><surname>Johnson</surname> <given-names>H. J.</given-names></name> <name><surname>Beare</surname> <given-names>R.</given-names></name></person-group> (<year>2018</year>). <article-title>SimpleITK image-analysis notebooks: a collaborative environment for education and reproducible research</article-title>. <source>J. Digit. Imaging</source> <volume>31</volume>, <fpage>290</fpage>&#x02013;<lpage>303</lpage>. <pub-id pub-id-type="doi">10.1007/s10278-017-0037-8</pub-id><pub-id pub-id-type="pmid">31485952</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>J.-Y.</given-names></name> <name><surname>Park</surname> <given-names>T.</given-names></name> <name><surname>Isola</surname> <given-names>P.</given-names></name> <name><surname>Efros</surname> <given-names>A. A.</given-names></name></person-group> (<year>2017</year>). <article-title>Unpaired image-to-image translation using cycle-consistent adversarial networks,</article-title> in <source>Proceedings of the IEEE International Conference on Computer Vision</source> (<publisher-loc>Venice</publisher-loc>), <fpage>2223</fpage>&#x02013;<lpage>2232</lpage>.</citation></ref>
</ref-list>
<fn-group>
<fn id="fn0001"><p><sup>1</sup><ext-link ext-link-type="uri" xlink:href="http://www.developingconnectome.org/">http://www.developingconnectome.org/</ext-link></p></fn>
<fn id="fn0002"><p><sup>2</sup><ext-link ext-link-type="uri" xlink:href="https://www.npeu.ox.ac.uk/prumhc/eprime-mr-imaging-177">https://www.npeu.ox.ac.uk/prumhc/eprime-mr-imaging-177</ext-link></p></fn>
<fn id="fn0003"><p><sup>3</sup><ext-link ext-link-type="uri" xlink:href="https://tools.npeu.ox.ac.uk/imd/">https://tools.npeu.ox.ac.uk/imd/</ext-link></p></fn>
</fn-group>
<fn-group>
<fn fn-type="financial-disclosure"><p><bold>Funding.</bold> This work was supported by the Academy of Medical Sciences Springboard Award [SBF004\1040], Medical Research Council (Grant nos. [MR/K006355/1] and [MR/S026460/1]), European Research Council under the European Union&#x00027;s Seventh Framework Programme [FP7/2007-2013]/ERC grant agreement no. 319456 dHCP project, the EPSRC Research Council as part of the EPSRC DTP (grant Ref: [EP/R513064/1]), the Wellcome/EPSRC Centre for Medical Engineering at King&#x00027;s College London [WT 203148/Z/16/Z], the NIHR Clinical Research Facility (CRF) at Guy&#x00027;s and St Thomas&#x00027;, and by the National Institute for Health Research Biomedical Research Centre based at Guy&#x00027;s and St Thomas&#x00027; NHS Foundation Trust and King&#x00027;s College London. The EPrime study was funded by the National Institute for Health Research (NIHR) under its Programme Grants for Applied Research Programme (grant reference no. [RP-PG-0707-10154]).</p>
</fn>
</fn-group>
</back>
</article>