<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Oncol.</journal-id>
<journal-title>Frontiers in Oncology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Oncol.</abbrev-journal-title>
<issn pub-type="epub">2234-943X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fonc.2022.974467</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Oncology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Improving radiomic model reliability using robust features from perturbations for head-and-neck carcinoma</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Teng</surname>
<given-names>Xinzhi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1595945"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Jiang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1098769"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ma</surname>
<given-names>Zongrui</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Yuanpeng</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lam</surname>
<given-names>Saikit</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/756311"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Wen</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1949596"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xiao</surname>
<given-names>Haonan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1511061"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Tian</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1638731"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Bing</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1122789"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhou</surname>
<given-names>Ta</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ren</surname>
<given-names>Ge</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1178712"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lee</surname>
<given-names>Francis Kar-ho</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Au</surname>
<given-names>Kwok-hung</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/789594"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lee</surname>
<given-names>Victor Ho-fun</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/893342"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chang</surname>
<given-names>Amy Tien Yee</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Cai</surname>
<given-names>Jing</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/639997"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Health Technology and Informatics, The Hong Kong Polytechnic University</institution>, <addr-line>Hong Kong</addr-line>, <country>Hong Kong SAR, China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Clinical Oncology, Queen Elizabeth Hospital</institution>, <addr-line>Hong Kong</addr-line>, <country>Hong Kong SAR, China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Clinical Oncology, The University of Hong Kong</institution>, <addr-line>Hong Kong</addr-line>, <country>Hong Kong SAR, China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Comprehensive Oncology Centre, Hong Kong Sanatorium and Hospital</institution>, <addr-line>Hong Kong</addr-line>, <country>Hong Kong SAR, China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Max Heiland, Charit&#xe9; Universit&#xe4;tsmedizin Berlin, Germany</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Aditya Apte, Memorial Sloan Kettering Cancer Center, United States; Chenbin Liu, Chinese Academy of Medical Sciences and Peking Union Medical College, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Jing Cai, <email xlink:href="mailto:jing.cai@polyu.edu.hk">jing.cai@polyu.edu.hk</email>
</p>
</fn>
<fn fn-type="other" id="fn002">
<p>This article was submitted to Head and Neck Cancer, a section of the journal Frontiers in Oncology</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>14</day>
<month>10</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>12</volume>
<elocation-id>974467</elocation-id>
<history>
<date date-type="received">
<day>21</day>
<month>06</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>09</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Teng, Zhang, Ma, Zhang, Lam, Li, Xiao, Li, Li, Zhou, Ren, Lee, Au, Lee, Chang and Cai</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Teng, Zhang, Ma, Zhang, Lam, Li, Xiao, Li, Li, Zhou, Ren, Lee, Au, Lee, Chang and Cai</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Background</title>
<p>Using high robust radiomic features in modeling is recommended, yet its impact on radiomic model is unclear. This study evaluated the radiomic model&#x2019;s robustness and generalizability after screening out low-robust features before radiomic modeling. The results were validated with four datasets and two clinically relevant tasks.</p>
</sec>
<sec>
<title>Materials and methods</title>
<p>A total of 1,419 head-and-neck cancer patients&#x2019; computed tomography images, gross tumor volume segmentation, and clinically relevant outcomes (distant metastasis and local-regional recurrence) were collected from four publicly available datasets. The perturbation method was implemented to simulate images, and the radiomic feature robustness was quantified using the intra-class correlation coefficient (ICC). Three radiomic models were built using all features (ICC &gt; 0), good-robust features (ICC &gt; 0.75), and excellent-robust features (ICC &gt; 0.95), respectively. A filter-based feature selection and Ridge classification method were used to construct the radiomic models. Model performance was assessed with both robustness and generalizability. The robustness of the model was evaluated by the ICC, and the generalizability of the model was quantified by the train-test difference of Area Under the Receiver Operating Characteristic Curve (AUC).</p>
</sec>
<sec>
<title>Results</title>
<p>The average model robustness ICC improved significantly from 0.65 to 0.78 (P&lt; 0.0001) using good-robust features and to 0.91 (P&lt; 0.0001) using excellent-robust features. Model generalizability also showed a substantial increase, as a closer gap between training and testing AUC was observed where the mean train-test AUC difference was reduced from 0.21 to 0.18 (P&lt; 0.001) in good-robust features and to 0.12 (P&lt; 0.0001) in excellent-robust features. Furthermore, good-robust features yielded the best average AUC in the unseen datasets of 0.58 (P&lt; 0.001) over four datasets and clinical outcomes.</p>
</sec>
<sec>
<title>Conclusions</title>
<p>Including only robust features in radiomic modeling significantly improves model robustness and generalizability in unseen datasets. Yet, the robustness of the radiomic model has to be verified despite building with robust radiomic features, and tightly restricted feature robustness may prevent the optimal model performance in the unseen dataset as it may lower the discrimination power of the model.</p>
</sec>
</abstract>
<kwd-group>
<kwd>radiomics</kwd>
<kwd>head and neck squamous cell carcinoma</kwd>
<kwd>model reliability</kwd>
<kwd>feature reliability</kwd>
<kwd>model robustness</kwd>
</kwd-group>
<contract-sponsor id="cn001">Hong Kong Polytechnic University<named-content content-type="fundref-id">10.13039/501100004377</named-content>
</contract-sponsor>
<contract-sponsor id="cn002">Hong Kong Polytechnic University<named-content content-type="fundref-id">10.13039/501100004377</named-content>
</contract-sponsor>
<counts>
<fig-count count="8"/>
<table-count count="4"/>
<equation-count count="1"/>
<ref-count count="38"/>
<page-count count="13"/>
<word-count count="5097"/>
</counts>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<title>Introduction</title>
<p>Radiomics is an emerging artificial intelligence technology that utilizes high-throughput features extracted from imaging features for divulging cancer biological and genetic characteristics (<xref ref-type="bibr" rid="B1">1</xref>&#x2013;<xref ref-type="bibr" rid="B4">4</xref>) in oncology. It has demonstrated promise and offered insights with its defined radiomic models into cancer diagnosis (<xref ref-type="bibr" rid="B5">5</xref>), prognostication (<xref ref-type="bibr" rid="B6">6</xref>), treatment response (<xref ref-type="bibr" rid="B7">7</xref>) as well as toxicity prediction (<xref ref-type="bibr" rid="B8">8</xref>). Despite a wide range of potential applications in the clinic, a primary concern of radiomics modeling is the robustness of radiomic models.</p>
<p>Identifying robust features is the prerequisite for building a robust radiomic model. However, the rare availability of test-retest scans prevents radiomic studies from comprehensively assessing feature robustness. Therefore, Zwanenburg et&#xa0;al. (<xref ref-type="bibr" rid="B9">9</xref>) proposed a perturbation-based dataset-specific radiomic feature robustness assessment method, an alternative to the conventional test-retest method. The feature robustness is quantified using the intra-class correlation coefficient (ICC) from simulated perturbation images. The quantified feature robustness is used to identify and remove the low-robust features. However, the impact of eliminating low-robust features in radiomic modeling on the final model has not been discussed, which prevents the optimal utility of feature robustness.</p>
<p>Therefore, it would be instructive if the impact on radiomic model is clear when removing low-robust features. This manuscript evaluated the radiomic model&#x2019;s robustness and generalizability under different thresholds of the low-robust feature removal. The model robustness is quantified with ICC using the perturbation method (<xref ref-type="bibr" rid="B10">10</xref>), and model generalizability is quantified with the train-test difference of Area under the Receiver Operating Characteristic curve (AUC), AUC<sub>testing</sub> &#x2013; AUC<sub>training</sub>. The change in the model performance would provide informative guidance when removing low-robust radiomic features from modeling.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<title>Materials and methods</title>
<sec id="s2_1">
<title>Overview</title>
<p>The overall study workflow is summarized in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1A</bold>
</xref>. Four publicly available datasets of head-and-neck cancer (HNC) named 1) Head-Neck-Radiomics-HN1 (HN1) (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B11">11</xref>), 2) Head-Neck-PET-CT (HN-PETCT) (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B12">12</xref>), 3) HNSCC (<xref ref-type="bibr" rid="B13">13</xref>&#x2013;<xref ref-type="bibr" rid="B15">15</xref>), 4) OPC-Radiomics (OPC) (<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B17">17</xref>), were collected, and each dataset was used to perform the analysis independently. Two prediction outcomes, including distant metastasis (DM) and local-/regional- recurrence (LR), were modeled using five commonly used classifiers. The five classifiers include Ridge (<xref ref-type="bibr" rid="B18">18</xref>), Support Vector Classifier (SVC) (<xref ref-type="bibr" rid="B19">19</xref>), classifiers implementing the k-nearest neighbor&#x2019;s vote (KNN) (<xref ref-type="bibr" rid="B20">20</xref>), Decision Tree (<xref ref-type="bibr" rid="B21">21</xref>), and Multilayer Perceptron Neural Network (MLP) (<xref ref-type="bibr" rid="B22">22</xref>). Each dataset was randomly split into multiple training and testing cohorts for repeated stratified cross-validation, and the training cohorts underwent robustness analysis, feature selection, and modeling. During each cross-validation iteration, the robustness of each radiomic feature was analyzed by image perturbations on the training samples and quantified by ICC. Features with high robustness scores were retained and further selected based on outcome relevance and redundancy before model training. 
To validate the performance of both model generalizability and robustness using radiomic features with increasing robustness, three groups of radiomic models were constructed 1) without feature robustness filtering, 2) with filtering threshold of 0.75, and 3) with filtering threshold of 0.95, as shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1B</bold>
</xref>. The robustness and generalizability of the three groups of radiomic models were compared statistically. The comparisons were performed independently for the 4 datasets, 2 outcomes, and 5 classifiers, resulting in 40 experiments in total. The improvements of the final selected radiomic feature robustness were also validated through statistical comparisons.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>The overall study workflow <bold>(A)</bold> and model construction and performance analyses workflow <bold>(B)</bold>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-12-974467-g001.tif"/>
</fig>
</sec>
<sec id="s2_2">
<title>Materials</title>
<p>A total of 1,419 HNC patients were recruited from the four publicly available datasets from The Cancer Imaging Archive (TCIA) (<xref ref-type="bibr" rid="B20">20</xref>). Pre-treatment computed tomography (CT) images and corresponding structure sets for radiation therapy were collected in DICOM format from the TCIA website. DM and LR records were also collected as predictive targets for radiomic modeling. They are two critical oncological endpoints in cancer treatment prognosis (<xref ref-type="bibr" rid="B23">23</xref>, <xref ref-type="bibr" rid="B24">24</xref>), and the common predictive outcomes in many radiomics studies (<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B25">25</xref>, <xref ref-type="bibr" rid="B26">26</xref>).</p>
<p>In order to ensure data consistency, a set of inclusion criteria were applied. Only patients with available 1) pre-treatment CT images, 2) clinical outcomes records of both DM and LR, and 3) primary gross tumor volume (GTV) contours were included in the study. The identifier of the selected image and the GTVs are also shared in GitHub for replication purposes. Each dataset was split into 60 training and testing sets using repeated stratified cross-validation. The fold numbers were chosen in a way that at least two patients in the minority group and 100 patients in total are left for testing to ensure the reliability of the testing performance. The final selected patient numbers, patient distributions for the two prediction outcomes, and train-test split cross-validation methods for the four datasets are listed in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>The total patient numbers, patient distributions of the two binary prediction outcomes, and the train-test cross-validation methods of the screened patient cohort of the four public datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Dataset name</th>
<th valign="top" align="center">Total patient No.</th>
<th valign="top" colspan="2" align="center">Distant metastasis</th>
<th valign="top" colspan="2" align="center">Local-/regional- recurrence</th>
<th valign="top" align="center">Cross-validation method</th>
</tr>
<tr>
<th valign="top" align="center"/>
<th valign="top" align="center"/>
<th valign="top" align="center">Event</th>
<th valign="top" align="center">Non-event</th>
<th valign="top" align="center">Event</th>
<th valign="top" align="center">Non-event</th>
<th valign="top" align="center"/>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">HN1</td>
<td valign="top" align="center">137</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">129</td>
<td valign="top" align="center">34</td>
<td valign="top" align="center">103</td>
<td valign="top" align="left">Stratified 2-fold, 30 repetitions</td>
</tr>
<tr>
<td valign="top" align="left">HN-PETCT</td>
<td valign="top" align="center">298</td>
<td valign="top" align="center">40</td>
<td valign="top" align="center">258</td>
<td valign="top" align="center">43</td>
<td valign="top" align="center">255</td>
<td valign="top" align="left">Stratified 3-fold, 20 repetitions</td>
</tr>
<tr>
<td valign="top" align="left">HNSCC</td>
<td valign="top" align="center">460</td>
<td valign="top" align="center">39</td>
<td valign="top" align="center">421</td>
<td valign="top" align="center">65</td>
<td valign="top" align="center">395</td>
<td valign="top" align="left">Stratified 4-fold, 15 repetitions</td>
</tr>
<tr>
<td valign="top" align="left">OPC</td>
<td valign="top" align="center">524</td>
<td valign="top" align="center">74</td>
<td valign="top" align="center">450</td>
<td valign="top" align="center">73</td>
<td valign="top" align="center">451</td>
<td valign="top" align="left">Stratified 4-fold, 15 repetitions</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2_3">
<title>Image preprocessing and radiomic feature extraction</title>
<p>Radiomic features were extracted from the pre-treatment CTs within GTVs. The images and GTV contours were preprocessed before extracting features to maintain feature reproducibility and consistency (<xref ref-type="bibr" rid="B27">27</xref>, <xref ref-type="bibr" rid="B28">28</xref>). First, CT images were isotropically resampled into 1&#xa0;mm x 1&#xa0;mm x 1&#xa0;mm using B-spline interpolation. The GTV contours were converted into voxel-based segmentation masks according to the resampled CT image grids. Additionally, a re-segmentation mask of the HU range of [-150, 180] was generated for each image to limit the texture feature extraction within soft tissue. All the mentioned preprocessing steps were implemented in Python (3.8) using SimpleITK (1.2.4) (<xref ref-type="bibr" rid="B29">29</xref>) and OpenCV (<xref ref-type="bibr" rid="B30">30</xref>) packages.</p>
<p>The rest of the image preprocessing and radiomic feature extraction were performed using the Pyradiomics (2.2.0) (<xref ref-type="bibr" rid="B31">31</xref>) package. In addition to the original image, features were extracted from 11 filtered images, including three Laplacian-of-Gaussian (LoG) filtered images (with a sigma value of 1, 3, and 6&#xa0;mm), and eight coif1 wavelet filtered images (LLL, HLL, LHL, LLH, LHH, HLH, HHL, HHH). The image intensities of both the original and filtered images were discretized into multiple fixed bin counts of 50, 100, 150, 200, 250, 300, and 350 for texture feature extraction to reduce the feature susceptibility to image noise. A total of 5486 radiomics features were extracted for each patient. The radiomic feature extraction parameter file for Pyradiomics can be found in the GitHub link.</p>
</sec>
<sec id="s2_4">
<title>Feature robustness analysis and filtering</title>
<p>The robustness of radiomic features was analyzed <italic>via</italic> the image perturbations in four modes proposed by Zwanenburg et&#xa0;al. (<xref ref-type="bibr" rid="B9">9</xref>) with slight modifications. For each perturbation, both the image and mask were translated and rotated simultaneously by a random amount. They aim to simulate the patient position variation during imaging. A random Gaussian noise field was added to the image to mimic the noise level variations between different imaging acquisitions. The GTV mask was also deformed by a randomly generated deformable vector field. It aims to mimic the inter-observer variability during GTV delineation. Dice similarity index of 0.75 and the Hausdorff distance of 5&#xa0;mm were used to constrain the perturbed contours. Multiple parameters of the different perturbation modes were chosen. The translation distances, rotation angles, noise addition levels, and contour randomization parameters are listed in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>. To explore the perturbations within the specified range as much as possible, 60 perturbations among the entire 4,423,680 combinations of perturbation modes were randomly chosen independently for each patient. The complete set of radiomic features was extracted for the chosen perturbations, and the feature robustness was calculated for each training set using the one-way, random intraclass correlation coefficient (ICC) (<xref ref-type="bibr" rid="B32">32</xref>, <xref ref-type="bibr" rid="B33">33</xref>), with patients as subjects and perturbations as raters. The ICC scores were used to retain the robust features based on a pre-defined threshold before feature selection and modeling.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>The parameters of perturbation modes.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Perturbation modes</th>
<th valign="top" align="center">Perturbation range</th>
<th valign="top" align="center">Reference axis</th>
<th valign="top" align="center">Perturbation number</th>
<th valign="top" align="center">Total number</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Translation distance (mm)</td>
<td valign="top" align="center">0 to 3 with a 0.2 step size</td>
<td valign="top" align="left">AP, SI, LM</td>
<td valign="top" align="center">4,096</td>
<td valign="top" rowspan="4" align="center">4,423,680</td>
</tr>
<tr>
<td valign="top" align="left">Rotation angles (degree)</td>
<td valign="top" align="center">-20 to 20 with a 5 step size</td>
<td valign="top" align="left">SI</td>
<td valign="top" align="center">9</td>
</tr>
<tr>
<td valign="top" align="left">Noise addition level</td>
<td valign="top" align="center">0, 1, 2, 3</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="center">4</td>
</tr>
<tr>
<td valign="top" align="left">Contour Randomization</td>
<td valign="top" align="center">30</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="center">30</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>AP, anterior-posterior; SI, superior-inferior; LM, lateral-medial.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s2_5">
<title>Feature selection and modeling</title>
<p>A two-step feature selection approach was adopted to obtain the top features that are less redundant and more relevant to the outcome for modeling. First, the outcome relevance of each feature was evaluated by one-way ANOVA P-value repeatedly under downsample bootstrapping [imbalanced-learn 0.8.0 (<xref ref-type="bibr" rid="B34">34</xref>)] without replacement with 100 iterations on the training set. Features with P-values less than 0.1 were picked out in each iteration and ranked by their frequencies, and the top 10% features with the highest frequencies were chosen. Second, the feature with a higher mean correlation with the rest of the features in each highly correlated feature pair was removed. Pearson correlation coefficient was used to evaluate inter-feature correlation, and the threshold of 0.6 was chosen to identify the feature pairs with high correlations. A maximum of 10 features was further filtered based on the outcome relevance frequency ranking acquired in the previous step. The predictive models were trained from the final selected features using five different classification methods with automatic hyperparameter tuning. All the model trainings were implemented with the scikit-learn (0.24.0) (<xref ref-type="bibr" rid="B35">35</xref>) package. All the feature selection and modeling processes were performed on the training dataset.</p>
</sec>
<sec id="s2_6">
<title>Performance analyses</title>
<p>The reliability of the predictive models was evaluated in both generalizability and robustness. Model generalizability evaluates model predictability consistency between the training cohort and the unseen cohort. It is quantified as the difference between training and testing predictability which is scored by the AUC. The model robustness metric was designed to evaluate the prediction reliability of patients under different perturbations across all the patients using ICC (1,1) (<xref ref-type="bibr" rid="B10">10</xref>). The ICC(1,1) is calculated with</p> <disp-formula>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>R</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>M</mml:mi>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>W</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>R</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mi>M</mml:mi>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>W</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where MS<sub>R</sub> = mean square for rows; MS<sub>W</sub> = mean square for residual sources of variance; k = number of raters/measurements. In our case, MS<sub>R</sub> is the mean square for patients, and MS<sub>W</sub> is the averaged inter-perturbation variance, and k is the number of perturbations.</p>
<p>These two performance scores were calculated for all the models generated from the 60 cross-validation iterations and statistically compared between each of the two feature robustness filtering thresholds (ICC &gt; 0.75, ICC &gt; 0.95) and the performance of models constructed without robustness filtering using pairwise t-test. The comparisons were performed for each dataset, prediction outcome, and modeling classifier independently. Additionally, the robustness of the final selected features with and without robustness filtering was statistically compared by pairwise t-test for each dataset and prediction outcome.</p>
</sec>
<sec id="s2_7">
<title>Bias evaluation against feature selection method</title>
<p>It is possible that the single feature selection method could lead to bias in the results. To evaluate the potential bias, the minimum redundancy maximum relevance (mRMR) feature selection method was implemented. The robustness and generalizability analysis was performed for all four datasets and outcomes with the Ridge classifier. This bias evaluation aims to ensure that the conclusion is not biased towards a specific feature selection method.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<title>Results</title>
<sec id="s3_1">
<title>Feature robustness and model robustness</title>
<p>The radiomic feature robustness was quantified by the ICC under image perturbations. The distributions of all the extracted radiomic features show a strong skewness towards higher robustness, as shown by the histograms of feature ICCs for the four datasets in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>. Different datasets show distinctive patterns of feature robustness distributions. HN1 (median = 0.84) and HN-PETCT (median = 0.82) have more features with high robustness whereas HNSCC (median = 0.77) and OPC (median = 0.74) have the histograms skewed towards the lower end. On average, 3320/5486 radiomic features remained after being filtered by the threshold of 0.75 and 605/5486 remained for the threshold of 0.95. The final selected radiomics features after the subsequent feature selection procedures showed a significant increase (P&lt; 10<sup>-11</sup>) in mean ICC with increasing feature robustness filtering thresholds. On average, the ICC of the final selected features improved by 0.18 under the filtering threshold of 0.75, and the improvement increased to 0.30 under the threshold of 0.95, as shown by the first column of the heatmaps in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Histograms of the robustness of all the extracted radiomic features for the four analyzed datasets averaged under cross-validations. Feature robustness is quantified as intraclass correlation coefficient (ICC). The shaded areas indicate the 95% confidence interval of the average histogram curves. In general, there are more high-robust features than ones with low robustness. Different datasets show distinctive patterns of feature robustness distributions. HN1 and HN-PETCT have more features with high robustness, whereas HNSCC and OPC have the histograms skewed towards the lower end.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-12-974467-g002.tif"/>
</fig>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>The boxplot shows the model robustness ICC distribution for three feature robustness filtering groups: ICC &gt; 0, ICC &gt; 0.75, and ICC &gt; 0.95. The feature robustness filtering of ICC &gt; 0.95 yields the most robust model. *** indicates the p-value is smaller than 0.0001.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-12-974467-g003.tif"/>
</fig>
<p>The radiomic model robustness improved significantly after removing non-robust features prior to modeling. The ICC of radiomic models constructed without feature robustness filtering is 0.65 averaged over all the datasets, outcomes, and classifiers. It is improved to 0.78 (P&lt; 0.0001) and 0.91 (P&lt; 0.0001) after feature robustness filtering with ICC &gt; 0.75 and ICC &gt; 0.95, respectively. The box plot in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref> showed the distribution of the model robustness ICC. Interestingly, the outliers indicated observations in low robust models (ICC&lt; 0.5) using high robust features (ICC &gt; 0.75 and ICC &gt; 0.95), despite the statistical significance in model robustness differences. The outlier samples were further analysed in terms of the datasets and classifiers. No statistical difference was found in different datasets (P &gt; 0.05), and statistical differences were observed in the classifiers. In the feature robustness filtering group of ICC &gt; 0.95, 8 (0.33%) samples in KNN, and 4 (0.17%) samples in Decision Tree were found to have poor model robustness performance despite using excellent-robust radiomic features.</p>
<p>The detailed results in model robustness improvements and their statistical tests for the four datasets (row) and five classifiers (column) are visualized in the last five columns of the heatmaps in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>, separated by outcome and robustness filtering thresholds. Heterogeneous model robustness improvements can be observed in different datasets, classifiers, and prediction outcomes. Higher (ICC &gt; 0.75: 0.045~0.24, ICC &gt; 0.95: 0.11~0.47) and more statistically significant (ICC &gt; 0.75: P-value=9.8 &#xd7; 10<sup>-35</sup>~1.1 &#xd7; 10<sup>-2</sup>, ICC &gt; 0.95: P-value=8.9 &#xd7; 10<sup>-48</sup>~1.2 &#xd7; 10<sup>-8</sup>) prediction ICC increases were found with the higher feature robustness filtering threshold in general.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Average intraclass correlation coefficient (ICC) improvement <bold>(A)</bold> and t-test P-values <bold>(B)</bold> of the final selected features and testing predictions after robust feature pre-selection shown in heatmaps. Each heatmap contains the results of one prediction outcome and one feature robustness filtering threshold. The first column of each heatmap represents the improvements of the final selected radiomic features, and the remaining five columns are the improvements of the testing prediction robustness using different classifiers. Results of the four datasets are recorded in rows. All the experiments showed positive improvements in ICC. A higher and more statistically significant increase in average ICC improvements can be observed with a higher filtering threshold.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-12-974467-g004.tif"/>
</fig>
</sec>
<sec id="s3_2">
<title>Model generalizability</title>
<p>Model generalizability is quantified as the difference between the training and testing Areas Under the Receiver Operating Characteristic Curve (AUCs), and a lower score indicates better generalizability. The model generalizability scores averaged over all the datasets, outcomes, and classifiers are 0.21, 0.18, and 0.12 without robustness filtering, with the filtering threshold of 0.75 (P&lt; 0.0001), and with the threshold of 0.95 (P&lt; 0.0001), respectively, as shown in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>The boxplot shows the train-test performance differences. The most restrictive feature robustness filtering provides the most generalizable models. *** indicates the p-value is smaller than 0.0001.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-12-974467-g005.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref> shows the subgroup analysis based on datasets, outcomes and classifiers. In general, model generalizability showed statistically significant improvements after feature robustness filtering in most experiments, as shown by the majority of negative mean generalizability differences and small t-test P-values. However, the prediction of LR on HN-PETCT had positive mean generalizability differences (ICC &gt; 0.75: -0.026~0.013, ICC &gt; 0.95: -0.025~0.016) for most of the classifiers under both filtering thresholds. Despite the heterogeneous results among datasets, outcomes, and classifiers, larger improvements with higher statistical significance in model generalizability were observed with the higher feature robustness filtering threshold (ICC &gt; 0.75: -0.06~-0.02, P-value = 7.2 &#xd7; 10<sup>-7</sup>~2.1 &#xd7; 10<sup>-1</sup>; ICC &gt; 0.95: -0.19~-0.054, P-value = 4.8 &#xd7; 10<sup>-15</sup>~6.5 &#xd7; 10<sup>-1</sup>) apart from LR models for HN-PETCT. <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref> shows the comparisons of average training and testing AUCs along with their 95% confidence intervals across the cross-validation models with increasing feature robustness filtering thresholds. Each subfigure contains the results of all the five classifiers shown in different colors and separated by datasets and clinical outcomes. Decreasing training AUCs were observed with increasing filtering thresholds. Specifically, the training AUCs averaged over all the datasets and prediction outcomes without feature robustness filtering, with robustness filtering on ICC &gt; 0.75, and with filtering on ICC &gt; 0.95 are 0.78, 0.76, and 0.69, respectively. Significant drops of training AUCs (pairwise t-test P-values&lt; 0.05) were observed in 33/40 experiments from no feature robustness filtering to the threshold of 0.75 and 40/40 experiments to the threshold of 0.95. Meanwhile, the average testing AUCs are 0.57, 0.58, and 0.57, with 18/40 experiments showing statistically significant differences (pairwise t-test P-values&lt; 0.05) for ICC &gt; 0.75 and 24/40 for ICC &gt; 0.95. Different classifiers showed heterogeneous trends of testing AUCs under increasing thresholds. Notably, the testing AUCs of LR radiomic models on HN-PETCT showed significant decreases for feature robustness filtering with ICC &gt; 0.75 (mean decrease: 0.026, 5/5 classifiers with P-value&lt; 0.05) and ICC &gt; 0.95 (mean decrease: 0.102, 4/5 classifiers with P-value&lt; 0.05).</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Heatmaps on mean model generalizability improvements <bold>(A)</bold> and statistical test results <bold>(B)</bold> after feature robustness filtering. Model generalizability is defined as the difference between training and testing AUCs, AUC<sub>testing</sub> - AUC<sub>training</sub>. A score closer to zero shows better generalizability. In general, model generalizability improved after feature robustness filtering, as shown by the negative values on the heatmaps <bold>(A)</bold> for both filtering thresholds. Greater improvements were observed with the higher filtering threshold (ICC &gt; 0.95). Moreover, more significant differences are shown by the smaller P-value. However, the predictions of LR on the dataset HN-PETCT showed worse generalizability after feature robustness filtering and the opposite trend of generalizability change and statistical test results with increasing filtering thresholds.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-12-974467-g006.tif"/>
</fig>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>The mean and its 95% confidence interval of the training and testing AUCs of the final constructed models. Each color represents one classifier for modeling. The solid lines represent the training performances, and the dashed lines represent the testing performances. The 95% confidence intervals are drawn as error bars. Each subfigure contains the evolution of training/testing AUCs with increasing feature robustness filtering thresholds for one dataset and prediction outcome. A decreasing trend of training AUCs was observed with increasing thresholds for all the datasets, prediction outcomes, and classifiers. The testing AUCs remain stable except for local-regional recurrence prediction on the HN-PETCT dataset.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-12-974467-g007.tif"/>
</fig>
</sec>
<sec id="s3_3">
<title>Bias evaluation</title>
<p>The model robustness improved significantly with the improved feature robustness <italic>via</italic> the mRMR feature selection, as shown in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, which is consistent with the model robustness improvement with filter-based feature selection.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>The model robustness (ICC) for different feature robustness pre-screening thresholds.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Outcomes</th>
<th valign="top" align="center"/>
<th valign="top" align="center">ICC &gt; 0</th>
<th valign="top" align="center">ICC &gt; 0.75</th>
<th valign="top" align="center">ICC &gt; 0.95</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" rowspan="4" align="left">DM</td>
<td valign="top" align="left">HN1</td>
<td valign="top" align="center">0.73 (0.66 - 0.79)</td>
<td valign="top" align="center">0.88 (0.84 - 0.91)</td>
<td valign="top" align="center">0.95 (0.94 - 0.96)</td>
</tr>
<tr>
<td valign="top" align="left">HN-PETCT</td>
<td valign="top" align="center">0.76 (0.71 - 0.80)</td>
<td valign="top" align="center">0.92 (0.90 - 0.94)</td>
<td valign="top" align="center">0.92 (0.97 - 0.98)</td>
</tr>
<tr>
<td valign="top" align="left">HNSCC</td>
<td valign="top" align="center">0.69 (0.64 - 0.75)</td>
<td valign="top" align="center">0.78 (0.93 - 0.82)</td>
<td valign="top" align="center">0.94 (0.93 - 0.96)</td>
</tr>
<tr>
<td valign="top" align="left">OPC</td>
<td valign="top" align="center">0.74 (0.70 - 0.79)</td>
<td valign="top" align="center">0.91 (0.90 - 0.93)</td>
<td valign="top" align="center">0.99 (0.99 - 0.99)</td>
</tr>
<tr>
<td valign="top" rowspan="4" align="left">LR</td>
<td valign="top" align="left">HN1</td>
<td valign="top" align="center">0.70 (0.64 - 0.77)</td>
<td valign="top" align="center">0.86 (0.82 - 0.90)</td>
<td valign="top" align="center">0.96 (0.95 - 0.98)</td>
</tr>
<tr>
<td valign="top" align="left">HN-PETCT</td>
<td valign="top" align="center">0.63 (0.57 - 0.70)</td>
<td valign="top" align="center">0.81 (0.77 - 0.85)</td>
<td valign="top" align="center">0.94 (0.92 - 0.95)</td>
</tr>
<tr>
<td valign="top" align="left">HNSCC</td>
<td valign="top" align="center">0.73 (0.68 - 0.78)</td>
<td valign="top" align="center">0.89 (0.86 - 0.91)</td>
<td valign="top" align="center">0.98 (0.97 - 0.98)</td>
</tr>
<tr>
<td valign="top" align="left">OPC</td>
<td valign="top" align="center">0.70 (0.66 - 0.75)</td>
<td valign="top" align="center">0.84 (0.81 - 0.87)</td>
<td valign="top" align="center">0.97 (0.97 - 0.98)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The training AUC showed a consistent drop with the increase in the threshold of feature robustness, shown in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>. In contrast, the testing AUC generally increased or remained at the same level, resulting in improved model generalizability.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>The training and testing AUC between different feature robustness pre-screening thresholds.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Outcomes</th>
<th valign="top" align="center">
</th>
<th valign="top" colspan="2" align="center">ICC &gt; 0</th>
<th valign="top" colspan="2" align="center">ICC &gt; 0.75</th>
<th valign="top" colspan="2" align="center">ICC &gt; 0.95</th>
</tr>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="center">
</th>
<th valign="top" align="center">Training AUC</th>
<th valign="top" align="center">Testing AUC</th>
<th valign="top" align="center">Training AUC</th>
<th valign="top" align="center">Testing AUC</th>
<th valign="top" align="center">Training AUC</th>
<th valign="top" align="center">Testing AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" rowspan="4" align="left">DM</td>
<td valign="top" align="left">HN1</td>
<td valign="top" align="center">0.96</td>
<td valign="top" align="center">0.52</td>
<td valign="top" align="center">0.92</td>
<td valign="top" align="center">0.53</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">0.60</td>
</tr>
<tr>
<td valign="top" align="left">HN-PETCT</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.69</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">0.70</td>
<td valign="top" align="center">0.74</td>
<td valign="top" align="center">0.70</td>
</tr>
<tr>
<td valign="top" align="left">HNSCC</td>
<td valign="top" align="center">0.76</td>
<td valign="top" align="center">0.53</td>
<td valign="top" align="center">0.68</td>
<td valign="top" align="center">0.50</td>
<td valign="top" align="center">0.63</td>
<td valign="top" align="center">0.53</td>
</tr>
<tr>
<td valign="top" align="left">OPC</td>
<td valign="top" align="center">0.72</td>
<td valign="top" align="center">0.60</td>
<td valign="top" align="center">0.68</td>
<td valign="top" align="center">0.62</td>
<td valign="top" align="center">0.64</td>
<td valign="top" align="center">0.62</td>
</tr>
<tr>
<td valign="top" rowspan="4" align="left">LR</td>
<td valign="top" align="left">HN1</td>
<td valign="top" align="center">0.86</td>
<td valign="top" align="center">0.57</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">0.60</td>
<td valign="top" align="center">0.70</td>
<td valign="top" align="center">0.60</td>
</tr>
<tr>
<td valign="top" align="left">HN-PETCT</td>
<td valign="top" align="center">0.83</td>
<td valign="top" align="center">0.62</td>
<td valign="top" align="center">0.79</td>
<td valign="top" align="center">0.63</td>
<td valign="top" align="center">0.70</td>
<td valign="top" align="center">0.54</td>
</tr>
<tr>
<td valign="top" align="left">HNSCC</td>
<td valign="top" align="center">0.74</td>
<td valign="top" align="center">0.62</td>
<td valign="top" align="center">0.72</td>
<td valign="top" align="center">0.64</td>
<td valign="top" align="center">0.68</td>
<td valign="top" align="center">0.65</td>
</tr>
<tr>
<td valign="top" align="left">OPC</td>
<td valign="top" align="center">0.72</td>
<td valign="top" align="center">0.52</td>
<td valign="top" align="center">0.69</td>
<td valign="top" align="center">0.54</td>
<td valign="top" align="center">0.61</td>
<td valign="top" align="center">0.54</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The bias analysis against the feature selection method showed consistent results between the filter-based and mRMR feature selection methods in improving model robustness and generalizability with robust radiomic features. Therefore, it is unlikely that different feature selection algorithms would affect the conclusion.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<title>Discussion</title>
<p>After removing low-robust features, the radiomic model&#x2019;s robustness and generalizability have been improved, and the improvement is consistent across multiple datasets, different clinical outcomes, and classifiers. Our results also offer two practical implications. First, the radiomic model&#x2019;s robustness needs to be evaluated even when high-robust radiomic features are used in modeling. Second, an overly restrictive threshold on feature robustness may prevent the radiomic model from achieving optimal performance on unseen datasets.</p>
<p>Previous literature has discussed the positive impact of robust feature pre-selection on radiomic model generalizability and robustness. For instance, Haarburger et&#xa0;al. (<xref ref-type="bibr" rid="B36">36</xref>) envisioned that robust-only features are more likely to lead to a more reliable radiomic model. Vuong et&#xa0;al. (<xref ref-type="bibr" rid="B37">37</xref>) obtained a radiomic model with multi-institutional datasets, which performed equally well as a model on a standardized dataset by including pre-screening on the robust features. Our results confirmed their vision and findings with quantifiable measurements of model robustness and generalizability improvements, providing concrete evidence of increased model stability after feature robustness filtering.</p>
<p>The improved model robustness can be explained by the reduced variability of the final selected features after pre-screening on feature robustness, as indicated by the statistically significantly higher mean feature ICCs. Model output variability is thus reduced as the final selected features are the direct model input. On the other hand, without feature robustness filtering beforehand, low-robust features are likely to remain after feature selection. They are more likely to be related to the outcome in the training cohort by chance (type I error) and less likely to be predictive of the unseen cohort or the entire population. Thus, the final constructed models tend to have high AUCs in training, but low AUCs in testing. The high type I error caused by low feature robustness reduces the power of feature selection in identifying the truly predictive features and lowers the generalizability of the final constructed models. However, a statistically significant reduction (mean: 0.007, P-value&lt; 0.001) in LR prediction generalizability and testing AUCs (mean: 0.1, P-value&lt; 0.001) with pre-selection of robust features on the HN-PETCT dataset was discovered, as shown in <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>. We found that one non-robust feature - <italic>wavelet-LHH_glszm_ZoneEntropy</italic> - demonstrated a significant correlation with LR in the entire HN-PETCT cohort with P-value&lt; 0.001. Meanwhile, it is vulnerable to the image perturbations with an ICC of 0.36 (95% CI: [0.32, 0.42]) and was thus removed from modeling, resulting in a reduction in overall model predictability and generalizability. This raises the concern about the limited reliability of testing predictability in representing the model generalizability on the unseen population. To further explain the reduced testing performance, we calculated the distribution of testing AUCs on the perturbed data and compared it with the results on the original data for the HN-PETCT dataset and the SVC classifier, as visualized in <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref>. Compared with DM predictions, the testing AUCs for LR demonstrated higher variability, and the original testing AUCs deviated more from the averaged AUCs under perturbations. Although the original testing AUCs increased significantly (ICC&lt; 0.75: mean increase = 0.02, P-value&lt; 0.01; ICC&lt; 0.95: mean increase = 0.019, P-value&lt; 0.01) after feature robustness filtering for LR, the average testing AUCs showed the opposite trend. The high variability of testing AUCs on LR increases the risk of under-representative testing performance evaluation on the original data, which can be alleviated by feature robustness filtering. Our new findings also support the recommendation of using the averaged feature values under image perturbations for modeling (<xref ref-type="bibr" rid="B9">9</xref>).</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>The comparison of the original and perturbed testing AUCs of HN-PETCT-298 averaged over train-test splits for the prediction of DM <bold>(A)</bold> and LR <bold>(B)</bold> using SVC. The testing AUCs showed high consistencies between the original images and perturbed images for the prediction of DM while large deviations were observed for the prediction of LR.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-12-974467-g008.tif"/>
</fig>
<p>Notably, we applied a comprehensive evaluation framework to assess model robustness and generalizability under repeated cross-validations. Instead of only splitting the entire cohort into a single training-testing pair and generating a single model for evaluation, multiple independent train-test splits can give statistical and unbiased evaluations of the impact of radiomic feature robustness on model robustness and generalizability. The main drawback of this method is the high heterogeneity in training and testing performance among iterations (<xref ref-type="bibr" rid="B38">38</xref>), which may reduce the statistical significance of our results. We used image perturbations to assess both radiomic feature robustness and model robustness. Although the scope of the image perturbations applied in this study might be limited, and the resulting feature robustness and model robustness are not guaranteed to be as sensitive as test-retest imaging and manual re-contouring, they are rather conservative simulations that impose no additional cost in medical resources and can be easily applied to any dataset. Comprehensive validations of the proposed perturbation method in the future are warranted to increase the credibility of this work. There are other limitations of this study. First, we only considered four head-and-neck cancer datasets from The Cancer Imaging Archive (TCIA), and our results may only be generalizable to head-and-neck data. To further generalize the findings to other sites, it is encouraged to test our method on more cancer sites. Second, bias could arise from the single feature selection method, as different criteria and techniques in feature selection have different power in identifying truly predictive radiomic features. It is also suggested to validate our methods with different feature selection methods.</p>
</sec>
<sec id="s5">
<title>Conclusion</title>
<p>In this study, we evaluated the impact of removing low-robust features on the radiomic model&#x2019;s robustness and generalizability. Our results suggest that removing low-robust features improves model robustness and generalizability to unseen data. Our findings also imply that model robustness should still be evaluated even when robust features are used, and that the strictest threshold in feature robustness may undermine the optimal model performance.</p>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found below: <uri xlink:href="https://github.com/vivixinzhi/improved-robustness-and-generalizability-of-radiomic-modeling-via-image-perturabtion">https://github.com/vivixinzhi/improved-robustness-and-generalizability-of-radiomic-modeling-via-image-perturabtion</uri>.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>XT, JZ, and JC conceptualized the idea. XT, YZ, and JZ performed data analysis and validation. FK-hL, K-hA, VL, and AC provided the resources. ZM, SL, WL, and HX performed data cleaning and verification. TL, BL, TZ, GR, SL, WL, and HX provided manuscript editing and review. XT drafted the original manuscript. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>This research was partly supported by Project of Strategic Importance Fund (P0035421), and Project of RI-IWEAR fund (P0038684) from Hong Kong Polytechnic University, and Shenzhen-Hong Kong-Macau S&amp;T Program (Category C) (SGDX20201103095002019) from Shenzhen Science and Technology Innovation Committee.</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aerts</surname> <given-names>HJWL</given-names>
</name>
<name>
<surname>Velazquez</surname> <given-names>ER</given-names>
</name>
<name>
<surname>Leijenaar</surname> <given-names>RTH</given-names>
</name>
<name>
<surname>Parmar</surname> <given-names>C</given-names>
</name>
<name>
<surname>Grossmann</surname> <given-names>P</given-names>
</name>
<name>
<surname>Carvalho</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>Decoding tumour phenotype by noninvasive imaging using a quantitative radiomics approach</article-title>. <source>Nat Commun</source> (<year>2014</year>) <volume>5</volume>(<issue>1</issue>):<fpage>4006</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/ncomms5006</pub-id>
</citation>
</ref>
<ref id="B2">
<label>2</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gillies</surname> <given-names>RJ</given-names>
</name>
<name>
<surname>Kinahan</surname> <given-names>PE</given-names>
</name>
<name>
<surname>Hricak</surname> <given-names>H</given-names>
</name>
</person-group>. <article-title>Radiomics: Images are more than pictures, they are data</article-title>. <source>Radiology</source> (<year>2015</year>) <volume>278</volume>(<issue>2</issue>):<page-range>563&#x2013;77</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1148/radiol.2015151169</pub-id>
</citation>
</ref>
<ref id="B3">
<label>3</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lambin</surname> <given-names>P</given-names>
</name>
<name>
<surname>Leijenaar</surname> <given-names>RTH</given-names>
</name>
<name>
<surname>Deist</surname> <given-names>TM</given-names>
</name>
<name>
<surname>Peerlings</surname> <given-names>J</given-names>
</name>
<name>
<surname>de Jong</surname> <given-names>EEC</given-names>
</name>
<name>
<surname>van Timmeren</surname> <given-names>J</given-names>
</name>
<etal/>
</person-group>. <article-title>Radiomics: the bridge between medical imaging and personalized medicine</article-title>. <source>Nat Rev Clin Oncol</source> (<year>2017</year>) <volume>14</volume>(<issue>12</issue>):<page-range>749&#x2013;62</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nrclinonc.2017.141</pub-id>. Art. no. 12.</citation>
</ref>
<ref id="B4">
<label>4</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thawani</surname> <given-names>R</given-names>
</name>
<name>
<surname>McLane</surname> <given-names>M</given-names>
</name>
<name>
<surname>Beig</surname> <given-names>N</given-names>
</name>
<name>
<surname>Ghose</surname> <given-names>S</given-names>
</name>
<name>
<surname>Prasanna</surname> <given-names>P</given-names>
</name>
<name>
<surname>Velcheti</surname> <given-names>V</given-names>
</name>
<etal/>
</person-group>. <article-title>Radiomics and radiogenomics in lung cancer: A review for the clinician</article-title>. <source>Lung Cancer</source> (<year>2018</year>) <volume>115</volume>:<fpage>34</fpage>&#x2013;<lpage>41</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.lungcan.2017.10.015</pub-id>
</citation>
</ref>
<ref id="B5">
<label>5</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ferreira Junior</surname> <given-names>JR</given-names>
</name>
<name>
<surname>Koenigkam-Santos</surname> <given-names>M</given-names>
</name>
<name>
<surname>Cipriano</surname> <given-names>FEG</given-names>
</name>
<name>
<surname>Fabro</surname> <given-names>AT</given-names>
</name>
<name>
<surname>de Azevedo-Marques</surname> <given-names>PM</given-names>
</name>
</person-group>. <article-title>Radiomics-based features for pattern recognition of lung cancer histopathology and metastases</article-title>. <source>Comput Methods Programs BioMed</source> (<year>2018</year>) <volume>159</volume>:<fpage>23</fpage>&#x2013;<lpage>30</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cmpb.2018.02.015</pub-id>
</citation>
</ref>
<ref id="B6">
<label>6</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mouraviev</surname> <given-names>A</given-names>
</name>
<name>
<surname>Detsky</surname> <given-names>J</given-names>
</name>
<name>
<surname>Sahgal</surname> <given-names>A</given-names>
</name>
<name>
<surname>Ruschin</surname> <given-names>M</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>YK</given-names>
</name>
<name>
<surname>Karam</surname> <given-names>I</given-names>
</name>
<etal/>
</person-group>. <article-title>Use of radiomics for the prediction of local control of brain metastases after stereotactic radiosurgery</article-title>. <source>Neuro-Oncology</source> (<year>2020</year>) <volume>22</volume>(<issue>6</issue>):<fpage>797</fpage>&#x2013;<lpage>805</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/neuonc/noaa007</pub-id>
</citation>
</ref>
<ref id="B7">
<label>7</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shi</surname> <given-names>L</given-names>
</name>
<name>
<surname>He</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Benedict</surname> <given-names>S</given-names>
</name>
<name>
<surname>Valicenti</surname> <given-names>R</given-names>
</name>
<name>
<surname>Qiu</surname> <given-names>J</given-names>
</name>
<etal/>
</person-group>. <article-title>Radiomics for response and outcome assessment for non-small cell lung cancer</article-title>. <source>Technol Cancer Res Treat</source> (<year>2018</year>) <volume>17</volume>:<elocation-id>1533033818782788</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1177/1533033818782788</pub-id>
</citation>
</ref>
<ref id="B8">
<label>8</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Desideri</surname> <given-names>I</given-names>
</name>
<name>
<surname>Loi</surname> <given-names>M</given-names>
</name>
<name>
<surname>Francolini</surname> <given-names>G</given-names>
</name>
<name>
<surname>Becherini</surname> <given-names>C</given-names>
</name>
<name>
<surname>Livi</surname> <given-names>L</given-names>
</name>
<name>
<surname>Bonomo</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>Application of radiomics for the prediction of radiation-induced toxicity in the IMRT era: Current state-of-the-art</article-title>. <source>Front Oncol</source> (<year>2020</year>) <volume>10</volume>:<elocation-id>1708</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fonc.2020.01708</pub-id>
</citation>
</ref>
<ref id="B9">
<label>9</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zwanenburg</surname> <given-names>A</given-names>
</name>
<name>
<surname>Leger</surname> <given-names>S</given-names>
</name>
<name>
<surname>Agolli</surname> <given-names>L</given-names>
</name>
<name>
<surname>Pilz</surname> <given-names>K</given-names>
</name>
<name>
<surname>Troost</surname> <given-names>EGC</given-names>
</name>
<name>
<surname>Richter</surname> <given-names>C</given-names>
</name>
<etal/>
</person-group>. <article-title>Assessing robustness of radiomic features by image perturbation</article-title>. <source>Sci Rep</source> (<year>2019</year>) <volume>9</volume>(<issue>1</issue>):<fpage>614</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-018-36938-4</pub-id>. Art. no. 1.</citation>
</ref>
<ref id="B10">
<label>10</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Teng</surname> <given-names>X</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J</given-names>
</name>
<name>
<surname>Zwanenburg</surname> <given-names>A</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Lam</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>Building reliable radiomic models using image perturbation</article-title>. <source>Sci Rep</source> (<year>2022</year>) <volume>12</volume>(<issue>1</issue>):<fpage>10035</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-022-14178-x</pub-id>
</citation>
</ref>
<ref id="B11">
<label>11</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Valli&#xe8;res</surname> <given-names>M</given-names>
</name>
<name>
<surname>Kay-Rivest</surname> <given-names>E</given-names>
</name>
<name>
<surname>Perrin</surname> <given-names>L</given-names>
</name>
<name>
<surname>Liem</surname> <given-names>X</given-names>
</name>
<name>
<surname>Furstoss</surname> <given-names>C</given-names>
</name>
<name>
<surname>Khaouam</surname> <given-names>N</given-names>
</name>
<etal/>
</person-group>. <article-title>Data from Head-Neck-PET-CT</article-title>. <source>Cancer Imaging Arch</source> (<year>2017</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.7937/K9/TCIA.2017.8OJE5Q00</pub-id>
</citation>
</ref>
<ref id="B12">
<label>12</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Valli&#xe8;res</surname> <given-names>M</given-names>
</name>
<name>
<surname>Kay-Rivest</surname> <given-names>E</given-names>
</name>
<name>
<surname>Perrin</surname> <given-names>LJ</given-names>
</name>
<name>
<surname>Liem</surname> <given-names>X</given-names>
</name>
<name>
<surname>Furstoss</surname> <given-names>C</given-names>
</name>
<name>
<surname>Aerts</surname> <given-names>HJWL</given-names>
</name>
<etal/>
</person-group>. <article-title>Radiomics strategies for risk assessment of tumour failure in head-and-neck cancer</article-title>. <source>Sci Rep</source> (<year>2017</year>) <volume>7</volume>(<issue>1</issue>):<fpage>10117</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-017-10371-5</pub-id>
</citation>
</ref>
<ref id="B13">
<label>13</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grossberg</surname> <given-names>A</given-names>
</name>
<name>
<surname>Elhalawani</surname> <given-names>H</given-names>
</name>
<name>
<surname>Mohamed</surname> <given-names>A</given-names>
</name>
<name>
<surname>Mulder</surname> <given-names>S</given-names>
</name>
<name>
<surname>Williams</surname> <given-names>B</given-names>
</name>
<name>
<surname>White</surname> <given-names>AL</given-names>
</name>
<etal/>
</person-group>. <article-title>HNSCC</article-title>. <source>Cancer Imaging Arch</source> (<year>2020</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.7937/K9/TCIA.2020.A8SH-7363</pub-id>
</citation>
</ref>
<ref id="B14">
<label>14</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grossberg</surname> <given-names>AJ</given-names>
</name>
<etal/>
</person-group>. <article-title>Imaging and clinical data archive for head and neck squamous cell carcinoma patients treated with radiotherapy</article-title>. <source>Sci Data</source> (<year>2018</year>) <volume>5</volume>(<issue>1</issue>):<elocation-id>180173</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/sdata.2018.173</pub-id>.</citation>
</ref>
<ref id="B15">
<label>15</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<collab>MICCAI/M.D. Anderson Cancer Center Head and Neck Quantitative Imaging Working Group</collab>
</person-group>. <article-title>Matched computed tomography segmentation and demographic data for oropharyngeal cancer radiomics challenges</article-title>. <source>Sci Data</source> (<year>2017</year>) <volume>4</volume>(<issue>1</issue>):<fpage>170077</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/sdata.2017.77</pub-id>
</citation>
</ref>
<ref id="B16">
<label>16</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kwan</surname> <given-names>JYY</given-names>
</name>
<name>
<surname>Su</surname> <given-names>J</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>SH</given-names>
</name>
<name>
<surname>Ghoraie</surname> <given-names>LS</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>W</given-names>
</name>
<name>
<surname>Chan</surname> <given-names>B</given-names>
</name>
<etal/>
</person-group>. <article-title>Data from radiomic biomarkers to refine risk models for distant metastasis in oropharyngeal carcinoma</article-title>. <source>Cancer Imaging Arch</source> (<year>2019</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.7937/TCIA.2019.8DHO2GLS</pub-id>
</citation>
</ref>
<ref id="B17">
<label>17</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kwan</surname> <given-names>JYY</given-names>
</name>
<name>
<surname>Su</surname> <given-names>J</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>SH</given-names>
</name>
<name>
<surname>Ghoraie</surname> <given-names>LS</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>W</given-names>
</name>
<name>
<surname>Chan</surname> <given-names>B</given-names>
</name>
<etal/>
</person-group>. <article-title>Radiomic biomarkers to refine risk models for distant metastasis in HPV-related oropharyngeal carcinoma</article-title>. <source>Int J Radiat Oncol Biol Phys</source> (<year>2018</year>) <volume>102</volume>(<issue>4</issue>):<page-range>1107&#x2013;16</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ijrobp.2018.01.057</pub-id>
</citation>
</ref>
<ref id="B18">
<label>18</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fournier</surname> <given-names>L</given-names>
</name>
<name>
<surname>Costaridou</surname> <given-names>L</given-names>
</name>
<name>
<surname>Bidaut</surname> <given-names>L</given-names>
</name>
<name>
<surname>Michoux</surname> <given-names>N</given-names>
</name>
<name>
<surname>Lecouvet</surname> <given-names>FE</given-names>
</name>
<name>
<surname>de Geus-Oei</surname> <given-names>LF</given-names>
</name>
<etal/>
</person-group>. <article-title>Incorporating radiomics into clinical trials: expert consensus endorsed by the European society of radiology on considerations for data-driven compared to biologically driven quantitative biomarkers</article-title>. <source>Eur Radiol</source> (<year>2021</year>) <volume>31</volume>:<page-range>6001&#x2013;12</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00330-020-07598-8</pub-id>
</citation>
</ref>
<ref id="B19">
<label>19</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Suter</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Knecht</surname> <given-names>U</given-names>
</name>
<name>
<surname>Al&#xe3;o</surname> <given-names>M</given-names>
</name>
<name>
<surname>Valenzuela</surname> <given-names>W</given-names>
</name>
<name>
<surname>Hewer</surname> <given-names>E</given-names>
</name>
<name>
<surname>Schucht</surname> <given-names>P</given-names>
</name>
<etal/>
</person-group>. <article-title>Radiomics for glioblastoma survival analysis in pre-operative MRI: exploring feature robustness, class boundaries, and machine learning techniques</article-title>. <source>Cancer Imaging</source> (<year>2020</year>) <volume>20</volume>(<issue>1</issue>):<fpage>55</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s40644-020-00329-8</pub-id>
</citation>
</ref>
<ref id="B20">
<label>20</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Oikonomou</surname> <given-names>A</given-names>
</name>
<name>
<surname>Wong</surname> <given-names>A</given-names>
</name>
<name>
<surname>Haider</surname> <given-names>MA</given-names>
</name>
<name>
<surname>Khalvati</surname> <given-names>F</given-names>
</name>
</person-group>. <article-title>Radiomics-based prognosis analysis for non-small cell lung cancer</article-title>. <source>Sci Rep</source> (<year>2017</year>) <volume>7</volume>(<issue>1</issue>):<elocation-id>46349</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/srep46349</pub-id>. Art. no. 1.</citation>
</ref>
<ref id="B21">
<label>21</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Forghani</surname> <given-names>R</given-names>
</name>
<name>
<surname>Savadjiev</surname> <given-names>P</given-names>
</name>
<name>
<surname>Chatterjee</surname> <given-names>A</given-names>
</name>
<name>
<surname>Muthukrishnan</surname> <given-names>N</given-names>
</name>
<name>
<surname>Reinhold</surname> <given-names>C</given-names>
</name>
<name>
<surname>Forghani</surname> <given-names>B</given-names>
</name>
</person-group>. <article-title>Radiomics and artificial intelligence for biomarker and prediction model development in oncology</article-title>. <source>Comput Struct Biotechnol J</source> (<year>2019</year>) <volume>17</volume>:<fpage>995</fpage>&#x2013;<lpage>1008</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.csbj.2019.07.001</pub-id>
</citation>
</ref>
<ref id="B22">
<label>22</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yun</surname> <given-names>J</given-names>
</name>
<name>
<surname>Park</surname> <given-names>JE</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>H</given-names>
</name>
<name>
<surname>Ham</surname> <given-names>S</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>N</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>HS</given-names>
</name>
</person-group>. <article-title>Radiomic features and multilayer perceptron network classifier: a robust MRI classification strategy for distinguishing glioblastoma from primary central nervous system lymphoma</article-title>. <source>Sci Rep</source> (<year>2019</year>) <volume>9</volume>(<issue>1</issue>):<fpage>5746</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-019-42276-w</pub-id>. Art. no. 1.</citation>
</ref>
<ref id="B23">
<label>23</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Irani</surname> <given-names>S</given-names>
</name>
</person-group>. <article-title>Distant metastasis from oral cancer: A review and molecular biologic aspects</article-title>. <source>J Int Soc Prev Community Dent</source> (<year>2016</year>) <volume>6</volume>(<issue>4</issue>):<page-range>265&#x2013;71</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.4103/2231-0762.186805</pub-id>
</citation>
</ref>
<ref id="B24">
<label>24</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mittendorf</surname> <given-names>EA</given-names>
</name>
<name>
<surname>Buchholz</surname> <given-names>TA</given-names>
</name>
<name>
<surname>Tucker</surname> <given-names>SL</given-names>
</name>
<name>
<surname>Meric-Bernstam</surname> <given-names>F</given-names>
</name>
<name>
<surname>Kuerer</surname> <given-names>HM</given-names>
</name>
<name>
<surname>Gonzalez-Angulo</surname> <given-names>AM</given-names>
</name>
<etal/>
</person-group>. <article-title>Impact of chemotherapy sequencing on local-regional failure risk in breast cancer patients undergoing breast conserving therapy</article-title>. <source>Ann Surg</source> (<year>2013</year>) <volume>257</volume>(<issue>2</issue>):<page-range>173&#x2013;9</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1097/SLA.0b013e3182805c4a</pub-id>
</citation>
</ref>
<ref id="B25">
<label>25</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>K</given-names>
</name>
<name>
<surname>Folkert</surname> <given-names>M</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>H</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>S</given-names>
</name>
<name>
<surname>Sher</surname> <given-names>D</given-names>
</name>
<etal/>
</person-group>. <article-title>Multifaceted radiomics for distant metastasis prediction in head &amp; neck cancer</article-title>. <source>Phys Med Biol</source> (<year>2020</year>) <volume>65</volume>(<issue>15</issue>):<fpage>155009</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1088/1361-6560/ab8956</pub-id>
</citation>
</ref>
<ref id="B26">
<label>26</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>L-L</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>MY</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>JH</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>TS</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>B</given-names>
</name>
<etal/>
</person-group>. <article-title>Pretreatment MRI radiomics analysis allows for reliable prediction of local recurrence in non-metastatic T4 nasopharyngeal carcinoma</article-title>. <source>EBioMedicine</source> (<year>2019</year>) <volume>42</volume>:<page-range>270&#x2013;80</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ebiom.2019.03.050</pub-id>
</citation>
</ref>
<ref id="B27">
<label>27</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moradmand</surname> <given-names>H</given-names>
</name>
<name>
<surname>Aghamiri</surname> <given-names>SMR</given-names>
</name>
<name>
<surname>Ghaderi</surname> <given-names>R</given-names>
</name>
</person-group>. <article-title>Impact of image preprocessing methods on reproducibility of radiomic features in multimodal magnetic resonance imaging in glioblastoma</article-title>. <source>J Appl Clin Med Phys</source> (<year>2020</year>) <volume>21</volume>(<issue>1</issue>):<page-range>179&#x2013;90</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/acm2.12795</pub-id>
</citation>
</ref>
<ref id="B28">
<label>28</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fave</surname> <given-names>X</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>J</given-names>
</name>
<name>
<surname>Mackin</surname> <given-names>D</given-names>
</name>
<name>
<surname>Balter</surname> <given-names>P</given-names>
</name>
<name>
<surname>Gomez</surname> <given-names>D</given-names>
</name>
<etal/>
</person-group>. <article-title>Impact of image preprocessing on the volume dependence and prognostic potential of radiomics features in non-small cell lung cancer</article-title>. <source>Transl Cancer Res</source> (<year>2016</year>) <volume>5</volume>(<issue>4</issue>). doi:&#xa0;<pub-id pub-id-type="doi">10.21037/8709</pub-id>. Art. no. 4.</citation>
</ref>
<ref id="B29">
<label>29</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yaniv</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Lowekamp</surname> <given-names>BC</given-names>
</name>
<name>
<surname>Johnson</surname> <given-names>HJ</given-names>
</name>
<name>
<surname>Beare</surname> <given-names>R</given-names>
</name>
</person-group>. <article-title>SimpleITK image-analysis notebooks: a collaborative environment for education and reproducible research</article-title>. <source>J Digit Imaging</source> (<year>2018</year>) <volume>31</volume>(<issue>3</issue>):<fpage>290</fpage>&#x2013;<lpage>303</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10278-017-0037-8</pub-id>
</citation>
</ref>
<ref id="B30">
<label>30</label>
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Bradski</surname> <given-names>G</given-names>
</name>
</person-group>. <article-title>The OpenCV library</article-title>, in: <source>Dr. Dobb&#x2019;s</source>. Available at: <uri xlink:href="http://www.drdobbs.com/open-source/the-opencv-library/184404319">http://www.drdobbs.com/open-source/the-opencv-library/184404319</uri> (Accessed <access-date>Mar. 27, 2021</access-date>).</citation>
</ref>
<ref id="B31">
<label>31</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>van Griethuysen</surname> <given-names>JJM</given-names>
</name>
<name>
<surname>Fedorov</surname> <given-names>A</given-names>
</name>
<name>
<surname>Parmar</surname> <given-names>C</given-names>
</name>
<name>
<surname>Hosny</surname> <given-names>A</given-names>
</name>
<name>
<surname>Aucoin</surname> <given-names>N</given-names>
</name>
<name>
<surname>Narayan</surname> <given-names>V</given-names>
</name>
<etal/>
</person-group>. <article-title>Computational radiomics system to decode the radiographic phenotype</article-title>. <source>Cancer Res</source> (<year>2017</year>) <volume>77</volume>(<issue>21</issue>):<page-range>e104&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1158/0008-5472.CAN-17-0339</pub-id>
</citation>
</ref>
<ref id="B32">
<label>32</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McGraw</surname> <given-names>KO</given-names>
</name>
<name>
<surname>Wong</surname> <given-names>SP</given-names>
</name>
</person-group>. <article-title>Forming inferences about some intraclass correlation coefficients</article-title>. <source>Psychol Methods</source> (<year>1996</year>) <volume>1</volume>(<issue>1</issue>):<fpage>30</fpage>&#x2013;<lpage>46</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1037/1082-989X.1.1.30</pub-id>
</citation>
</ref>
<ref id="B33">
<label>33</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Koo</surname> <given-names>TK</given-names>
</name>
<name>
<surname>Li</surname> <given-names>MY</given-names>
</name>
</person-group>. <article-title>A guideline of selecting and reporting intraclass correlation coefficients for reliability research</article-title>. <source>J Chiropr Med</source> (<year>2016</year>) <volume>15</volume>(<issue>2</issue>):<page-range>155&#x2013;63</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jcm.2016.02.012</pub-id>
</citation>
</ref>
<ref id="B34">
<label>34</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lema&#xee;tre</surname> <given-names>G</given-names>
</name>
<name>
<surname>Nogueira</surname> <given-names>F</given-names>
</name>
<name>
<surname>Aridas</surname> <given-names>CK</given-names>
</name>
</person-group>. <article-title>Imbalanced-learn: A Python toolbox to tackle the curse of imbalanced datasets in machine learning</article-title>. <source>J Mach Learn Res</source> (<year>2017</year>) <volume>18</volume>(<issue>17</issue>):<fpage>1</fpage>&#x2013;<lpage>5</lpage>. Available at: <uri xlink:href="http://jmlr.org/papers/v18/16-365.html">http://jmlr.org/papers/v18/16-365.html</uri>
</citation>
</ref>
<ref id="B35">
<label>35</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Buitinck</surname> <given-names>L</given-names>
</name>
<name>
<surname>Louppe</surname> <given-names>G</given-names>
</name>
<name>
<surname>Blondel</surname> <given-names>M</given-names>
</name>
<name>
<surname>Pedregosa</surname> <given-names>F</given-names>
</name>
<name>
<surname>Mueller</surname> <given-names>A</given-names>
</name>
<name>
<surname>Grisel</surname> <given-names>O</given-names>
</name>
<etal/>
</person-group>. <article-title>API design for machine learning software: experiences from the scikit-learn project</article-title>. <source>arXiv</source> (<year>2013</year>). arXiv:1309.0238. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1309.0238</pub-id>
</citation>
</ref>
<ref id="B36">
<label>36</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Haarburger</surname> <given-names>C</given-names>
</name>
<name>
<surname>M&#xfc;ller-Franzes</surname> <given-names>G</given-names>
</name>
<name>
<surname>Weninger</surname> <given-names>L</given-names>
</name>
<name>
<surname>Kuhl</surname> <given-names>C</given-names>
</name>
<name>
<surname>Truhn</surname> <given-names>D</given-names>
</name>
<name>
<surname>Merhof</surname> <given-names>D</given-names>
</name>
</person-group>. <article-title>Radiomics feature reproducibility under inter-rater variability in segmentations of CT images</article-title>. <source>Sci Rep</source> (<year>2020</year>) <volume>10</volume>(<issue>1</issue>):<fpage>12688</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-020-69534-6</pub-id>. Art. no. 1.</citation>
</ref>
<ref id="B37">
<label>37</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vuong</surname> <given-names>D</given-names>
</name>
<name>
<surname>Bogowicz</surname> <given-names>M</given-names>
</name>
<name>
<surname>Denzler</surname> <given-names>S</given-names>
</name>
<name>
<surname>Oliveira</surname> <given-names>C</given-names>
</name>
<name>
<surname>Foerster</surname> <given-names>R</given-names>
</name>
<name>
<surname>Amstutz</surname> <given-names>F</given-names>
</name>
<etal/>
</person-group>. <article-title>Comparison of robust to standardized CT radiomics models to predict overall survival for non-small cell lung cancer patients</article-title>. <source>Med Phys</source> (<year>2020</year>) <volume>47</volume>(<issue>9</issue>):<page-range>4045&#x2013;53</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/mp.14224</pub-id>
</citation>
</ref>
<ref id="B38">
<label>38</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Efron</surname> <given-names>B</given-names>
</name>
<name>
<surname>Tibshirani</surname> <given-names>R</given-names>
</name>
</person-group>. <article-title>Improvements on cross-validation: The .632+ bootstrap method</article-title>. <source>J Am Stat Assoc</source> (<year>1997</year>) <volume>92</volume>(<issue>438</issue>):<page-range>548&#x2013;60</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.2307/2965703</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>