<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="methods-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Immunol.</journal-id>
<journal-title>Frontiers in Immunology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Immunol.</abbrev-journal-title>
<issn pub-type="epub">1664-3224</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fimmu.2023.1167241</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Immunology</subject>
<subj-group>
<subject>Methods</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Cyclone: an accessible pipeline to analyze, evaluate, and optimize multiparametric cytometry data</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Patel</surname>
<given-names>Ravi K.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2211417"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jaszczak</surname>
<given-names>Rebecca G.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2211300"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Im</surname>
<given-names>Kwok</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Carey</surname>
<given-names>Nicholas D.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2211410"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Courau</surname>
<given-names>Tristan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Bunis</surname>
<given-names>Daniel G.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1502008"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Samad</surname>
<given-names>Bushra</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2413405"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Avanesyan</surname>
<given-names>Lia</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="aff" rid="aff7">
<sup>7</sup>
</xref>
<xref ref-type="aff" rid="aff8">
<sup>8</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2229702"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chew</surname>
<given-names>Nayvin W.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2233022"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Stenske</surname>
<given-names>Sarah</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jespersen</surname>
<given-names>Jillian M.</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2384678"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Publicover</surname>
<given-names>Jean</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Edwards</surname>
<given-names>Austin W.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2212558"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Naser</surname>
<given-names>Mohammad</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2213839"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Rao</surname>
<given-names>Arjun A.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lupin-Jimenez</surname>
<given-names>Leonard</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Krummel</surname>
<given-names>Matthew F.</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cooper</surname>
<given-names>Stewart</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="aff" rid="aff7">
<sup>7</sup>
</xref>
<xref ref-type="aff" rid="aff8">
<sup>8</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Baron</surname>
<given-names>Jody L.</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="aff" rid="aff7">
<sup>7</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Combes</surname>
<given-names>Alexis J.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<xref ref-type="author-notes" rid="fn004">
<sup>&#x2021;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1451456"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Fragiadakis</surname>
<given-names>Gabriela K.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff9">
<sup>9</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<xref ref-type="author-notes" rid="fn004">
<sup>&#x2021;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1680919"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>UCSF CoLabs, University of California San Francisco</institution>, <addr-line>San Francisco, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Pathology, University of California San Francisco</institution>, <addr-line>San Francisco, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>ImmunoX Initiative, University of California San Francisco</institution>, <addr-line>San Francisco, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Department of Medicine, Division of Gastroenterology, University of California San Francisco</institution>, <addr-line>San Francisco, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>UCSF Liver Center, University of California San Francisco</institution>, <addr-line>San Francisco, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>UCSF Immunoprofiler Initiative, University of California San Francisco</institution>, <addr-line>San Francisco, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff7">
<sup>7</sup>
<institution>The Ibrahim El-Hefni Liver Biorepository at California Pacific Medical Center (IELBC)</institution>, <addr-line>San Francisco, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff8">
<sup>8</sup>
<institution>Division of General and Transplant Hepatology, California Pacific Medical Center &amp; Research Institute</institution>, <addr-line>San Francisco, CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff9">
<sup>9</sup>
<institution>Division of Rheumatology, Department of Medicine, University of California San Francisco</institution>, <addr-line>San Francisco, CA</addr-line>, <country>United States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Thien-Phong Vu Manh, INSERM U1104 Centre d&#x2019;immunologie de Marseille-Luminy (CIML), France</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Cyrille Mionnet, INSERM U1104 Centre d&#x2019;immunologie de Marseille-Luminy (CIML), France; Aur&#xe9;lien Corneau, Sorbonne Universit&#xe9;s, France</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Alexis J. Combes, <email xlink:href="mailto:alexis.combes@ucsf.edu">alexis.combes@ucsf.edu</email>; Gabriela K. Fragiadakis, <email xlink:href="mailto:gabriela.fragiadakis@ucsf.edu">gabriela.fragiadakis@ucsf.edu</email>
</p>
</fn>
<fn fn-type="equal" id="fn003">
<p>&#x2020;These authors have contributed equally to this work</p>
</fn>
<fn fn-type="other" id="fn004">
<p>&#x2021;Lead authors</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>04</day>
<month>09</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1167241</elocation-id>
<history>
<date date-type="received">
<day>16</day>
<month>02</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>04</day>
<month>08</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Patel, Jaszczak, Im, Carey, Courau, Bunis, Samad, Avanesyan, Chew, Stenske, Jespersen, Publicover, Edwards, Naser, Rao, Lupin-Jimenez, Krummel, Cooper, Baron, Combes and Fragiadakis</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Patel, Jaszczak, Im, Carey, Courau, Bunis, Samad, Avanesyan, Chew, Stenske, Jespersen, Publicover, Edwards, Naser, Rao, Lupin-Jimenez, Krummel, Cooper, Baron, Combes and Fragiadakis</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>In the past decade, high-dimensional single-cell technologies have revolutionized basic and translational immunology research and are now a key element of the toolbox used by scientists to study the immune system. However, analysis of the data generated by these approaches often requires clustering algorithms and dimensionality reduction representation, which are computationally intense and difficult to evaluate and optimize. Here, we present Cytometry Clustering Optimization and Evaluation (Cyclone), an analysis pipeline integrating dimensionality reduction, clustering, evaluation, and optimization of clustering resolution, and downstream visualization tools facilitating the analysis of a wide range of cytometry data. We benchmarked and validated Cyclone on mass cytometry (CyTOF), full-spectrum fluorescence-based cytometry, and multiplexed immunofluorescence (IF) in a variety of biological contexts, including infectious diseases and cancer. In each instance, Cyclone not only recapitulates gold standard immune cell identification but also enables the unsupervised identification of lymphocytes and mononuclear phagocyte subsets that are associated with distinct biological features. Altogether, the Cyclone pipeline is a versatile and accessible pipeline for performing, optimizing, and evaluating clustering on a variety of cytometry datasets, which will further power immunology research and provide a scaffold for biological discovery.</p>
</abstract>
<kwd-group>
<kwd>cyclone</kwd>
<kwd>CyTOF</kwd>
<kwd>spectral flow cytometry</kwd>
<kwd>spatial expression data</kwd>
<kwd>multi-parametric analysis</kwd>
<kwd>FlowSOM</kwd>
<kwd>clustering optimization</kwd>
</kwd-group>
<counts>
<fig-count count="6"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="39"/>
<page-count count="18"/>
<word-count count="10142"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Molecular Innate Immunity</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<title>Introduction</title>
<p>The advent of high-dimensional single-cell technologies has transformed our ability to study the complex array of cell types, states, and behaviors comprising the immune system (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>). Single-cell proteomics including mass cytometry (CyTOF) and high-dimensional flow cytometry enables the detection of up to 50 extra- and intracellular proteins on hundreds of thousands of cells from a sample (<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B4">4</xref>). These technologies have been applied to a wide variety of patient cohorts and animal models to gain insights into immune set points, responses, and pathology including cancer, infection, hyperinflammatory disorders, and therapeutic intervention (<xref ref-type="bibr" rid="B5">5</xref>&#x2013;<xref ref-type="bibr" rid="B8">8</xref>).</p>
<p>The high dimensionality of these data has necessitated the development and application of tools that can parse these data in a semi-automated way. This includes identifying cell populations via clustering algorithms (<xref ref-type="bibr" rid="B9">9</xref>&#x2013;<xref ref-type="bibr" rid="B11">11</xref>), dimensionality reduction approaches for stratifying samples (<xref ref-type="bibr" rid="B12">12</xref>, <xref ref-type="bibr" rid="B13">13</xref>), and visualization software and statistical packages for downstream analysis (<xref ref-type="bibr" rid="B14">14</xref>&#x2013;<xref ref-type="bibr" rid="B16">16</xref>). Due to the diversity and complexity of the immune system, the use of clustering algorithms and dimensionality reduction has become increasingly standard for immune monitoring across tissues and species. However, to date, a consensus process has not been established, rendering the comparison of this type of analysis difficult. While many tools have been both introduced and evaluated, many researchers such as wet-lab immunologists with limited computational experience struggle to navigate the vast landscape of tools for processing and analyzing cytometry datasets. Challenges in algorithm selection, run accessibility and scalability, and chaining the tools for each stage of analysis (preprocessing (<xref ref-type="bibr" rid="B17">17</xref>, <xref ref-type="bibr" rid="B18">18</xref>), batch correction, clustering, and downstream analysis) pose significant barriers in the analysis of cytometry data. Moreover, many of these clustering algorithms require selecting a clustering resolution (i.e., selecting the number of clusters), which is largely arbitrary and may reduce the unsupervised nature of these methods. 
Therefore, an integrated cluster evaluation (<xref ref-type="bibr" rid="B19">19</xref>, <xref ref-type="bibr" rid="B20">20</xref>) step is needed to compare different resolutions, guide clustering optimization, and facilitate a tool&#x2019;s usage.</p>
<p>Here, we present the CYtometry CLustering Optimization aNd Evaluation (Cyclone) (github.com/UCSF-DSCOLAB/cyclone/) pipeline for the analysis of a wide range of cytometry data, including but not limited to CyTOF, fluorescence-based cytometry, and multiplexed immunofluorescence (IF). Cyclone clusters data using FlowSOM (<xref ref-type="bibr" rid="B9">9</xref>)&#x2014;selected based on scalability and fidelity to manual gating&#x2014;and allows users to optimize and evaluate cluster resolution based on both stability and user exploration. We present Cyclone&#x2019;s performance on CyTOF datasets as well as other single-cell technologies including spectral flow cytometry and imaging. We designed Cyclone to be interoperable with outputs of the leading batch correction algorithms (<xref ref-type="bibr" rid="B21">21</xref>, <xref ref-type="bibr" rid="B22">22</xref>) and to feed into accessible downstream analysis tools (<xref ref-type="bibr" rid="B15">15</xref>, <xref ref-type="bibr" rid="B16">16</xref>). We additionally accommodate large datasets and showcase Cyclone&#x2019;s performance on downsampled data, increasing its accessibility to those with either extensive or limited computational resources. Leveraging Cyclone on a seven-color multiplexed immunofluorescence dataset obtained from human colorectal and kidney tumors, we identified a distinct tumor-associated classical dendritic cell subset. We share the R-based pipeline publicly along with extensive documentation for its use in conjunction with upstream and downstream tools. With this largely &#x201c;plug and play&#x201d; pipeline, we hope to lower the barrier to entry into high-dimensional cytometry analysis and provide a consensus tool to the research community.</p>
</sec>
<sec id="s2" sec-type="results">
<title>Results</title>
<sec id="s2_1">
<title>Building a scalable pipeline for analysis of high-dimensional cytometry data</title>
<p>To create a functional and accessible workflow for cytometry datasets, we were interested in building a pipeline optimized to meet criteria that address challenges in analyses of this nature (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1A</bold>
</xref>). This included a framework that is 1) optimized for interoperability, versatility, and ease of use; 2) designed to receive standard inputs from pre-processing steps, including batch correction; and 3) able to provide a set of outputs that can easily serve as inputs to downstream analysis and visualization tools. The pipeline required both the selection of a scalable and accurate clustering algorithm robust to downsampling and the ability to tune and evaluate clustering resolution to best meet the specifics of the biological system and scientific inquiry. Due to the wealth of existing single-cell technologies and the need for multiple measurement types to fully understand complex biological systems, we were additionally interested in developing a pipeline that would accommodate a range of single-cell cytometric data modalities including imaging.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Building a scalable pipeline and optimizing clustering for analysis of high-dimensional cytometry data. <bold>(A)</bold> Graphical depiction of Cyclone pipeline. Cyclone can intake multiple types of cytometry data, including Flow, CyTOF, and imaging data, and works for both raw and batch-corrected data. FlowSOM clustering includes a grid/clustering optimization step, where a set list of grids is calculated. After the user selects a desired resolution, Cyclone generates useful plots as well as processed data frames that can be easily handed off to other analysis tools and plotting packages, like dittoSeq or ggplot2. <bold>(B)</bold> Clustering tool performance on data through the lens of scalability (runtime and memory required) of tested tools. CLARA and FlowSOM were similar in their time and memory requirements, while PARC and PhenoGraph have less feasible runtime or memory requirements. <bold>(C)</bold> Clustering time increases as cluster number (k) increases. FlowSOM still performs better than CLARA when measuring time to cluster, regardless of cluster number requested. <bold>(D)</bold> Optimization of clustering via evaluation of the different resolutions, leveraging the Davies&#x2013;Bouldin index as an indicator (subset of full number of grids assessed; full amount of grids evaluated in the supplement). <bold>(E)</bold> Ground Truth expert &#x201c;coarse&#x201d;-level annotation identifying broad cell types based on manual gating. <bold>(F)</bold> FlowSOM cluster annotation at &#x201c;coarse&#x201d;-level based on CyTOF panel expression. <bold>(G)</bold> Heatmap comparing &#x201c;coarse&#x201d;-level annotations assignment based on the &#x201c;ground truth&#x201d; manual gating (rows) of full dataset <italic>vs.</italic> FlowSOM clustering annotated by expert immunologist based on Cyclone cluster heatmap outputs (columns). <bold>(H)</bold> Comparison metrics based on &#x201c;coarse&#x201d;-level annotations from two expert immunologists. 
Various performance metrics were used to assess the accuracy of clusters called in the FlowSOM clustering compared to ground truth.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-14-1167241-g001.tif"/>
</fig>
<p>A critical component of the pipeline was to select a clustering algorithm that could 1) scale to a large number of cells and parameters while maintaining reasonable runtimes and memory usage and 2) recapitulate populations identified via expert manual gating. We therefore evaluated a subset of available clustering algorithms, focusing on popular algorithms shown to perform well in the literature (<xref ref-type="bibr" rid="B23">23</xref>, <xref ref-type="bibr" rid="B24">24</xref>). These included PhenoGraph (<xref ref-type="bibr" rid="B11">11</xref>), a graph-based community detection algorithm; CLARA (<xref ref-type="bibr" rid="B25">25</xref>), an extension of the partitioning around medoids algorithm; FlowSOM (<xref ref-type="bibr" rid="B9">9</xref>), which uses self-organizing map clustering; and PARC (<xref ref-type="bibr" rid="B10">10</xref>), a recent combinatorial graph-based algorithm optimized for scalability. We applied these algorithms to a CyTOF dataset measuring the expression of 42 proteins at the single-cell level on peripheral blood mononuclear cells (PBMCs) from 17 individuals (Methods, <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplemental Table&#xa0;1</bold>
</xref>). We also subset the dataset to various sizes ranging from 1,000 to 10,000 to 50,000 cells per individual to evaluate both speed and memory usage on a range of dataset sizes (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1B</bold>
</xref>). While memory usage was similar across algorithms (except for PhenoGraph performance on the full dataset, which did not conclude in a reasonable amount of time), FlowSOM and CLARA utilized the least runtime across dataset sizes. We therefore proceeded to evaluate those two clustering algorithms on the full dataset. Notably, increasing the requested cluster number identified by the algorithm across the variously sized datasets strongly increased runtime for CLARA as compared to FlowSOM (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1C</bold>
</xref>), which appeared more time efficient even with larger cluster counts regardless of cell numbers. This indicated that CLARA takes more time than FlowSOM to perform a high-resolution clustering to identify cell subsets.</p>
<p>While our final pipeline does focus on R-based implementation for clustering (FlowSOM and CLARA) and permits maximum interoperability with other R-based normalization, debarcoding, and batch correction packages, early investigations explored annotations from both R and python packages; thus, we benchmarked FlowSOM and PARC cluster annotations against manual gating (<xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Figure&#xa0;1</bold>
</xref>) to evaluate their performance in identifying cell populations. To compare the accuracy of annotating clusters based on CyTOF panel expression to gating ground truth, we developed a custom cell barcode scheme (see Methods&#x2014;FCS modifications) to identify cells in the FCS and facilitate comparison of a cell&#x2019;s ground truth gating-based assignment versus how the cell was annotated by two expert immunologists. To select a resolution/grid for annotation and evaluation, we calculated the Davies&#x2013;Bouldin index (DBI) (<xref ref-type="bibr" rid="B19">19</xref>), a within-dataset similarity metric used to evaluate cluster resolution, across a variety of cluster grid sizes (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1D</bold>
</xref>, <xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure&#xa0;2A</bold>
</xref>). Thirty-six clusters (6 &#xd7; 6 grid, FlowSOM) or 37 clusters (Resolution 1.3, PARC) were selected for independent annotation by two expert immunologists to compare to &#x201c;ground truth&#x201d; manual gates from a third expert immunologist. Both ground truth gating and cluster annotations were performed for major immune cell populations (&#x201c;coarse-level&#x201d;, e.g., all CD4<sup>+</sup> T-cell subsets including memory or na&#xef;ve are annotated as CD4<sup>+</sup> T) (<xref ref-type="fig" rid="f1">
<bold>Figures&#xa0;1E, F</bold>
</xref>) and for more fine-grained sub-populations (&#x201c;fine-level&#x201d;, <xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figures&#xa0;2B&#x2013;E</bold>
</xref>); these annotations were then visualized in UMAP space. Both FlowSOM and PARC coarse annotations performed well in the recapitulation of manual ground truth gating as captured by four evaluation metrics, including accuracy, adjusted Rand index (<xref ref-type="bibr" rid="B26">26</xref>), Fowlkes&#x2013;Mallows index (<xref ref-type="bibr" rid="B27">27</xref>), and mutual information (<xref ref-type="bibr" rid="B28">28</xref>) (<xref ref-type="fig" rid="f1">
<bold>Figures&#xa0;1G, H</bold>
</xref>). As expected, while performance was not as strong on fine annotations, metrics showed reasonably accurate clustering for both clustering algorithms (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure&#xa0;2C</bold>
</xref>). Some sources of error included the fact that global clustering did not isolate small subsets including cDC1s or antibody-secreting cells (ASCs) as their own clusters at the selected resolution due to their low abundance. In addition, the accuracy of some subsets of CD4<sup>+</sup> and CD8<sup>+</sup> T cells varied due to their definition based on markers that have a continuum of expression rather than clear positive and negative expression (e.g., CD45RA). However, the unsupervised nature of the clustering facilitated the identification of cell subsets not included in our manual gating, such as CD4<sup>&#x2212;</sup>CD8<sup>&#x2212;</sup> T cells and intermediate monocytes (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1G</bold>
</xref>), and B-cell subsets based on the expression of CXCR5 or CD38 (<xref ref-type="supplementary-material" rid="SM2">
<bold>Supplemental Figure&#xa0;2B</bold>
</xref>). Notably, while the majority of clusters were easily recognizable and annotated, a small number of clusters were observed to be dispersed across the UMAP visualization (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure&#xa0;2F</bold>
</xref>), rather than having a more homogenous phenotype (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure&#xa0;2G</bold>
</xref>), which illuminated the utility of visualizing each cluster&#x2019;s dispersion as an output for a developed pipeline. Taken together, these data demonstrate that while PARC and FlowSOM perform similarly compared to expert gating and have similar performance on low numbers of cells, FlowSOM outperforms PARC when considering scalability to higher numbers of cells.</p>
</sec>
<sec id="s2_2">
<title>The Cyclone pipeline is a method for data clustering and evaluation across resolutions for use in the analysis of cytometry data</title>
<p>Based on our observations regarding runtime, memory usage, cluster evaluation methods, and performance on manual gating recapitulation, we developed the Cyclone pipeline in R (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>). We designed Cyclone to produce a series of outputs that users can access to select cluster resolution, evaluate cluster quality, annotate clusters, and utilize resulting cluster statistics (phenotypes and abundance) for downstream analyses. To start Cyclone, the user provides FCS or matrix data files as well as files to specify markers and file metadata. Cyclone expects any normalization and batch correction steps to be performed prior to the use of the pipeline. For CyTOF data, this can be performed by established R packages, including premessa (github.com/ParkerICI/premessa) for bead-based normalization and CytoNorm (<xref ref-type="bibr" rid="B21">21</xref>) or cyCombine (<xref ref-type="bibr" rid="B22">22</xref>) for batch correction. After reading in and arcsinh transforming the data, Cyclone calculates UMAP dimensionality reduction. Cyclone works with either FlowSOM or CLARA for clustering; selecting FlowSOM enables DBI-based cluster resolution optimization prior to user grid selection, while with CLARA, the user selects a single resolution for clustering. If FlowSOM is selected, Cyclone then performs iterative optimization of clustering across a variety of cluster grid sizes, which can then be compared using DBI. After the user selects the desired grid, Cyclone performs clustering, generates summary matrices, proceeds through an optional SCAFFoLD step, and then generates output files and visualizations. While not providing batch correction, Cyclone does provide a means for assessing batch or any other input file metadata via UMAP (split by batch) visualization (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>) as well as clustered heatmaps of cluster frequency with batch or other metadata information overlaid as rugs (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3B</bold>
</xref>). In addition to a DBI plot for cluster resolution selection, Cyclone outputs UMAPs colored by cluster (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3C</bold>
</xref>) as well as heatmaps of marker expression per cluster for ease of cluster annotation (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3D</bold>
</xref>). Additional UMAPs and histogram plots are provided per cluster, showing the cluster&#x2019;s distribution across the UMAP for an evaluation of cluster dispersion as an indication of cluster quality (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3E</bold>
</xref>). Feature UMAP plots of marker expression in all cells are also exported by default (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3F</bold>
</xref>). Taken together, the Cyclone pipeline provides a means of clustering, as well as evaluating and annotating these clusters, to then be readily used in downstream analyses.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Integration of different pipeline pieces and packages into a single method: &#x201c;Cyclone&#x201d;. Using information from initial metadata files (see <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplemental Tables</bold>
</xref>), FCS files are read into the pipeline. After arcsinh transformation, UMAP is calculated and clustering is performed on a default range of grid sizes, resulting in a cluster vs. DBI plot for clustering grid selection. After user input to select a specific grid, Cyclone generates feature plots of each antibody included in the panel, split UMAPs to assess cluster dispersion, various QC heatmaps and UMAPs, and a final UMAP annotated by cluster number. Additionally, expression matrices and cell metadata matrices are saved.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-14-1167241-g002.tif"/>
</fig>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Evaluation and interpretation of default pipeline outputs and results. <bold>(A)</bold> UMAPs of batch information. If batches were a part of the CyTOF run, Cyclone exports UMAP plots split by batch information to assess batch correction or batch effect. <bold>(B)</bold> Heatmaps of file &#xd7; cluster and batch &#xd7; cluster depicting cell frequency per cluster, and file &#xd7; feature depicting arcsinh-transformed data. <bold>(C)</bold> UMAP annotated by cluster number based on user-selected grid. <bold>(D)</bold> Heatmap of median arcsinh-transformed expression (unscaled) per cluster used to annotate clusters. <bold>(E)</bold> Example plots of UMAP + histogram showcasing cluster density/dispersion. <bold>(F)</bold> Example plots of protein feature expression.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-14-1167241-g003.tif"/>
</fig>
</sec>
<sec id="s2_3">
<title>Cyclone provides accessibility and interoperability with upstream processing and downstream analyses</title>
<p>We developed Cyclone to be easily leveraged by researchers with various backgrounds, especially those with limited coding knowledge and minimal computational resources. We also prioritized interoperability with both upstream and downstream tools for cytometry data processing and analysis (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4A</bold>
</xref>). In addition to significant documentation (including vignettes to get users started with the pipeline), we evaluated Cyclone on downsampled datasets to determine whether downsampling to fewer cells could provide an alternative for users to run Cyclone locally rather than needing additional compute resources for analyses. Thus, we downsampled our evaluation dataset to 50,000 cells per sample and coarsely annotated the resulting optimized clusters (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4B</bold>
</xref>, <xref ref-type="supplementary-material" rid="SF3">
<bold>Supplemental Figures&#xa0;3A, B</bold>
</xref>). We then compared the accuracy of these coarse downsampled annotations to the coarse annotations on the full dataset and encouragingly found strong concordance between the full and downsampled datasets (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4C</bold>
</xref>). When doing fine annotations (<xref ref-type="supplementary-material" rid="SF3">
<bold>Supplemental Figure&#xa0;3C</bold>
</xref>), metrics again showed reasonably accurate annotations when compared to the full dataset fine annotations (<xref ref-type="supplementary-material" rid="SF3">
<bold>Supplemental Figure&#xa0;3D</bold>
</xref>). Thus, data downsampling presents a practical option for dataset clustering with Cyclone should computing resources be a challenge for some users.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Accessibility of Cyclone&#x2014;downsampling and interoperability with upstream and downstream processing. The dataset was downsampled to 50k cells per sample and then run through Cyclone. Clusters&#x2019; cell-type identities were inferred by experts using Cyclone plot outputs. Then, Cyclone outputs from the full dataset were imported into R and read into a SingleCellExperiment object (<xref ref-type="bibr" rid="B29">29</xref>) so that further visualization and analysis could be carried out with dittoSeq. <bold>(A)</bold> Overview of interoperability challenges and our solutions. <bold>(B)</bold> UMAP from 50k downsample run, colored by coarse annotations of one expert annotator. <bold>(C)</bold> Comparison of per-cell annotations between the 50k downsample versus the full dataset. <bold>(D)</bold> Example SCAFFoLD map export depicting the assignment of each cluster (blue) to a landmark population (red). The circle size corresponds to the number of cells in the cluster. <bold>(E)</bold> Box plot showing per-sample cluster frequencies grouped by sex of the patients for coarse-level cell types created with dittoSeq&#x2019;s dittoFreqPlot function. <bold>(F)</bold> Stacked bar plot showing the percent of cells in each cluster by sex of the patients created with dittoSeq&#x2019;s dittoBarPlot function.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-14-1167241-g004.tif"/>
</fig>
<p>To optimize Cyclone interoperability with upstream and downstream processing, we considered batch correction as a primary upstream target and SCAFFoLD map analysis and accessible visualization as primary downstream targets. For batch correction, we ensured a pathway exists to enter the Cyclone workflow with cytometry data in either a matrix format [as output by cyCombine (<xref ref-type="bibr" rid="B22">22</xref>)] or adjusted-FCS files [as output by CytoNorm (<xref ref-type="bibr" rid="B21">21</xref>)] to accommodate interoperability with outputs from both commonly used batch correction methods. CytoNorm&#x2019;s adjusted-FCS files can enter into the pipeline as normal, but Cyclone&#x2019;s prepare_checkpoint1() provides an entry point for cyCombine&#x2019;s matrix format. The function takes the same primary inputs as a run without a batch correction step, except for accepting a matrix (with cells in rows and markers in columns) instead of the FCS directory. The function then performs all the same optional steps (arcsinh transformation, control sample removal, and subsampling) before outputting a Checkpoint1.Rdata file. Afterward, users can run Cyclone as normal to continue from the second step of UMAP calculation.</p>
<p>To aid in cluster annotation, SCAFFoLD connects clusters to unique landmark populations based on the cosine similarity in the feature expression space (<xref ref-type="bibr" rid="B15">15</xref>). For powering SCAFFoLD map downstream analysis, we added an optional step directly into the Cyclone pipeline; if landmark population FCS files are provided, an output *.scaffold file can be used to generate a SCAFFoLD map (<xref ref-type="bibr" rid="B15">15</xref>) via the &#x201c;scaffold::scaffold.run()&#x201d; command (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4D</bold>
</xref>). For powering accessible visualization and other downstream follow-ups, we ensured that the colors of Cyclone plot outputs use color blindness-accessible color palettes and that data outputs can be used with dittoSeq&#x2013;a color blindness-friendly visualization tool (<xref ref-type="bibr" rid="B16">16</xref>). Although dittoSeq was designed for single-cell RNAseq data, it proves generalizable to other data modalities including high-dimensional cytometry. To directly power dittoSeq integration and downstream statistical analyses, we provide an additional vignette showing how to 1) transform Cyclone data objects into a SingleCellExperiment [(<xref ref-type="bibr" rid="B29">29</xref>), Chapter four] object compatible with dittoSeq, 2) generate useful visualizations, 3) add cell-type annotations for each cluster as well as further annotations such as spatial information in the case of imaging data, and 4) run statistics on differences in cluster or cell-type frequencies between samples. Shown as examples are boxplots of how cluster frequencies compare between male and female subjects (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4E</bold>
</xref>) and the percent composition of each cluster in terms of subject sex (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4F</bold>
</xref>); these functions make probing metadata categories of interest easy to code and assist in producing publication-ready visualizations. These accessible (in both the color palette and easily leveraged graphing functions) visualizations are readily created with dittoSeq from Cyclone outputs.</p>
</sec>
<sec id="s2_4">
<title>The Cyclone pipeline generalizes to flow cytometry datasets</title>
<p>While optimization and development of Cyclone were initially for CyTOF datasets, we realized the need for such analysis techniques on a broader set of similar data modalities, including other cytometry platforms such as spectral flow cytometry. To evaluate Cyclone&#x2019;s utility for this analysis, we applied the Cyclone pipeline to a spectral flow cytometry dataset of mouse liver samples from a transgenic mouse model for hepatitis B virus (HBV) response (<xref ref-type="bibr" rid="B30">30</xref>). Liver leukocytes collected from eight mice on day 8, the peak of the immune response in this model, were analyzed using a myeloid-focused spectral flow cytometry panel (<xref ref-type="supplementary-material" rid="SM2">
<bold>Supplemental Table&#xa0;2</bold>
</xref>). In a repeat experiment, day 8 liver leukocytes from nine mice were analyzed using a lymphoid-focused panel (<xref ref-type="supplementary-material" rid="SM3">
<bold>Supplemental Table&#xa0;3</bold>
</xref>). The Cyclone pipeline could be used as built, but we used config files to specify the co-factor for the arcsinh transformation to a value more typical of flow cytometry datasets (see Spectral flow data generation and analysis&#x2014;Cyclone analysis in Methods). For both experiments, CD45<sup>+</sup> live cells were provided to the Cyclone pipeline. After selecting the local minimum DBI, we identified and annotated 21 clusters in the T Cell panel (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5A</bold>
</xref>) and 17 clusters from the Myeloid panel (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5B</bold>
</xref>). Both panels were able to distinguish several T-cell and NK-cell subsets (<xref ref-type="supplementary-material" rid="SF4">
<bold>Supplemental Figure&#xa0;4A</bold>
</xref>) as well as liver resident macrophages (Kupffer cells) and monocyte-derived macrophages (<xref ref-type="supplementary-material" rid="SF4">
<bold>Supplemental Figure&#xa0;4B</bold>
</xref>). As was previously observed for CyTOF data, unsupervised clustering using Cyclone largely recapitulated the populations identified by manual expert gating at a coarse-level (<xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5C, D</bold>
</xref>). Cyclone further enabled the identification of cell subsets across immune cell lineages (<xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5C, D</bold>
</xref>), which could have been missed by manual gating, such as NK-cell subsets with various expression of KLRG1 or CD62L (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5A</bold>
</xref>, <xref ref-type="supplementary-material" rid="SF4">
<bold>Supplemental Figure&#xa0;4A</bold>
</xref>). To evaluate the biological information contained within the unsupervised clustering, we took advantage of the presence of tetramer staining to identify the properties of the HBV-specific T cells. While the tetramer staining was not used as a clustering parameter, we found that the vast majority of CD8<sup>+</sup> (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5E</bold>
</xref>) and CD4<sup>+</sup> (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5F</bold>
</xref>) antigen-specific T cells were enriched in one or two effector T-cell clusters with high expression of activation markers identified by Cyclone for each T-cell subset (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5G</bold>
</xref>). Taken together, we observed that Cyclone performed well on spectral flow data and enabled the unsupervised identification of cell phenotypes that are associated with distinct biological features.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Utilization of Cyclone pipeline for spectral flow cytometry data. Liver leukocytes from HBVEnvRag<sup>&#x2212;/&#x2212;</sup> mice were harvested 8 days after adoptive transfer with wild-type splenocytes for spectral flow cytometry analysis. Cyclone pipeline was run on this spectral flow cytometry data. <bold>(A)</bold> &#x201c;Fine&#x201d;-level annotated UMAP of 22-color T cell-focused spectral flow cytometry panel (<xref ref-type="supplementary-material" rid="SM3">
<bold>Supplemental Table&#xa0;3</bold>
</xref>) run on nine mouse samples. <bold>(B)</bold> &#x201c;Fine&#x201d;-level annotated UMAP of 25-color myeloid-focused spectral flow cytometry panel (<xref ref-type="supplementary-material" rid="SM2">
<bold>Supplemental Table&#xa0;2</bold>
</xref>) run on eight mouse samples. Three unidentifiable &#x201c;junk&#x201d; clusters were removed from this UMAP. <bold>(C)</bold> Heatmap of &#x201c;coarse&#x201d;-level cell-type annotations comparing expert manual gating identities (rows) to Cyclone cluster annotations (columns) in <bold>(A)</bold> (T cell-focused panel). <bold>(D)</bold> Heatmap of &#x201c;coarse&#x201d;-level cell-type annotations comparing expert manual gating identities (rows) to Cyclone cluster annotations (columns) in panel <bold>(B)</bold> (Myeloid-focused panel). <bold>(E)</bold> Magnified section of panel <bold>(A)</bold> showing density plot of HBV-specific MHC class I tetramer+ CD8<sup>+</sup> T cells. <bold>(F)</bold> Magnified section of panel <bold>(A)</bold> showing expression of HBV-specific MHC class II tetramer+ on CD4<sup>+</sup> T cells. <bold>(G)</bold> Frequencies of &#x201c;fine&#x201d;-level cluster annotations among Tetramer+ CD8<sup>+</sup> or CD4<sup>+</sup> T cells. Tetramer+ cells were defined as events with fluorescence intensities 3 or more standard deviations above mean fluorescence in their respective channels.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-14-1167241-g005.tif"/>
</fig>
</sec>
<sec id="s2_5">
<title>Cyclone enables unsupervised discovery of cell-type compartmentalization within the tumor microenvironment</title>
<p>Understanding how the phenotype of individual cells relates to the function of multicellular compartments within tissues requires the ability to identify cellular phenotypes with multiple proteins while simultaneously quantifying the spatial distribution and interactions of these cells across large regions of tissue. Along with its applications demonstrated already, Cyclone provides a unique opportunity to analyze the spatial distribution of individual cell phenotypes as well as cellular neighborhoods in an unsupervised manner. To that end, we tested how the Cyclone pipeline compared to a prototypical imaging data analysis pipeline containing image visualization. First, we created a 7-plex immunofluorescent staining panel to be used on a colorectal tumor tissue (CRC1) consisting of a tumor marker (EPCAM), T-cell markers (CD3, CD4, and CD8), and myeloid markers (CD163, HLA-DR, and XCR1) (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6A</bold>
</xref>). Next, we utilized DeepCell (<xref ref-type="bibr" rid="B31">31</xref>) segmentation software to demarcate individual cells by inputting nuclei (DAPI<sup>+</sup>) and membrane markers (CD3<sup>+</sup>, CD4<sup>+</sup>, CD8<sup>+</sup>, and CD163<sup>+</sup>) and labeled each cell-type annotation based on rational gating parameters (e.g., CD4<sup>+</sup> T cell = CD3<sup>+</sup>CD4<sup>+</sup>CD8<sup>&#x2212;</sup>XCR1<sup>&#x2212;</sup>) (<xref ref-type="supplementary-material" rid="SF5">
<bold>Supplemental Figure&#xa0;5A</bold>
</xref>, table). We found that determining the manual thresholding on certain markers such as CD163<sup>+</sup> expression was not visually clear in designating a suitable cutoff as compared to CD3<sup>+</sup> expression and that this could ultimately lead to variability in the frequency of cell types annotated (<xref ref-type="supplementary-material" rid="SF5">
<bold>Supplemental Figure&#xa0;5A</bold>
</xref>, histogram plots). For example, the threshold cutoff at 11 observed CD163<sup>+</sup> expression on low-background, segmented cells, while the threshold cutoff at 13 missed CD163<sup>+</sup> cells. Thus, we opted for a middle ground by choosing threshold 12 for the downstream comparison with our Cyclone pipeline. To run the Cyclone pipeline on this multiplexed immunofluorescence data, we followed the following procedure. Once raw expression values of each marker for every cell were curated and assigned to each cell identified by DeepCell, we obtained a cell per protein expression matrix used to run the Cyclone pipeline (see Methods). After DBI evaluation, we chose a grid size of 2 &#xd7; 4, which had the lowest DBI value (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6B</bold>
</xref>). This resolution generated eight unique clusters spanning immune and non-immune cells (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6C</bold>
</xref>). These clusters comprised tumor cells expressing different levels of HLA-DR (Clusters 2 and 4), CD4<sup>+</sup> and CD8<sup>+</sup> T cells (Clusters 3 and 8), mononuclear phagocytes (MNPs) (Clusters 5, 6, and 7), and one cluster with low expression for all markers (Cluster 1) (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6D</bold>
</xref>). Interestingly, among the different MNPs, Cluster 7 is characterized by the high expression of XCR1<sup>+</sup> and CD163<sup>+</sup> and low HLA-DR expression. Since the combination of the Cyclone and DeepCell segmentation outputs provides cluster identities (Cyclone) and x,y coordinates (DeepCell) for each cell within the same data frame, we leveraged this to evaluate where these XCR1<sup>+</sup>CD163<sup>+</sup> cells were located within the tissue. When overlaying the cells assigned to Cluster 7 (green arrows) onto the image of CRC1, we were able to confirm that they had both XCR1<sup>+</sup> and CD163<sup>+</sup> expression (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6E</bold>
</xref>). Additionally, these cells had an enriched spatial distribution within the stromal compartment as opposed to cDC1s and other clusters enriched in the tumor compartment (<xref ref-type="fig" rid="f6">
<bold>Figures&#xa0;6F, G</bold>
</xref>, <xref ref-type="supplementary-material" rid="SF5">
<bold>Supplemental Figure&#xa0;5B</bold>
</xref>). We next validated that XCR1<sup>+</sup>CD163<sup>+</sup> cell types could be found in other colorectal (CRC2) and kidney (KID1) tumor samples subjected to Cyclone (<xref ref-type="supplementary-material" rid="SF5">
<bold>Supplemental Figure&#xa0;5C</bold>
</xref>). However, these samples showed different stromal and tumor enrichments for CD4<sup>+</sup> T cells, cDC1s, and XCR1<sup>+</sup>CD163<sup>+</sup> cells compared to CRC1.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Utilization of Cyclone pipeline for imaging data. <bold>(A)</bold> Representative immunofluorescence imaging of colorectal tumor biopsy using tumor marker [EPCAM (blue)], T cell markers [CD3 (red), CD4 (green), and CD8 (white)], and myeloid markers [CD163 (purple), HLA-DR (cyan), and XCR1 (yellow) staining]. Scale bar denotes 500 &#x3bc;m. <bold>(B)</bold> Scatterplot of the Davies&#x2013;Bouldin index and cluster size over multiple iterations of Louvain clustering and varying parameters using seven markers used in immunofluorescence as in panel <bold>(A)</bold>. Grid size 2 &#xd7; 4 was chosen for downstream analysis (green dot, yellow circle). <bold>(C)</bold> UMAP visualization of 45,177 cells from the colorectal tumor biopsy with specific populations annotated based on <bold>(D)</bold> an arcsinh-transformed expression heatmap of all markers row-scaled. <bold>(E)</bold> Representative immunofluorescence imaging of tumor biopsy (merge; left) with inset (middle) of XCR1<sup>+</sup> (yellow) and CD163<sup>+</sup> (blue) staining. Cluster 7 from Cyclone pipeline was overlaid and annotated (green arrows) representing XCR1<sup>+</sup>CD163<sup>+</sup> cells. Scale bar denotes 20 &#x3bc;m. <bold>(F)</bold> Full image representation (left) of stromal (pink) and tumor (blue) regions in tumor tissue biopsy (top left) with cDC1<sup>+</sup> cell Cluster 6 (top right), XCR1<sup>+</sup>CD163<sup>+</sup> cell Cluster 7 (bottom left), and CD4<sup>+</sup> T cell Cluster 8 (bottom right) overlays (black dots). <bold>(G)</bold> Log2 fold change bar plot on stromal/tumor ratio of each cluster annotation in CRC1 tumor sample.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-14-1167241-g006.tif"/>
</fig>
<p>Notably, while conventional manual gating for XCR1<sup>+</sup>CD163<sup>+</sup> cells is possible, the thresholding strategy of comparing independent markers with strict cutoffs likely misses out on what Cyclone identifies when leveraging the entire set of markers to identify cell types (<xref ref-type="supplementary-material" rid="SF5">
<bold>Supplemental Figure&#xa0;5D</bold>
</xref>). Taken together, we highlight the successful application of the Cyclone pipeline on multiplexed imaging data in tumor tissue to discriminate two different XCR1<sup>+</sup> subsets and their spatial features within the tumor microenvironment.</p>
</sec>
</sec>
<sec id="s3" sec-type="discussion">
<title>Discussion</title>
<p>In this work, we present the Cyclone pipeline&#x2014;a versatile and accessible pipeline for performing, optimizing, and evaluating clustering on cytometry datasets. The pipeline takes in single-cell measurements, performs high-dimensional clustering, allows the user to select a clustering resolution with guided metrics, and provides outputs for facile cluster annotation and downstream analysis. We confirmed the fidelity of FlowSOM clustering to expert manual gating, as well as its performance on high-dimensional datasets; to date, we have successfully applied the Cyclone pipeline with FlowSOM clustering to a 42-parameter CyTOF dataset of 50 million cells. We have released the pipeline code and documentation with the aim of making it accessible to the greater community, where it has already begun to be applied.</p>
<p>The selection of FlowSOM was based on identifying a clustering algorithm 1) that provided accurate identification of cell populations in clustering as compared to manual gating and 2) that was reasonably scalable to large datasets. In our evaluation, we found that FlowSOM could handle datasets of up to 50 million cells, while also identifying both our coarse and fine manually gated cell populations at a high level of accuracy (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>). Our assessment agrees with the benchmarking literature that also found FlowSOM to capture both abundant and rare cell populations with reasonable runtime (<xref ref-type="bibr" rid="B23">23</xref>). Central to this work was to build a tool that would be available to and easy to use by the research community, including wet-lab scientists, such that researchers are better empowered to engage with their cytometry datasets. In our collaborative research model, we brought together the computational rigor from data scientists with the perspectives, challenges, and biological intuition of wet-lab biologists for the design and development of the Cyclone pipeline. This included the consideration of high-performance computational resource access, which is not always available or accessible to wet-lab scientists. We therefore validated Cyclone&#x2019;s performance on downsampled data to offer an option for running the pipeline locally on a personal laptop. This also included addressing the challenges of varied inputs (i.e., starting with varied file formats, either FCS files or a more general matrix format), as well as a computational hand-off to downstream analysis tools, either of which could erect a barrier to researchers less proficient with coding. While commercially available tools such as OMIQ, CellEngine, and Cytobank offer clustering pipelines, it was important to us to offer a freely available pipeline that can be flexible to the computational resources available to the researcher as well as accommodate a large number of cells. In addition, we pair this clustering with the ability to evaluate clustering resolutions and perform dimensionality reduction to better enable downstream analysis.</p>
<p>Cyclone is readily generalized to a variety of cytometry datasets beyond CyTOF data, demonstrating its versatility as a pipeline for high-dimensional datasets that can be extensible and adaptable as technologies continue to evolve. We demonstrate the application of Cyclone to spectral flow cytometry data, in this case in the setting of a mouse model of viral infection, as well as immunofluorescence imaging data of the tumor microenvironment. Excitingly, Cyclone has been further applied to a CO-Detection by indEXing (CODEX) (<xref ref-type="bibr" rid="B32">32</xref>, <xref ref-type="bibr" rid="B33">33</xref>) dataset as well as a series of other CyTOF datasets (<xref ref-type="bibr" rid="B32">32</xref>) and spectral flow datasets (data not shown), further confirming its versatility.</p>
<p>These additional applications of Cyclone not only validated that the pipeline was functional in these settings but also demonstrated its strengths in identifying elements of the biological systems that may be overlooked with manual gating. Clustering of the tumor microenvironment imaging data revealed a CD163<sup>+</sup>XCR1<sup>+</sup> cell subset that was enriched in the stroma region. It is now well accepted that XCR1 expression defines the classical dendritic cells of type 1 (cDC1) across a wide range of organisms (<xref ref-type="bibr" rid="B34">34</xref>). It is therefore tempting to suggest that these cells may represent a population of CD163<sup>+</sup> DC1, which has been previously described in human breast and lung cancer patients (<xref ref-type="bibr" rid="B35">35</xref>). However, in this previous study, these CD163<sup>+</sup> DCs were defined as a discrete subset of DCs distinct from both cDC1s and cDC2s and had the ability to efficiently trigger CD103 expression in CD8<sup>+</sup> T cells <italic>in vitro</italic>, but the expression of XCR1 was not measured. The origin of these XCR1<sup>+</sup>CD163<sup>+</sup> cells remains unclear, and more work is warranted. Nevertheless, we anticipate that the flexibility and operability of the Cyclone pipeline will help address this question as well as aid in the investigation of these cells&#x2019; spatial relationship within the tumor microenvironment and better define their potential role in the tumor immune response. In the spectral flow dataset, HBV-tetramer staining could be integrated with the clustering of T-cell subsets to better phenotype these antigen-specific cells. We found that the majority of tetramer+ CD4<sup>+</sup> or CD8<sup>+</sup> T cells clustered together in their respective compartments, indicating a shared phenotype. 
Unsurprisingly for this day 8 timepoint in the immune response, MHC Class I Tetramer+ CD8<sup>+</sup> T cells and MHC Class II Tetramer+ CD4<sup>+</sup> T cells fell primarily into clusters identified as effector T cells with high expression of markers of activation, including CD44 and PD-1, and markers of high proliferative capacity, including Ly108 and TCF-1. Notably, those two markers have been previously identified in antigen-specific CD8<sup>+</sup> T cells during lymphocytic choriomeningitis virus (LCMV) chronic infection and cancer and have been associated with a cycling T-cell stage, which precedes the exhaustion program upon chronic stimulation (<xref ref-type="bibr" rid="B36">36</xref>).</p>
<p>This work has several limitations, which can be explored or developed in future work. The set of clustering algorithms we compared for selection was restricted to four popular algorithms that are prevalent in cytometry analysis and have been benchmarked elsewhere (<xref ref-type="bibr" rid="B24">24</xref>), rather than us doing a more exhaustive search <italic>de novo</italic>. In addition, though we selected FlowSOM as our default and fully optimized algorithm, CLARA and FlowSOM performed similarly in our comparisons; while the user has the option to select CLARA clustering, the DBI optimization only works with FlowSOM in our implementation. We additionally noted in our evaluation of the clustering that the selected resolutions sometimes failed to capture low-abundance populations as their own clusters, such as cDC1s, eosinophils, or ASCs. This is unsurprising and a common challenge in global clustering approaches, which can be remedied either by 1) &#x201c;over-clustering&#x201d; the data (i.e., selecting a higher resolution such that you may better capture lower abundance populations but larger populations are further partitioned and need to be subsequently merged back together into a single population) or 2) &#x201c;subclustering&#x201d; the data (e.g., taking only the myeloid subsets and clustering them with relevant markers such that only those cells and markers are partitioning the space). We also noted decreased accuracy for clusters defined by markers with a continuum of expression such as CD45RA (e.g., na&#xef;ve vs. memory T cells); however, because the placement of this manual gate on a continuum is somewhat arbitrary, a modest discrepancy between the manually defined abundance and cluster abundance seems of low importance as long as each is applied consistently across samples of interest. 
In addition, while we found these algorithms to be robust to downsampling, we have found that FlowSOM and CLARA could not accommodate a larger CyTOF data set of ~90 million cells, regardless of resource dedication. Further optimization of those algorithms is needed to be able to accommodate increasingly large cytometry datasets. Finally, while we have invested in interoperability and extensive documentation and vignettes for ease of use, Cyclone could be even more accessible to a non-coding user base with the development of a graphical user interface (GUI) such as an R-shiny application (<ext-link ext-link-type="uri" xlink:href="https://shiny.rstudio.com">https://shiny.rstudio.com</ext-link>) or as a workflow in web-based tools such as the University of California San Francisco (UCSF) Data Library (<ext-link ext-link-type="uri" xlink:href="https://datalibrary.ucsf.edu/">https://datalibrary.ucsf.edu/</ext-link>), the Chan Zuckerberg Initiative&#x2019;s CELLxGENE (<ext-link ext-link-type="uri" xlink:href="https://github.com/chanzuckerberg/cellxgene">https://github.com/chanzuckerberg/cellxgene</ext-link>), or CellEngine (<ext-link ext-link-type="uri" xlink:href="https://github.com/primitybio/cellengine-python-toolkit">https://github.com/primitybio/cellengine-python-toolkit</ext-link>), which could be the subject of future efforts. In sum, Cyclone takes the next step forward in the optimization and democratization of cytometry-based analysis tools to further power biological discovery.</p>
</sec>
<sec id="s4">
<title>Methods</title>
<sec id="s4_1">
<title>Mass cytometry data generation and preprocessing</title>
<sec id="s4_1_1">
<title>Sample collection</title>
<p>Blood samples from patients were obtained under institutional review board (IRB) protocol #11-07994, approved by the UCSF IRB, and protocol #2012.059-2 (SCoo), approved by the Sutter Health IRB, the IRB of record for the study. Written informed consent was obtained from all patients. A de-identified healthy donor sample was obtained from Vitalant Research Institute (San Francisco, CA, USA). Blood was collected into sterile EDTA vacutainer tubes (VWR International, Radnor, PA, USA) and processed within 24 hours of collection.</p>
</sec>
<sec id="s4_1_2">
<title>Sample preparation</title>
<p>PBMCs were isolated using Ficoll-Paque Plus (GE Healthcare, Chicago, IL, USA) density gradient centrifugation; after isolation, cells were aliquoted in 0.5 &#xd7; 10<sup>7</sup> cells per vial in cell freezing media (10% dimethyl sulfoxide (DMSO) in fetal bovine serum (FBS)) and cryopreserved.</p>
</sec>
<sec id="s4_1_3">
<title>CyTOF panel and staining</title>
<p>Mass cytometry was performed as described (<xref ref-type="bibr" rid="B37">37</xref>) with modifications. Briefly, primary conjugates of mass cytometry antibodies were prepared using the MaxPAR antibody conjugation kit (Fluidigm, South San Francisco, CA, USA) according to the manufacturer&#x2019;s recommended protocol. Following labeling, antibodies were diluted in Candor PBS Antibody Stabilization solution (Candor Bioscience GmbH, Wangen, Germany) supplemented with 0.02% NaN<sub>3</sub> to between 0.1 mg/mL and 0.3 mg/mL, and stored long-term at 4&#xb0;C. Each antibody clone was titrated to optimal staining concentrations using unstimulated or anti-CD3/CD28 stimulated PBMC samples. All mass cytometry antibodies and concentrations used for analysis can be found in <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplemental Table&#xa0;1</bold>
</xref>. Mass cytometry experiments were performed over the course of nine separate experiments. Each PBMC sample was thawed at 37&#xb0;C and washed in pre-warmed RPMI-1640 media (Sigma-Aldrich Life Sciences, Burlington, MA, USA) supplemented with 10% FBS (Gibco, Thermo Fisher Scientific, Waltham, MA, USA) in the presence of 250U Pierce Universal Nuclease for Cell Lysis (Thermo Fisher Scientific, Rockford, IL, USA); cells were counted using the Beckman Vi-Cell XR Cell Counter. Only samples with viability &gt;75% were used (85% viability on average); 2.5 &#xd7; 10<sup>6</sup> cells/sample were stained for 1 min with 25 mM of cisplatin (Sigma-Aldrich) in phosphate-buffered saline (PBS) plus EDTA, before undergoing quenching 1:1 with PBS/EDTA/bovine serum albumin (BSA) to determine viability. Staining was performed on a shaker (90 rpm). For staining, cells were first resuspended in cell staining media (CSM) (Fluidigm, South San Francisco, CA, USA) with 5 &#x3bc;L of Human TruStain FcX&#x2122; block (BioLegend, San Diego, CA, USA) for 5 min at room temperature to block Fc receptors, followed by staining with CXCR5 antibody in CSM (3 &#x3bc;g/mL) for 30 min at 4&#xb0;C. Cells were washed, fixed with Fix I Buffer from The Cell-ID&#x2122; 20-Plex Pd Barcoding Kit following the manufacturer&#x2019;s instructions (Fluidigm, South San Francisco, CA, USA), and barcoded by mass-tag labeling with distinct combinations of stable Pd isotopes diluted in Maxpar Barcode Perm Buffer (Fluidigm, South San Francisco, CA, USA) as described previously (<xref ref-type="bibr" rid="B5">5</xref>). Twenty barcoded samples were pooled into a single fluorescence-activated cell sorting (FACS) tube (BD Biosciences, San Jose, CA, USA) and stained with a cocktail containing surface marker antibodies (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplemental Table&#xa0;1</bold>
</xref>) in a final volume of 1,000 &#x3bc;L of CSM for 30 min at room temperature. Samples drawn at different timepoints per patient were barcoded together. Cells were then permeabilized with perm wash buffer (eBioscience, Thermo Fisher Scientific) following the manufacturer&#x2019;s instructions and then incubated with a cocktail containing intracellular marker (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplemental Table&#xa0;1</bold>
</xref>) antibodies diluted in perm wash buffer (eBioscience, Thermo Fisher Scientific) for 1 hour at 4&#xb0;C. Cells were finally stained with 191/193Ir DNA intercalator (Fluidigm, South San Francisco, CA, USA) diluted in PBS with 1.6% paraformaldehyde (PFA) (Electron Microscopy Sciences, Hatfield, PA, USA) 24 hours prior to data acquisition.</p>
</sec>
<sec id="s4_1_4">
<title>Data acquisition</title>
<p>For acquisition, cells were washed and resuspended at 1 &#xd7; 10<sup>6</sup>/mL in deionized water + 10% EQ four-element calibration beads (Fluidigm) and run on a Fluidigm CyTOF2 Helios mass cytometer within 1 week of staining.</p>
</sec>
<sec id="s4_1_5">
<title>Data preprocessing (premessa -&gt; bead normalization)</title>
<p>After data collection, we used the premessa pipeline (<ext-link ext-link-type="uri" xlink:href="https://github.com/ParkerICI/premessa">https://github.com/ParkerICI/premessa</ext-link>) to normalize data and deconvolute individual samples. From the individual sample files, normalization beads were excluded based on Ce140 and Eu153 signals. Single-cell events were identified based on Ir191 DNA signal measured against event length, and CD45<sup>&#x2212;</sup> Pt195<sup>+</sup> dead cells were excluded (<xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Figure&#xa0;1</bold>
</xref>). Potential batch effects were minimized by including a control sample from the same individual in each experimental run.</p>
</sec>
<sec id="s4_1_6">
<title>FCS modifications (addition of unique cell IDs for tracking)</title>
<p>The FCS files do not contain cell identifiers. To accurately compare cell identity from either the manually gated annotations or the FlowSOM or PARC clusters annotated by two immunology experts, we added unique identifiers to all CD45<sup>+</sup> live gated cells across all FCS files and used these files for all downstream analyses. We created the unique identifiers by combining the sample identifier and the index of each single cell in its FCS file, in the form &#x201c;&lt;sample_id&gt;_&lt;cell_id&gt;&#x201d;. In this way, each cell gained a unique barcode id used for future comparative analyses.</p>
</sec>
<sec id="s4_1_7">
<title>Batch correction</title>
<p>The samples were processed across nine batches. While the bead normalization of CyTOF data controls for the batch effects introduced due to instrument change, it does not address all factors affecting batch-to-batch variations (<xref ref-type="bibr" rid="B38">38</xref>). To evaluate the batch effects, we first clustered the single cells using CLARA (<xref ref-type="bibr" rid="B25">25</xref>) and compared the batch compositions across clusters. We observed uneven distribution of batches across clusters (data not shown). To account for this residual batch-to-batch variation, we corrected the signal for batches using CytoNorm (<xref ref-type="bibr" rid="B21">21</xref>). We used the control samples (the same sample that was replicated across batches) to train the model and adjusted the batch effects in non-control samples using nCells = 4k, nClus = 10, and the grid size of 5 &#xd7; 5 (xdim = 5; ydim = 5). We determined that CytoNorm adjustment removed the majority of batch effects from the data (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>).</p>
</sec>
<sec id="s4_1_8">
<title>Manual gating</title>
<p>The main mass cytometry gating scheme can be found in <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplemental Figure&#xa0;1</bold>
</xref> and shows the exclusion of beads, debris, dead cells, CD45<sup>&#x2212;</sup> cells. Following this removal, we show the main gating strategy for identifying major immune cell populations from the mass cytometry dataset.</p>
</sec>
<sec id="s4_1_9">
<title>Generation of subsets</title>
<p>Different numbers of cells were selected from batch-corrected FCS files to produce specific subsets of the data: 1k subset = 1,000 cells per FCS file, 10k subset = 10,000 cells per FCS file, 50k subset = 50,000 cells per FCS file, and full subset = all cells from FCS files.</p>
</sec>
</sec>
<sec id="s4_2">
<title>Evaluation of runtime and memory usage for different clustering tools</title>
<p>To scope clustering algorithms for our pipeline, we tested four Python- or R-based widely used clustering tools for cytometry data: CLARA, FlowSOM, PhenoGraph, and PARC. CyTOF analysis allows users to capture hundreds of thousands of cells, and clustering such large datasets requires runtime- and memory-efficient tools that do not compromise clustering performance. One aim of our work was to design a scalable pipeline for large datasets (containing many samples, each with hundreds of thousands of cells). Therefore, we decided to compare the runtime and memory usage of the selected clustering tools. Different parameters affect the runtime and memory usage in different tools. We observed the parameters affecting the number of clusters were the ones that controlled algorithm runtime. To perform a fair comparison between the tools, we identified these parameters affecting cluster counts and used values that produced a similar number of clusters across all four tools. We used k = 24 in CLARA, xdim = 6 and ydim = 6 in FlowSOM, k = 25 in PhenoGraph, and resolution = 1.3 in PARC. These parameters resulted in 30&#x2013;37 clusters called by each tool. We measured the time for clustering and the memory usage on CentOS nodes of a high-performance computer cluster.</p>
</sec>
<sec id="s4_3">
<title>Pipeline</title>
<sec id="s4_3_1">
<title>Inputs</title>
<p>In order to begin a Cyclone run, metadata files &#x201c;file_metadata.csv&#x201d;, &#x201c;marker_metadata.csv&#x201d;, and &#x201c;config.yml&#x201d; must be generated. These files are unique to your dataset, but Cyclone requires certain metadata to locate the FCS files and associate metadata with them. In the file metadata csv, column &#x201c;file_name&#x201d; records the name of each FCS file, &#x201c;donor_id&#x201d; denotes sample origin, &#x201c;pool_id&#x201d; denotes batch identification (if any), and &#x201c;control_sample&#x201d; is a Boolean indicating whether the sample is a control or not. The file metadata csv is created via any scripting language based on the FCS files present. To create the &#x201c;marker_metadata.csv&#x201d;, we provide &#x201c;cyclone/make_marker_metadata_csv.R&#x201d;, which will read in an FCS file, and create a file with the following columns: channel_name, marker_name, used_for_UMAP, used_for_clustering, and used_for_scaffold. It may be advantageous to use a marker for clustering, but not use that marker for UMAP calculation. Thus, the Boolean values for each marker in the &#x201c;used_for_*&#x201d; columns provided granular controls for using markers for UMAP, clustering, and SCAFFoLD analysis. The final file &#x201c;config.yml&#x201d; controls how Cyclone will be run. In brief, it contains the absolute path of FCS files and save location for pipeline outputs, location of metadata files, and parameters associated with data processing (arcsinh_cofactor, default 5), UMAP (n_neighbors, default 15; min_dist, default 0.1; spread, default 0.1; learning_rate, default 0.5; &#x201c;random&#x201d; to assign initial embedding positions), and clustering (k, default 3) to tune node connectivity. Cyclone is able to cluster with both FlowSOM and CLARA, and the choice is specified in &#x201c;config.yml&#x201d;. CLARA editable parameters are k (default 20), metric (default euclidean), and samples (default 50). 
The FlowSOM editable parameters are xdim (default 6) and ydim (default 6), which define the grid (6 &#xd7; 6). If meta_clustering with FlowSOM is TRUE (default FALSE), the &#x201c;nClus&#x201d; of FlowSOM (i.e., the number of meta-clusters) can be defined using &#x201c;k&#x201d; (default 3).</p>
</sec>
<sec id="s4_3_2">
<title>Processing steps</title>
<p>At the start of the Cyclone run, the pipeline references the metadata.csv files to read in FCS data files and creates a raw expression matrix. After the arcsinh transformation of count values, a transformed expression matrix is created using values specified in config.yml. Cyclone also creates a &#x201c;cell_metadata&#x201d; object, which associates each cell/event with file and marker metadata. Next, Cyclone uses the uwot package with default parameters to calculate UMAP on the transformed matrix with the &#x201c;used_for_UMAP&#x201d; column of the markers&#x2019; metadata file. The UMAP dimensions of each cell/event are assigned to the cell_metadata object. Next, clustering is performed using either FlowSOM or CLARA. In FlowSOM, a default series of grids (cyclone/grid_sizes.csv) is specified, and different resolutions and clustering parameters are calculated and evaluated with the DBI. After cluster optimization, the Cyclone pipeline exits to await user evaluation of the Cluster VS DBI plot (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1D</bold>
</xref>, <xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure&#xa0;2A</bold>
</xref>). If CLARA is used for clustering, the user must specify clustering parameters, and no DBI-based optimization is performed. After specifying a specific grid (FlowSOM) or specifying &#x201c;k&#x201d; in config.yml to calculate clusters (CLARA), clustering is calculated on the transformed matrix with the &#x201c;used_for_clustering&#x201d; column from the markers&#x2019; metadata file. Cluster assignment is stored in the cell_metadata object. After clustering, Cyclone calculates cluster frequency matrices (raw and normalized) and calculates cluster median expression matrix. If SCAFFoLD analysis is selected, a gated directory of landmark FCS files is required. For each cluster, Cyclone obtains the closest landmark population and stores this assignment in cluster_metadata. After calculating statistics, SCAFFoLD analysis is saved in a cluster_metadata object. With analysis completed, Cyclone outputs several helpful plots.</p>
</sec>
</sec>
<sec id="s4_4">
<title>CyTOF cluster annotation and benchmarking using manually gated populations</title>
<p>After clustering the data using FlowSOM or PARC, we performed manual annotation of the resulting clusters from both tools based on the median expression of markers. Since each individual may annotate clusters differently, we attempted to account for human-to-human variations in the manual annotation of clusters by having two immunology experts independently annotate the clusters from FlowSOM and PARC. We established &#x201c;coarse&#x201d;- and &#x201c;fine&#x201d;-level annotations. Coarse annotations describe cells of different compartment groups (e.g., CD4<sup>+</sup> T cells, CD8<sup>+</sup> T cells, and B cells). Fine annotations further parse cell types into subtypes according to their phenotype (e.g., CD8<sup>+</sup> T cells are split further into na&#xef;ve, central memory, effector memory, and effector memory re-expressing CD45RA). We then calculated similarities between cluster annotations of single cells and the single-cell annotation based on a third immunology expert&#x2019;s gating of the CyTOF data (ground truth annotation). We performed this comparison for subsets of data multiple times and calculated averages. Specifically, we subsampled the cluster annotations and manual gating annotations of 10,000 randomly selected cells and calculated the accuracy, adjusted Rand index, Fowlkes&#x2013;Mallows index, and mutual information. We repeated this process 10 times with different random seeds and calculated the mean across the iterations for each similarity metric. We performed this analysis for FlowSOM (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1H</bold>
</xref>, <xref ref-type="supplementary-material" rid="SF2">
<bold>Supplemental Figure&#xa0;2C</bold>
</xref>) and PARC annotations (data not shown) from both immunology experts.</p>
</sec>
<sec id="s4_5">
<title>dittoSeq visualizations</title>
<p>Cyclone outputs (checkpoint1.Rdata and checkpoint8.Rdata) were used to create a SingleCellExperiment object (<xref ref-type="bibr" rid="B29">29</xref>) in R containing the arcsinh transformed expression matrix, UMAP embeddings, clustering, and cell and sample-level metadata. dittoSeq (<xref ref-type="bibr" rid="B16">16</xref>) functions dittoFreqPlot and dittoBarPlot were then used to create boxplots of cluster frequencies per sample and stacked bar plots of batch composition per cluster, respectively.</p>
</sec>
<sec id="s4_6">
<title>Spectral flow data generation and analysis</title>
<sec id="s4_6_1">
<title>Mice</title>
<p>Wild-type (WT) C57BL/6 mice were purchased from Jackson Laboratory (Bar Harbor, ME, USA). HBVEnvRag<sup>&#x2212;/&#x2212;</sup> mice were previously described (<xref ref-type="bibr" rid="B30">30</xref>). Briefly, HBVEnvRag<sup>&#x2212;/&#x2212;</sup> mice were generated using HBV-Env<sup>+</sup> mice [lineage 107-5D; gift from F. Chisari, Scripps Research Institute (<xref ref-type="bibr" rid="B39">39</xref>)] backcrossed to <italic>Rag1</italic>
<sup>&#x2212;/&#x2212;</sup> C57BL/6 mice for 15 generations. HBVEnvRag<sup>&#x2212;/&#x2212;</sup> mice contain the entire envelope (subtype ayw) protein-coding region under the constitutive transcriptional control of the mouse albumin promoter. Young (3 weeks old, before weaning) or adult (8 to 12 weeks old) HBVEnvRag<sup>&#x2212;/&#x2212;</sup> mice were given 10<sup>8</sup> syngeneic splenocytes pooled from adult (8 to 12 weeks) WT mice in 0.5 mL of phosphate-buffered saline via tail vein injection. Mice were maintained at the Laboratory Animal Resource Center (LARC) facility at UCSF where health was monitored daily by the LARC staff. Experimental procedures were performed in accordance with Institutional Animal Care and Use Committee (IACUC)-approved protocols, and all efforts were made to minimize animal suffering.</p>
</sec>
<sec id="s4_6_2">
<title>Sample preparation</title>
<p>Mice were anesthetized in chambers with 1.5% oxygen and 3% isoflurane. Samples for the T cell-focused panel were isolated from the liver after perfusion and digestion. Briefly, mice were perfused via the inferior vena cava using digestion media [Hanks&#x2019; Balanced Salt Solution (HBSS), crude collagenase (0.2 mg/mL; Crescent Chemical, Islandia, NY, USA), and DNase I (0.02 mg/mL; Roche Diagnostics, Basel, Switzerland)]. Livers were forced through a 70-&#x3bc;m filter using a syringe plunger, and debris was removed by centrifugation (30 g for 3 min). Supernatants were collected and centrifuged for 10 min at 650 g. Cells were isolated from the Percoll interface using a 60%:40% Percoll gradient. Samples for the myeloid-focused panel were isolated from the liver after 6 min of perfusion via the inferior vena cava using digestion media as above. Livers were chopped and further digested with liberase and DNase I (Roche Diagnostics) [1 W&#xfc;nsch Units (WU) and 0.8 mg, respectively, in 10 mL of RPMI-1640 containing 5% FBS] for 30 min at 37&#xb0;C in a shaking water bath. Livers were forced through a 70-&#x3bc;m filter, and debris was removed by centrifugation (30 g for 3 min). Supernatants were collected and centrifuged for 10 min at 650 g. Cells were isolated from the interface of a 60%:40% Percoll gradient.</p>
</sec>
<sec id="s4_6_3">
<title>Staining and acquisition</title>
<p>Cells were prepared as above. Samples stained with the T cell-focused panel were first stained with custom HBV-specific tetramers developed by the National Institutes of Health (NIH) Tetramer Core Facility at Emory University. Cells were stained first with MHC Class II Tetramer for 1 hour at 37&#xb0;C protected from light. Next, these cells were stained with MHC Class I Tetramer for 1 hour at 4&#xb0;C protected from light. For both panels, cells were then stained with Live/Dead Fixable Blue (Thermo Fisher Scientific) according to the manufacturer&#x2019;s instructions. Next, surface markers on cells were stained according to standard protocols with anti-mouse antibodies detailed in <xref ref-type="supplementary-material" rid="SM2">
<bold>Supplemental Table&#xa0;2</bold>
</xref> (Myeloid-focused Panel) or <xref ref-type="supplementary-material" rid="SM3">
<bold>Supplemental Table&#xa0;3</bold>
</xref> (T cell-focused Panel). Finally, cells were fixed and permeabilized using FoxP3/Transcription Factor Staining Buffer Set (Thermo Fisher Scientific, cat. 00-5523-00) and stained with anti-mouse antibodies targeting intracellular markers according to standard protocols.</p>
</sec>
<sec id="s4_6_4">
<title>Data acquisition and preprocessing</title>
<p>Single-color reference controls were collected for live unmixing with calculated autofluorescence immediately before fully stained sample acquisition. For sample acquisition, cells were run the same day as preparation and staining on an Aurora flow cytometer (Cytek, Fremont, CA, USA) with a 5-laser setup at the UCSF Flow CoLab. Following sample collection, spectral signatures were checked to ensure reliable unmixing, and channel spillover was adjusted in SpectroFlo (Cytek). Prior to Cyclone and manual gating analyses, FCS files were gated on leukocyte size/granularity, singlets, live, and CD45<sup>+</sup> events in FlowJo (<xref ref-type="supplementary-material" rid="SF4">
<bold>Supplemental Figure&#xa0;4C</bold>
</xref>).</p>
</sec>
<sec id="s4_6_5">
<title>Cyclone analysis</title>
<p>The Cyclone pipeline was run as described above, specifying an arcsinh transformation cofactor of 6000, with all other values kept as default (<xref ref-type="supplementary-material" rid="SM4">
<bold>Supplemental Table&#xa0;4</bold>
</xref>). For both panels, forward scatter, side scatter, CD45, Live/Dead Blue, and autofluorescence were excluded for UMAP generation and clustering. In addition, for the T cell-focused panel, channels for tetramers were excluded for UMAP generation and clustering. For the T-cell panel, 21 clusters were identified, all of which were identified as specific lymphocyte populations (B cells, NK cells, T cells, or ILCs) or were assumed to belong to the myeloid compartment (<xref ref-type="supplementary-material" rid="SF4">
<bold>Supplemental Figure&#xa0;4A</bold>
</xref>). For the myeloid panel, 20 clusters were identified. Among these clusters, three were unidentifiable by markers in the panel (<xref ref-type="supplementary-material" rid="SF4">
<bold>Supplemental Figure&#xa0;4B</bold>
</xref>) and excluded from subsequent analysis (<xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5B, D</bold>
</xref>).</p>
</sec>
<sec id="s4_6_6">
<title>Manual gating</title>
<p>Expert manual gating was performed in FlowJo to assign unique cell-type identities to events. Example plots for T cell-focused gating strategy (<xref ref-type="supplementary-material" rid="SF4">
<bold>Supplemental Figure&#xa0;4D</bold>
</xref>) and Myeloid-focused panel (<xref ref-type="supplementary-material" rid="SF4">
<bold>Supplemental Figure&#xa0;4E</bold>
</xref>) are provided.</p>
</sec>
<sec id="s4_6_7">
<title>Annotation comparison</title>
<p>FCS files for manual gates were exported from FlowJo and then read into a flowFrame object in R using the flowCore package. Raw data were used to uniquely match events and assign annotations from manual gating and Cyclone clustering. Cells that received both manual gating and Cyclone clustering annotations were used to generate annotation concordance heatmaps (<xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5C, D</bold>
</xref>).</p>
</sec>
<sec id="s4_6_8">
<title>Tetramer+ cell visualization</title>
<p>Tetramer+ events were identified as having raw fluorescent intensities 3 or more standard deviations above the mean intensity.</p>
</sec>
</sec>
<sec id="s4_7">
<title>Imaging data generation and analysis</title>
<sec id="s4_7_1">
<title>Sample preparation</title>
<p>All patients were consented by the UCSF IPI clinical coordinator group for tissue collection under a UCSF IRB-approved protocol (UCSF IRB #20-31740). Samples were obtained after surgical excision with biopsies taken by pathology assistants to confirm the presence of tumor cells. Freshly resected samples were placed in ice-cold PBS or Leibovitz&#x2019;s L-15 medium in a 50-mL conical tube, immediately transported to the laboratory for sample labeling, and formalin fixed for imaging analysis. Clinical data on three samples were denoted as follows: CRC1 = IPICRC072, CRC2 = IPICRC057, and KID1 = IPIKID090.</p>
</sec>
<sec id="s4_7_2">
<title>Staining and imaging immunofluorescent 7-plex panel</title>
<p>A 7-plex immunofluorescent panel was created using the Ventana BenchMark Ultra (Roche Diagnostics) automated staining platform. All reagents were from Discovery (Ventana Medical Systems, Tucson, AZ, USA) and used according to the manufacturer&#x2019;s instructions, except as noted. Heat-Induced Epitope Retrieval (HIER) was performed with the Cell Conditioning 1 (CC1) solution (cat. 950-124) for 64 min at 97&#xb0;C. The primary antibodies used were CD3 (1:100, clone: D7A6E from Cell Signaling Technology, Danvers, MA, USA), CD4 (RTU, clone: SP35 from Ventana), CD8 (1:100, clone: D8A8Y from Cell Signaling Technology), CD163 (1:250, clone: EPR19518 from Abcam, Cambridge, UK), HLA-DR (1:500, clone: EPR3692 from Abcam), XCR1 (1:40, clone: D2F8T from Cell Signaling Technology), and EpCAM (1:50, clone: D9S3P, from Cell Signaling Technology). The tissue was counterstained with DAPI (Akoya cat. FP1490) for nucleus localization. The staining was conducted in two cycles: the first cycle had CD3, CD4, CD8, CD163, HLA-DR, and XCR1; the second cycle had EpCAM. Both cycles had DAPI. The slide was scanned using a whole slide scanner after each staining cycle. Finally, the images from both cycles were registered to achieve the 7-plex image shown in <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>.</p>
</sec>
<sec id="s4_7_3">
<title>Data preprocessing</title>
<p>Cell segmentation was performed by utilizing ark-analysis (v0.2.9) DeepCell (<xref ref-type="bibr" rid="B31">31</xref>) software with nuclei (DAPI) and membrane (CD3, CD4, CD8, and CD163) as modalities for segmentation. Mean fluorescent intensity was measured from each cell region of interest (ROI) and then arcsinh transformed for input into the Cyclone pipeline as a &#x201c;trans_exp.csv&#x201d; file. Manual gating was performed in a custom Napari application version 0.4.14 to classify cells as positive or negative for each marker. For stroma versus tumor region separation, QuPath (version 0.3) software was used to annotate each region using EpCAM for colorectal or PanCK for kidney as a marker reference. Finally, cluster labels generated from the Cyclone pipeline or manual gating were exported as a csv file corresponding to each cell ROI and integrated with a segmented imaging file for cluster overlay in Napari.</p>
</sec>
<sec id="s4_7_4">
<title>Cyclone analysis</title>
<p>After determining an optimal grid size, the Cyclone output heatmap showing arcsinh-transformed marker expression data, scaled by row, was used to annotate clusters. The log2 fold change of each cluster in stroma versus tumor was determined by dividing the frequency of the cluster in stroma by its frequency in tumor and applying a log2 transformation. Contour plots comparing Cyclone versus manual gating were generated in Python (version 3.8.12) by plotting arcsinh-transformed cell ROIs annotated as XCR1 and CD163.</p>
</sec>
</sec>
</sec>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The FCS files for the CyTOF experiments used in this study are deposited and publicly available at FlowRepository (<ext-link ext-link-type="uri" xlink:href="http://flowrepository.org/id/FR-FCM-Z6LS">http://flowrepository.org/id/FR-FCM-Z6LS</ext-link>). The Cyclone pipeline and package are available at <ext-link ext-link-type="uri" xlink:href="https://github.com/UCSF-DSCOLAB/cyclone/">https://github.com/UCSF-DSCOLAB/cyclone/</ext-link>. The documentation on running Cyclone is available at <ext-link ext-link-type="uri" xlink:href="https://github.com/UCSF-DSCOLAB/cyclone/blob/main/vignettes/Running.md">https://github.com/UCSF-DSCOLAB/cyclone/blob/main/vignettes/Running.md</ext-link>, and the documentation on follow-up analyses is available at <ext-link ext-link-type="uri" xlink:href="https://github.com/UCSF-DSCOLAB/cyclone/blob/main/vignettes/FollowUp.html">https://github.com/UCSF-DSCOLAB/cyclone/blob/main/vignettes/FollowUp.html</ext-link>.</p>
</sec>
<sec id="s6" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The studies involving humans were approved by UCSF and Sutter Health Institutional Review Boards. The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study. All animal experiments done in this study were approved by the UCSF Institutional Animal Care and Use Committee. The study was conducted in accordance with the local legislation and institutional requirements.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>RP and RJ contributed equally to this manuscript. RP, RJ, KI, NDC, TC, NWC, BS, DB, LL-J, JJ, JP, and LA generated and analyzed data and contributed to the manuscript by providing figures, tables, and important intellectual contributions. RP, RJ, AC, and GF had full access to all the data in the study and take responsibility for the integrity of the data and the accuracy of the analyses. RP, RJ, KI, DB, NDC, AC, and GF wrote and edited the manuscript. RP, RJ, BS, DB, AR, and LL-J performed computational analyses of the data. MN and NWC were part of the UCSF Immunoprofiler team who performed tissue preparation and staining for the immunofluorescence performed on the different human tumor specimens. RP, RJ, BS, DB, and AR developed the Cyclone package and wrote the tutorial. AE and KI performed computational analysis of the multiplexed immunofluorescence data on human tumors. MK, SC, JB, AC, and GF were actively involved in the direction of projects and provided financial resources for the obtainment of the data. AC and GF co-led this project. All authors edited and critically revised the manuscript for important intellectual content and gave final approval for the version to be published.</p>
</sec>
</body>
<back>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>Acquisition and analysis of certain human samples described in this study were partially funded by contributions from AbbVie, Amgen, Bristol-Myers Squibb, and Pfizer as part of the UCSF Immunoprofiler Initiative. Data acquisition was further supported by R01DK103735 and P30DK026743. Further support came from the Bakar ImmunoX Initiative and ImmunoX Computational Biology Initiative at UCSF funding for AC, GF, DB, RP, and RJ.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We thank all members of the Combes and Fragiadakis labs, UCSF Bakar ImmunoX Initiative, UCSF CoLabs, and the UCSF Immunoprofiler Consortium for discussion and guidance while developing this study. We would like to thank Suprita Trilok for help with spectral flow cytometry. We would like to thank Drs David Erle, Michael G Kattah, and Elvira Mennillo for scientific discussion and editing the manuscript. We would like to particularly thank Isabelle Tingin, Garry Shumakher, Elizabeth Edmiston, and Meghan Zubradt for their constant support and help during this study. Finally, we thank all patients and their families for placing their trust in us. </p>
</ack>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>AC is a shareholder and member of the scientific advisory board of Foundery Innovations. AC receives funding from Genentech, Corbus, and Eli Lilly. GF receives funding from Eli Lilly. MK is a founder and shareholder of Pionyr Immunotherapeutics and Foundery Innovations.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s11" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fimmu.2023.1167241/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fimmu.2023.1167241/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table_1.xlsx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"/>
<supplementary-material xlink:href="Table_2.xlsx" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"/>
<supplementary-material xlink:href="Table_3.xlsx" id="SM3" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"/>
<supplementary-material xlink:href="Table_4.xlsx" id="SM4" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"/>
<supplementary-material xlink:href="Image_1.pdf" id="SF1" mimetype="application/pdf">
<label>Supplementary Figure&#xa0;1</label>
<caption>
<p>CyTOF manual gating of human PBMC. <bold>(A)</bold> Pre-Gating: gating out beads, debris, dead cells, RBC and granulocytes. <bold>(B)</bold> Hierarchical gating was applied to identify 22 &#x201c;landmark&#x201d; immune populations: CD14<sup>+</sup> CD16<sup>-</sup> classical monocytes, CD14<sup>-</sup>CD16<sup>+</sup> nonclassical monocytes, CD14<sup>+</sup> CD16<sup>+</sup> intermediate monocytes, cDC1, cDC2, pDC, basophils, Natural Killer cells, regulatory CD4<sup>+</sup> T cells, CD4<sup>+</sup> T cells (Naive, T<sub>CM</sub>, T<sub>EM</sub>, T<sub>EMRA</sub>), CD8<sup>+</sup> T cells (Naive, T<sub>CM</sub>, T<sub>EM</sub>, T<sub>EMRA</sub>), &#x3b3;&#x3b4;<sup>+</sup> T cells, B cells, ASC (antibody producing cells) HLA-DRpos (CXCR5- B cells), plasmablasts. Also shown is the gating of eosinophils (CD15<sup>+</sup> CD16<sup>+</sup> HLA-DR<sup>-</sup>) and neutrophils (CD15<sup>+</sup> CD16<sup>-</sup> HLA-DR<sup>-</sup>).</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image_2.tif" id="SF2" mimetype="image/tiff">
<label>Supplementary Figure&#xa0;2</label>
<caption>
<p>Assessment of &#x201c;Fine&#x201d;-level annotations and metrics for evaluating Cyclone outputs. <bold>(A)</bold> Full Davies-Bouldin index plot showing up to 200 potential clusters identified through FlowSOM. <bold>(B)</bold> Heatmap of full dataset &#x201c;fine&#x201d;-level annotations identifying cell types and cell subtypes, based on ground truth (GT) manual gating (rows) compared to annotated FlowSOM clusters (columns). <bold>(C)</bold> Comparison metrics based on &#x201c;fine&#x201d;-level annotations from two individuals. Various performance metrics were used to assess the accuracy of clusters called in the FlowSOM clustering compared to ground truth. <bold>(D)</bold> Ground Truth expert cluster &#x201c;fine&#x201d;-level annotation identifying broad cell types and specific cell subtypes based on manual gating. <bold>(E)</bold> FlowSOM clustering &#x201c;fine&#x201d;-level annotations based on CyTOF panel expression. <bold>(F)</bold> Depiction of a cluster dispersed across UMAP space (Cluster 26) with a heterogeneous protein expression profile compared to <bold>(G)</bold> a cluster with uniform protein expression and tight UMAP localization (Cluster 28).</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image_3.tif" id="SF3" mimetype="image/tiff">
<label>Supplementary Figure&#xa0;3</label>
<caption>
<p>&#x201c;Fine&#x201d;-level annotations after running Cyclone on the downsampled dataset. The dataset was down-sampled to 50k cells per sample and then run through Cyclone. Clusters&#x2019; cell type identities were inferred by experts using Cyclone plot outputs. <bold>(A)</bold> UMAP annotated by cluster number. <bold>(B)</bold> Heatmap of median arcsinh transformed expression (unscaled) per cluster, used to annotate clusters. <bold>(C)</bold> UMAP from 50k down-sample run, colored by fine annotations. <bold>(D)</bold> Comparison of per-cell annotations between the 50k down-sample versus the full dataset. Various performance metrics were used to assess the accuracy of clusters called in the downsampled dataset compared to the full dataset.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image_4.tif" id="SF4" mimetype="image/tiff">
<label>Supplementary Figure&#xa0;4</label>
<caption>
<p>Spectral flow cytometry cell type identification. <bold>(A)</bold> Heatmap of markers used for UMAP generation, clustering, and identification for &#x201c;fine&#x201d;-level Cyclone clusters for the spectral flow cytometry dataset with a T cell-focused panel presented in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5A</bold>
</xref>. <bold>(B)</bold> Heatmap of markers used for UMAP generation, clustering, and identification for &#x201c;fine&#x201d;-level Cyclone clusters for the spectral flow cytometry dataset with a Myeloid-focused panel presented in <xref ref-type="fig" rid="f5">
<bold>5B</bold>
</xref>. <bold>(C)</bold> Representative two-dimensional flow plots demonstrating pre-gating on live CD45+ cells before analysis with either Cyclone or expert manual gating in FlowJo. <bold>(D)</bold> Manual gating strategy for samples in the T cell-focused panel. <bold>(E)</bold> Manual gating strategy for samples in the Myeloid-focused panel.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image_5.tif" id="SF5" mimetype="image/tiff">
<label>Supplementary Figure&#xa0;5</label>
<caption>
<p>Resolution of manual gating and Cyclone clustering. <bold>(A)</bold> Table (top, left) depicting cell type annotations based on the 7 markers used and cell frequency histograms with manual gating thresholds (red line) for cells above (yellow) or below (blue) the threshold for the CD163 and CD3 markers. Thresholds were set on CD163 and CD3 at 11, 12, and 13 to denote the cutoff and visualization of annotated macrophage<sup>+</sup> cells (pink dots). Circled white regions indicate examples of over-thresholding (threshold 11) and under-thresholding (threshold 13). Scale bar denotes 20 &#x3bc;m. <bold>(B)</bold> Frequency bar plot of each cluster annotation divided into stromal (pink) or tumor (blue) compartments. <bold>(C)</bold> Log2 fold change bar plot on stromal/tumor ratio for each annotated cluster in CRC1, CRC2, and KID1 tumor samples. <bold>(D)</bold> Scatterplot of cDC1<sup>+</sup> (pink), XCR1<sup>+</sup>CD163<sup>+</sup> (blue), and other cells, compared between the Cyclone pipeline (left) and manual gating (right).</p>
</caption>
</supplementary-material>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ginhoux</surname> <given-names>F</given-names>
</name>
<name>
<surname>Yalin</surname> <given-names>A</given-names>
</name>
<name>
<surname>Dutertre</surname> <given-names>CA</given-names>
</name>
<name>
<surname>Amit</surname> <given-names>I</given-names>
</name>
</person-group>. <article-title>Single-cell immunology: Past, present, and future</article-title>. <source>Immunity</source> (<year>2022</year>) <volume>55</volume>(<issue>3</issue>):<fpage>393</fpage>&#x2013;<lpage>404</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.immuni.2022.02.006</pub-id>
</citation>
</ref>
<ref id="B2">
<label>2</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stuart</surname> <given-names>T</given-names>
</name>
<name>
<surname>Butler</surname> <given-names>A</given-names>
</name>
<name>
<surname>Hoffman</surname> <given-names>P</given-names>
</name>
<name>
<surname>Hafemeister</surname> <given-names>C</given-names>
</name>
<name>
<surname>Papalexi</surname> <given-names>E</given-names>
</name>
<name>
<surname>Mauck</surname> <given-names>WM</given-names>
</name>
<etal/>
</person-group>. <article-title>Comprehensive integration of single-cell data</article-title>. <source>Cell</source> (<year>2019</year>) <volume>177</volume>(<issue>7</issue>):<page-range>1888&#x2013;902</page-range>. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2019.05.031</pub-id>
</citation>
</ref>
<ref id="B3">
<label>3</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bendall</surname> <given-names>SC</given-names>
</name>
<name>
<surname>Simonds</surname> <given-names>EF</given-names>
</name>
<name>
<surname>Qiu</surname> <given-names>P</given-names>
</name>
<name>
<surname>Amir</surname> <given-names>ED</given-names>
</name>
<name>
<surname>Krutzik</surname> <given-names>PO</given-names>
</name>
<name>
<surname>Finck</surname> <given-names>R</given-names>
</name>
<etal/>
</person-group>. <article-title>Single-cell mass cytometry of differential immune and drug responses across a human hematopoietic continuum</article-title>. <source>Science</source> (<year>2011</year>) <volume>332</volume>(<issue>6030</issue>):<page-range>687&#x2013;96</page-range>. doi: <pub-id pub-id-type="doi">10.1126/science.1198704</pub-id>
</citation>
</ref>
<ref id="B4">
<label>4</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nolan</surname> <given-names>JP</given-names>
</name>
<name>
<surname>Condello</surname> <given-names>D</given-names>
</name>
</person-group>. <article-title>Spectral flow cytometry</article-title>. <source>Curr Protoc Cytom</source> (<year>2013</year>) <volume>27</volume>(<issue>63</issue>):<page-range>1.27.1&#x2013;1.27.13</page-range>. doi: <pub-id pub-id-type="doi">10.1002/0471142956.cy0127s63</pub-id>
</citation>
</ref>
<ref id="B5">
<label>5</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Spitzer</surname> <given-names>MH</given-names>
</name>
<name>
<surname>Carmi</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Reticker-Flynn</surname> <given-names>NE</given-names>
</name>
<name>
<surname>Kwek</surname> <given-names>SS</given-names>
</name>
<name>
<surname>Madhireddy</surname> <given-names>D</given-names>
</name>
<name>
<surname>Martins</surname> <given-names>MM</given-names>
</name>
<etal/>
</person-group>. <article-title>Systemic immunity is required for effective cancer immunotherapy</article-title>. <source>Cell</source> (<year>2017</year>) <volume>168</volume>(<issue>3</issue>):<fpage>487</fpage>&#x2013;<lpage>502</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2016.12.022</pub-id>
</citation>
</ref>
<ref id="B6">
<label>6</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gaudilli&#xe8;re</surname> <given-names>B</given-names>
</name>
<name>
<surname>Fragiadakis</surname> <given-names>GK</given-names>
</name>
<name>
<surname>Bruggner</surname> <given-names>RV</given-names>
</name>
<name>
<surname>Nicolau</surname> <given-names>M</given-names>
</name>
<name>
<surname>Finck</surname> <given-names>R</given-names>
</name>
<name>
<surname>Tingle</surname> <given-names>M</given-names>
</name>
<etal/>
</person-group>. <article-title>Clinical recovery from surgery correlates with single-cell immune signatures</article-title>. <source>Sci Transl Med</source> (<year>2014</year>) <volume>6</volume>(<issue>255</issue>):<fpage>255ra131</fpage>. doi: <pub-id pub-id-type="doi">10.1126/scitranslmed.3009701</pub-id>
</citation>
</ref>
<ref id="B7">
<label>7</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rahil</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Leylek</surname> <given-names>R</given-names>
</name>
<name>
<surname>Sch&#xfc;rch</surname> <given-names>CM</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>H</given-names>
</name>
<name>
<surname>Bjornson-Hooper</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Christensen</surname> <given-names>SR</given-names>
</name>
<etal/>
</person-group>. <article-title>Landscape of coordinated immune responses to H1N1 challenge in humans</article-title>. <source>J Clin Invest</source> (<year>2020</year>) <volume>130</volume>(<issue>11</issue>):<page-range>5800&#x2013;16</page-range>. doi: <pub-id pub-id-type="doi">10.1172/JCI137265</pub-id>
</citation>
</ref>
<ref id="B8">
<label>8</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vallv&#xe9;-Juanico</surname> <given-names>J</given-names>
</name>
<name>
<surname>George</surname> <given-names>AF</given-names>
</name>
<name>
<surname>Sen</surname> <given-names>S</given-names>
</name>
<name>
<surname>Thomas</surname> <given-names>R</given-names>
</name>
<name>
<surname>Shin</surname> <given-names>M-G</given-names>
</name>
<name>
<surname>Kushnoor</surname> <given-names>D</given-names>
</name>
<etal/>
</person-group>. <article-title>Deep immunophenotyping reveals endometriosis is marked by dysregulation of the mononuclear phagocytic system in endometrium and peripheral blood</article-title>. <source>BMC Med</source> (<year>2022</year>) <volume>20</volume>(<issue>1</issue>):<fpage>158</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12916-022-02359-4</pub-id>
</citation>
</ref>
<ref id="B9">
<label>9</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Quintelier</surname> <given-names>K</given-names>
</name>
<name>
<surname>Couckuyt</surname> <given-names>A</given-names>
</name>
<name>
<surname>Emmaneel</surname> <given-names>A</given-names>
</name>
<name>
<surname>Aerts</surname> <given-names>J</given-names>
</name>
<name>
<surname>Saeys</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Van Gassen</surname> <given-names>S</given-names>
</name>
</person-group>. <article-title>Analyzing high-dimensional cytometry data using FlowSOM</article-title>. <source>Nat Protoc</source> (<year>2021</year>) <volume>16</volume>(<issue>8</issue>):<page-range>3775&#x2013;801</page-range>. doi: <pub-id pub-id-type="doi">10.1038/s41596-021-00550-0</pub-id>
</citation>
</ref>
<ref id="B10">
<label>10</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stassen</surname> <given-names>SV</given-names>
</name>
<name>
<surname>Siu</surname> <given-names>DMD</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>KCM</given-names>
</name>
<name>
<surname>Ho</surname> <given-names>JWK</given-names>
</name>
<name>
<surname>So</surname> <given-names>HKH</given-names>
</name>
<name>
<surname>Tsia</surname> <given-names>KK</given-names>
</name>
</person-group>. <article-title>PARC: ultrafast and accurate clustering of phenotypic data of millions of single cells</article-title>. <source>Bioinformatics</source> (<year>2020</year>) <volume>36</volume>(<issue>9</issue>):<page-range>2778&#x2013;86</page-range>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa042</pub-id>
</citation>
</ref>
<ref id="B11">
<label>11</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Levine</surname> <given-names>JH</given-names>
</name>
<name>
<surname>Simonds</surname> <given-names>EF</given-names>
</name>
<name>
<surname>Bendall</surname> <given-names>SC</given-names>
</name>
<name>
<surname>Davis</surname> <given-names>KL</given-names>
</name>
<name>
<surname>Amir</surname> <given-names>ED</given-names>
</name>
<name>
<surname>Tadmor</surname> <given-names>MD</given-names>
</name>
<etal/>
</person-group>. <article-title>Data-driven phenotypic dissection of AML reveals progenitor-like cells that correlate with prognosis</article-title>. <source>Cell</source> (<year>2015</year>) <volume>162</volume>(<issue>1</issue>):<page-range>184&#x2013;97</page-range>. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2015.05.047</pub-id>
</citation>
</ref>
<ref id="B12">
<label>12</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Amir</surname> <given-names>ED</given-names>
</name>
<name>
<surname>Davis</surname> <given-names>KL</given-names>
</name>
<name>
<surname>Tadmor</surname> <given-names>MD</given-names>
</name>
<name>
<surname>Simonds</surname> <given-names>EF</given-names>
</name>
<name>
<surname>Levine</surname> <given-names>JH</given-names>
</name>
<name>
<surname>Bendall</surname> <given-names>SC</given-names>
</name>
<etal/>
</person-group>. <article-title>viSNE enables visualization of high dimensional single-cell data and reveals phenotypic heterogeneity of leukemia</article-title>. <source>Nat Biotechnol</source> (<year>2013</year>) <volume>31</volume>(<issue>6</issue>):<page-range>545&#x2013;52</page-range>. doi: <pub-id pub-id-type="doi">10.1038/nbt.2594</pub-id>
</citation>
</ref>
<ref id="B13">
<label>13</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Anchang</surname> <given-names>B</given-names>
</name>
<name>
<surname>Hart</surname> <given-names>TDP</given-names>
</name>
<name>
<surname>Bendall</surname> <given-names>SC</given-names>
</name>
<name>
<surname>Qiu</surname> <given-names>P</given-names>
</name>
<name>
<surname>Bjornson</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Linderman</surname> <given-names>M</given-names>
</name>
<etal/>
</person-group>. <article-title>Visualization and cellular hierarchy inference of single-cell data using SPADE</article-title>. <source>Nat Protoc</source> (<year>2016</year>) <volume>11</volume>(<issue>7</issue>):<page-range>1264&#x2013;79</page-range>. doi: <pub-id pub-id-type="doi">10.1038/nprot.2016.066</pub-id>
</citation>
</ref>
<ref id="B14">
<label>14</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Polikowsky</surname> <given-names>HG</given-names>
</name>
<name>
<surname>Drake</surname> <given-names>KA</given-names>
</name>
</person-group>. <article-title>Supervised machine learning with CITRUS for single cell biomarker discovery</article-title>. <source>Methods Mol Biol</source> (<year>2019</year>) <volume>1989</volume>:<page-range>309&#x2013;32</page-range>. doi: <pub-id pub-id-type="doi">10.1007/978-1-4939-9454-0_20</pub-id>
</citation>
</ref>
<ref id="B15">
<label>15</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Spitzer</surname> <given-names>MH</given-names>
</name>
<name>
<surname>Gherardini</surname> <given-names>PF</given-names>
</name>
<name>
<surname>Fragiadakis</surname> <given-names>GK</given-names>
</name>
<name>
<surname>Bhattacharya</surname> <given-names>N</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>RT</given-names>
</name>
<name>
<surname>Hotson</surname> <given-names>AN</given-names>
</name>
<etal/>
</person-group>. <article-title>An interactive reference framework for modeling a dynamic immune system</article-title>. <source>Science</source> (<year>2015</year>) <volume>349</volume>(<issue>6244</issue>):<fpage>1259425</fpage>. doi: <pub-id pub-id-type="doi">10.1126/science.1259425</pub-id>
</citation>
</ref>
<ref id="B16">
<label>16</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bunis</surname> <given-names>DG</given-names>
</name>
<name>
<surname>Andrews</surname> <given-names>J</given-names>
</name>
<name>
<surname>Fragiadakis</surname> <given-names>GK</given-names>
</name>
<name>
<surname>Burt</surname> <given-names>TD</given-names>
</name>
<name>
<surname>Sirota</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>dittoSeq: universal user-friendly single-cell and bulk RNA sequencing visualization toolkit</article-title>. <source>Bioinformatics</source> (<year>2020</year>) <volume>36</volume>:<page-range>5535&#x2013;5536</page-range>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa1011</pub-id>
</citation>
</ref>
<ref id="B17">
<label>17</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Finck</surname> <given-names>R</given-names>
</name>
<name>
<surname>Simonds</surname> <given-names>EF</given-names>
</name>
<name>
<surname>Jager</surname> <given-names>A</given-names>
</name>
<name>
<surname>Krishnaswamy</surname> <given-names>S</given-names>
</name>
<name>
<surname>Sachs</surname> <given-names>K</given-names>
</name>
<name>
<surname>Fantl</surname> <given-names>W</given-names>
</name>
<etal/>
</person-group>. <article-title>Normalization of mass cytometry data with bead standards</article-title>. <source>Cytometry A</source> (<year>2013</year>) <volume>83</volume>(<issue>5</issue>):<page-range>483&#x2013;94</page-range>. doi: <pub-id pub-id-type="doi">10.1002/cyto.a.22271</pub-id>
</citation>
</ref>
<ref id="B18">
<label>18</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bodenmiller</surname> <given-names>B</given-names>
</name>
<name>
<surname>Zunder</surname> <given-names>ER</given-names>
</name>
<name>
<surname>Finck</surname> <given-names>R</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>TJ</given-names>
</name>
<name>
<surname>Savig</surname> <given-names>ES</given-names>
</name>
<name>
<surname>Bruggner</surname> <given-names>RV</given-names>
</name>
<etal/>
</person-group>. <article-title>Multiplexed mass cytometry profiling of cellular states perturbed by small-molecule regulators</article-title>. <source>Nat Biotechnol</source> (<year>2012</year>) <volume>30</volume>(<issue>9</issue>):<page-range>858&#x2013;67</page-range>. doi: <pub-id pub-id-type="doi">10.1038/nbt.2317</pub-id>
</citation>
</ref>
<ref id="B19">
<label>19</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Davies</surname> <given-names>DL</given-names>
</name>
<name>
<surname>Bouldin</surname> <given-names>DW</given-names>
</name>
</person-group>. <article-title>A cluster separation measure</article-title>. <source>IEEE Trans Pattern Anal Mach Intell</source> (<year>1979</year>) <volume>1</volume>(<issue>2</issue>):<page-range>224&#x2013;7</page-range>. doi: <pub-id pub-id-type="doi">10.1109/TPAMI.1979.4766909</pub-id>
</citation>
</ref>
<ref id="B20">
<label>20</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Patterson-Cross</surname> <given-names>RB</given-names>
</name>
<name>
<surname>Levine</surname> <given-names>AJ</given-names>
</name>
<name>
<surname>Menon</surname> <given-names>V</given-names>
</name>
</person-group>. <article-title>Selecting single cell clustering parameter values using subsampling-based robustness metrics</article-title>. <source>BMC Bioinf</source> (<year>2021</year>) <volume>22</volume>(<issue>1</issue>):<fpage>39</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12859-021-03957-4</pub-id>
</citation>
</ref>
<ref id="B21">
<label>21</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Van Gassen</surname> <given-names>S</given-names>
</name>
<name>
<surname>Gaudilliere</surname> <given-names>B</given-names>
</name>
<name>
<surname>Angst</surname> <given-names>MS</given-names>
</name>
<name>
<surname>Saeys</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Aghaeepour</surname> <given-names>N</given-names>
</name>
</person-group>. <article-title>CytoNorm: A normalization algorithm for cytometry data</article-title>. <source>Cytometry A</source> (<year>2020</year>) <volume>97</volume>(<issue>3</issue>):<page-range>268&#x2013;78</page-range>. doi: <pub-id pub-id-type="doi">10.1002/cyto.a.23904</pub-id>
</citation>
</ref>
<ref id="B22">
<label>22</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pedersen</surname> <given-names>CB</given-names>
</name>
<name>
<surname>Dam</surname> <given-names>SH</given-names>
</name>
<name>
<surname>Barnkob</surname> <given-names>MB</given-names>
</name>
<name>
<surname>Leipold</surname> <given-names>MD</given-names>
</name>
<name>
<surname>Purroy</surname> <given-names>N</given-names>
</name>
<name>
<surname>Rassenti</surname> <given-names>LZ</given-names>
</name>
<etal/>
</person-group>. <article-title>cyCombine allows for robust integration of single-cell cytometry datasets within and across technologies</article-title>. <source>Nat Commun</source> (<year>2022</year>) <volume>13</volume>(<issue>1</issue>):<fpage>1698</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-022-29383-5</pub-id>
</citation>
</ref>
<ref id="B23">
<label>23</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weber</surname> <given-names>LM</given-names>
</name>
<name>
<surname>Robinson</surname> <given-names>MD</given-names>
</name>
</person-group>. <article-title>Comparison of clustering methods for high-dimensional single-cell flow and mass cytometry data</article-title>. <source>Cytometry A</source> (<year>2016</year>) <volume>89</volume>(<issue>12</issue>):<page-range>1084&#x2013;96</page-range>. doi: <pub-id pub-id-type="doi">10.1002/cyto.a.23030</pub-id>
</citation>
</ref>
<ref id="B24">
<label>24</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>X</given-names>
</name>
<name>
<surname>Song</surname> <given-names>W</given-names>
</name>
<name>
<surname>Wong</surname> <given-names>BY</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>T</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>S</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>GN</given-names>
</name>
<etal/>
</person-group>. <article-title>A comparison framework and guideline of clustering methods for mass cytometry data</article-title>. <source>Genome Biol</source> (<year>2019</year>) <volume>20</volume>(<issue>1</issue>):<fpage>297</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13059-019-1917-7</pub-id>
</citation>
</ref>
<ref id="B25">
<label>25</label>
<citation citation-type="book">
<person-group person-group-type="editor">
<name>
<surname>Kaufman</surname> <given-names>L</given-names>
</name>
<name>
<surname>Rousseeuw</surname> <given-names>PJ</given-names>
</name>
</person-group> eds. <source>Finding groups in data: an introduction to cluster analysis</source>. <publisher-loc>Hoboken, NJ, USA</publisher-loc>: <publisher-name>John Wiley &amp; Sons, Inc</publisher-name> (<year>1990</year>).</citation>
</ref>
<ref id="B26">
<label>26</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hubert</surname> <given-names>L</given-names>
</name>
<name>
<surname>Arabie</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>Comparing partitions</article-title>. <source>J Classification</source> (<year>1985</year>) <volume>2</volume>(<issue>1</issue>):<fpage>193</fpage>&#x2013;<lpage>218</lpage>. doi: <pub-id pub-id-type="doi">10.1007/BF01908075</pub-id>
</citation>
</ref>
<ref id="B27">
<label>27</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fowlkes</surname> <given-names>EB</given-names>
</name>
<name>
<surname>Mallows</surname> <given-names>CL</given-names>
</name>
</person-group>. <article-title>A method for comparing two hierarchical clusterings</article-title>. <source>J Am Stat Assoc</source> (<year>1983</year>) <volume>78</volume>(<issue>383</issue>):<page-range>553&#x2013;69</page-range>. doi: <pub-id pub-id-type="doi">10.1080/01621459.1983.10478008</pub-id>
</citation>
</ref>
<ref id="B28">
<label>28</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Cover</surname> <given-names>TM</given-names>
</name>
<name>
<surname>Thomas</surname> <given-names>JA</given-names>
</name>
</person-group>. <source>Elements of information theory</source>. <publisher-loc>Hoboken, NJ, USA</publisher-loc>: <publisher-name>John Wiley &amp; Sons, Inc</publisher-name> (<year>2005</year>).</citation>
</ref>
<ref id="B29">
<label>29</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Amezquita</surname> <given-names>RA</given-names>
</name>
<name>
<surname>Lun</surname> <given-names>ATL</given-names>
</name>
<name>
<surname>Becht</surname> <given-names>E</given-names>
</name>
<name>
<surname>Carey</surname> <given-names>VJ</given-names>
</name>
<name>
<surname>Carpp</surname> <given-names>LN</given-names>
</name>
<name>
<surname>Geistlinger</surname> <given-names>L</given-names>
</name>
<etal/>
</person-group>. <article-title>Orchestrating single-cell analysis with Bioconductor</article-title>. <source>Nat Methods</source> (<year>2020</year>) <volume>17</volume>(<issue>2</issue>):<page-range>137&#x2013;45</page-range>. doi: <pub-id pub-id-type="doi">10.1038/s41592-019-0654-x</pub-id>
</citation>
</ref>
<ref id="B30">
<label>30</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baron</surname> <given-names>JL</given-names>
</name>
<name>
<surname>Gardiner</surname> <given-names>L</given-names>
</name>
<name>
<surname>Nishimura</surname> <given-names>S</given-names>
</name>
<name>
<surname>Shinkai</surname> <given-names>K</given-names>
</name>
<name>
<surname>Locksley</surname> <given-names>R</given-names>
</name>
<name>
<surname>Ganem</surname> <given-names>D</given-names>
</name>
</person-group>. <article-title>Activation of a nonclassical NKT cell subset in a transgenic mouse model of hepatitis B virus infection</article-title>. <source>Immunity</source> (<year>2002</year>) <volume>16</volume>(<issue>4</issue>):<page-range>583&#x2013;94</page-range>. doi: <pub-id pub-id-type="doi">10.1016/S1074-7613(02)00305-9</pub-id>
</citation>
</ref>
<ref id="B31">
<label>31</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Greenwald</surname> <given-names>NF</given-names>
</name>
<name>
<surname>Miller</surname> <given-names>G</given-names>
</name>
<name>
<surname>Moen</surname> <given-names>E</given-names>
</name>
<name>
<surname>Kong</surname> <given-names>A</given-names>
</name>
<name>
<surname>Kagel</surname> <given-names>A</given-names>
</name>
<name>
<surname>Dougherty</surname> <given-names>T</given-names>
</name>
<etal/>
</person-group>. <article-title>Whole-cell segmentation of tissue images with human-level performance using large-scale data annotation and deep learning</article-title>. <source>Nat Biotechnol</source> (<year>2022</year>) <volume>40</volume>(<issue>4</issue>):<page-range>555&#x2013;65</page-range>. doi: <pub-id pub-id-type="doi">10.1038/s41587-021-01094-0</pub-id>
</citation>
</ref>
<ref id="B32">
<label>32</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mennillo</surname> <given-names>E</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>YJ</given-names>
</name>
<name>
<surname>Rusu</surname> <given-names>I</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>G</given-names>
</name>
<name>
<surname>Dorman</surname> <given-names>LC</given-names>
</name>
<name>
<surname>Bernard-Vazquez</surname> <given-names>F</given-names>
</name>
<etal/>
</person-group>. <article-title>Single-cell and spatial multi-omics identify innate and stromal modules targeted by anti-integrin therapy in ulcerative colitis</article-title>. <source>BioRxiv</source> (<year>2023</year>). doi: <pub-id pub-id-type="doi">10.1101/2023.01.21.525036</pub-id>
</citation>
</ref>
<ref id="B33">
<label>33</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goltsev</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Samusik</surname> <given-names>N</given-names>
</name>
<name>
<surname>Kennedy-Darling</surname> <given-names>J</given-names>
</name>
<name>
<surname>Bhate</surname> <given-names>S</given-names>
</name>
<name>
<surname>Hale</surname> <given-names>M</given-names>
</name>
<name>
<surname>Vazquez</surname> <given-names>G</given-names>
</name>
<etal/>
</person-group>. <article-title>Deep profiling of mouse splenic architecture with CODEX multiplexed imaging</article-title>. <source>Cell</source> (<year>2018</year>) <volume>174</volume>(<issue>4</issue>):<fpage>968</fpage>&#x2013;<lpage>981.e15</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2018.07.010</pub-id>
</citation>
</ref>
<ref id="B34">
<label>34</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Crozat</surname> <given-names>K</given-names>
</name>
<name>
<surname>Guiton</surname> <given-names>R</given-names>
</name>
<name>
<surname>Contreras</surname> <given-names>V</given-names>
</name>
<name>
<surname>Feuillet</surname> <given-names>V</given-names>
</name>
<name>
<surname>Dutertre</surname> <given-names>C-A</given-names>
</name>
<name>
<surname>Ventre</surname> <given-names>E</given-names>
</name>
<etal/>
</person-group>. <article-title>The XC chemokine receptor 1 is a conserved selective marker of mammalian cells homologous to mouse CD8alpha+ dendritic cells</article-title>. <source>J Exp Med</source> (<year>2010</year>) <volume>207</volume>(<issue>6</issue>):<page-range>1283&#x2013;92</page-range>. doi: <pub-id pub-id-type="doi">10.1084/jem.20100223</pub-id>
</citation>
</ref>
<ref id="B35">
<label>35</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bourdely</surname> <given-names>P</given-names>
</name>
<name>
<surname>Anselmi</surname> <given-names>G</given-names>
</name>
<name>
<surname>Vaivode</surname> <given-names>K</given-names>
</name>
<name>
<surname>Ramos</surname> <given-names>RN</given-names>
</name>
<name>
<surname>Missolo-Koussou</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Hidalgo</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>Transcriptional and functional analysis of CD1c+ human dendritic cells identifies a CD163+ subset priming CD8+CD103+ T cells</article-title>. <source>Immunity</source> (<year>2020</year>) <volume>53</volume>(<issue>2</issue>):<fpage>335</fpage>&#x2013;<lpage>352.e8</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.immuni.2020.06.002</pub-id>
</citation>
</ref>
<ref id="B36">
<label>36</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Beltra</surname> <given-names>J-C</given-names>
</name>
<name>
<surname>Manne</surname> <given-names>S</given-names>
</name>
<name>
<surname>Abdel-Hakeem</surname> <given-names>MS</given-names>
</name>
<name>
<surname>Kurachi</surname> <given-names>M</given-names>
</name>
<name>
<surname>Giles</surname> <given-names>JR</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Z</given-names>
</name>
<etal/>
</person-group>. <article-title>Developmental relationships of four exhausted CD8+ T cell subsets reveals underlying transcriptional and epigenetic landscape control mechanisms</article-title>. <source>Immunity</source> (<year>2020</year>) <volume>52</volume>(<issue>5</issue>):<page-range>825&#x2013;41</page-range>. doi: <pub-id pub-id-type="doi">10.1016/j.immuni.2020.04.014</pub-id>
</citation>
</ref>
<ref id="B37">
<label>37</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Allen</surname> <given-names>BM</given-names>
</name>
<name>
<surname>Hiam</surname> <given-names>KJ</given-names>
</name>
<name>
<surname>Burnett</surname> <given-names>CE</given-names>
</name>
<name>
<surname>Venida</surname> <given-names>A</given-names>
</name>
<name>
<surname>DeBarge</surname> <given-names>R</given-names>
</name>
<name>
<surname>Tenvooren</surname> <given-names>I</given-names>
</name>
<etal/>
</person-group>. <article-title>Systemic dysfunction and plasticity of the immune macroenvironment in cancer models</article-title>. <source>Nat Med</source> (<year>2020</year>) <volume>26</volume>(<issue>7</issue>):<page-range>1125&#x2013;34</page-range>. doi: <pub-id pub-id-type="doi">10.1038/s41591-020-0892-6</pub-id>
</citation>
</ref>
<ref id="B38">
<label>38</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schuyler</surname> <given-names>RP</given-names>
</name>
<name>
<surname>Jackson</surname> <given-names>C</given-names>
</name>
<name>
<surname>Garcia-Perez</surname> <given-names>JE</given-names>
</name>
<name>
<surname>Baxter</surname> <given-names>RM</given-names>
</name>
<name>
<surname>Ogolla</surname> <given-names>S</given-names>
</name>
<name>
<surname>Rochford</surname> <given-names>R</given-names>
</name>
<etal/>
</person-group>. <article-title>Minimizing batch effects in mass cytometry data</article-title>. <source>Front Immunol</source> (<year>2019</year>) <volume>10</volume>:<elocation-id>2367</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fimmu.2019.02367</pub-id>
</citation>
</ref>
<ref id="B39">
<label>39</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chisari</surname> <given-names>FV</given-names>
</name>
<name>
<surname>Filippi</surname> <given-names>P</given-names>
</name>
<name>
<surname>McLachlan</surname> <given-names>A</given-names>
</name>
<name>
<surname>Milich</surname> <given-names>DR</given-names>
</name>
<name>
<surname>Riggs</surname> <given-names>M</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>Expression of hepatitis B virus large envelope polypeptide inhibits hepatitis B surface antigen secretion in transgenic mice</article-title>. <source>J Virol</source> (<year>1986</year>) <volume>60</volume>(<issue>3</issue>):<page-range>880&#x2013;7</page-range>. doi: <pub-id pub-id-type="doi">10.1128/jvi.60.3.880-887.1986</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>