<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Bioinform.</journal-id>
<journal-title>Frontiers in Bioinformatics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Bioinform.</abbrev-journal-title>
<issn pub-type="epub">2673-7647</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">831025</article-id>
<article-id pub-id-type="doi">10.3389/fbinf.2022.831025</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Bioinformatics</subject>
<subj-group>
<subject>Technology and Code</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>genomeSidekick: A user-friendly epigenomics data analysis tool</article-title>
<alt-title alt-title-type="left-running-head">Chen et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fbinf.2022.831025">10.3389/fbinf.2022.831025</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Junjie</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1845946/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhu</surname>
<given-names>Ashley J.</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1589834/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Packard</surname>
<given-names>Ren&#xe9; R. S.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1296227/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Vondriska</surname>
<given-names>Thomas M.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/91420/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Chapski</surname>
<given-names>Douglas J.</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/568217/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Division of Cardiology</institution>, <institution>Department of Medicine</institution>, <institution>David Geffen School of Medicine</institution>, <institution>University of California, Los Angeles</institution>, <addr-line>Los Angeles</addr-line>, <addr-line>CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Anesthesiology and Perioperative Medicine</institution>, <institution>David Geffen School of Medicine</institution>, <institution>University of California, Los Angeles</institution>, <addr-line>Los Angeles</addr-line>, <addr-line>CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Physiology</institution>, <institution>David Geffen School of Medicine</institution>, <institution>University of California, Los Angeles</institution>, <addr-line>Los Angeles</addr-line>, <addr-line>CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Ronald Reagan UCLA Medical Center</institution>, <addr-line>Los Angeles</addr-line>, <addr-line>CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Veterans Affairs West Los Angeles Medical Center</institution>, <addr-line>Los Angeles</addr-line>, <addr-line>CA</addr-line>, <country>United States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1083578/overview">Sean O&#x27;Donoghue</ext-link>, Garvan Institute of Medical Research, Australia</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1811770/overview">Daofeng Li</ext-link>, Washington University in St. Louis, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1035676/overview">Sergio Martinez Cuesta</ext-link>, AstraZeneca, United Kingdom</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Douglas J. Chapski, <email>dchapski@ucla.edu</email>
</corresp>
<fn fn-type="equal" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work</p>
</fn>
<fn fn-type="other">
<p>This article was submitted to Data Visualization, a section of the journal Frontiers in Bioinformatics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>18</day>
<month>07</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>2</volume>
<elocation-id>831025</elocation-id>
<history>
<date date-type="received">
<day>07</day>
<month>12</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>06</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Chen, Zhu, Packard, Vondriska and Chapski.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Chen, Zhu, Packard, Vondriska and Chapski</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Recent advances in epigenomics measurements have resulted in a preponderance of genomic sequencing datasets that require focused analyses to discover mechanisms governing biological processes. In addition, multiple epigenomics experiments are typically performed within the same study, thereby increasing the complexity and difficulty of making meaningful inferences from large datasets. One gap in the sequencing data analysis pipeline is the availability of tools to efficiently browse genomic data for scientists that do not have bioinformatics training. To bridge this gap, we developed genomeSidekick, a graphical user interface written in R that allows researchers to perform bespoke analyses on their transcriptomic and chromatin accessibility or chromatin immunoprecipitation data without the need for command line tools. Importantly, genomeSidekick outputs lists of up- and downregulated genes or chromatin features with differential accessibility or occupancy; visualizes omics data using interactive volcano plots; performs Gene Ontology analyses locally; and queries PubMed for selected gene candidates for further evaluation. Outputs can be saved using the user interface and the code underlying genomeSidekick can be edited for custom analyses. In summary, genomeSidekick brings wet lab scientists and bioinformaticians into a shared fluency with the end goal of driving mechanistic discovery.</p>
</abstract>
<kwd-group>
<kwd>epigenomics</kwd>
<kwd>chromatin</kwd>
<kwd>data visualization</kwd>
<kwd>Shiny app</kwd>
<kwd>bioinformatics</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Computational biology tools written in different languages and applied across diverse fields allow for creative interrogation of genomics data to make biological conclusions. Understandably, the breadth of online genomic data analysis resources may appear overwhelming to a novice programmer. Fortunately, global efforts to bring bioinformatics training to general researchers are well underway (<xref ref-type="bibr" rid="B10">Mulder et al., 2018</xref>). Nevertheless, learning how to code may be a barrier to entry for non-bioinformaticians into the field of epigenomics, yet it is important to incorporate these researchers into the data analysis process. A logical solution to this training issue is an inclusive approach that brings non-bioinformaticians into computational workflows after completing most of the command line processes, thereby fostering scientific creativity and leveraging shared knowledge about how the data are processed, analyzed, and visualized.</p>
<p>While lab skillsets ideally include formal bioinformatics knowledge, genomic researchers who do not understand how to code can readily make meaningful conclusions using processed data. An unmet need within this realm is a software for visualizing genomics data and filtering epigenomic and transcriptomic results for downstream analyses, especially considering the combination of orthogonal genomic datasets required to reveal more comprehensive mechanisms of cell biology. In addition, while Excel is a common tool for management and visualization of data, gene lists can be imported incorrectly into Excel and cause permanent edits to gene names (<xref ref-type="bibr" rid="B22">Ziemann et al., 2016</xref>). To prevent this issue and to promote independence from the bioinformatician, the next logical step is to furnish tools to perform data operations that a novice researcher might otherwise try in Excel.</p>
<p>The availability of distinct measurements to understand genomic mechanisms governing complex cellular and organ phenotypes has increased over time, resulting in a need to combine datasets (<xref ref-type="bibr" rid="B3">Chapski and Vondriska, 2021</xref>). Our recent study using RNA-seq, ATAC-seq, reduced representation bisulfite sequencing (RRBS), and chromatin structural data is an example of such integration of orthogonal data to make meaningful conclusions about chromatin architectural dynamics during heart failure (<xref ref-type="bibr" rid="B2">Chapski et al., 2021</xref>). Another investigation established an Atlas of murine ATAC-seq and RNA-seq data across 86 immune cell types and integrated the two datasets to identify a subset of cell types containing open regulatory elements bound by retinoic acid receptor-related orphan receptor gamma (ROR&#x3b3;) or paired-box protein PAX5 (as measured by ChIP-seq), thereby linking chromatin accessibility, transcription, and transcription factor binding in specific cell types (<xref ref-type="bibr" rid="B20">Yoshida et al., 2019</xref>). A common feature of all &#x2018;omics investigations is the need to ask questions of the massive datasets once acquired&#x2014;to prioritize for further mechanistic evaluation. We also appreciate that even professional bioinformaticians may not have the time to perform bespoke analyses for collaborators: thus, a tool for transforming lists of genes into functional targets for a focused, mechanistic experiment is an opportunity to bring non-computational scientists and clinicians into the genomic analysis process.</p>
<p>To bridge the gap between processed data and biological inference, we built user-friendly genomic data visualization and manipulation tools for investigators without computational training. This GUI-based software called genomeSidekick allows for investigation of transcriptomic (RNA-seq) data in addition to chromatin accessibility (ATAC-seq) and chromatin immunoprecipitation-sequencing (ChIP-seq) data in a web browser. Based on a Shiny (<xref ref-type="bibr" rid="B1">Chang et al., 2021</xref>) dashboard written in R (<xref ref-type="bibr" rid="B16">Team, R. C, 2020</xref>), our tool&#x2014;which we have named genomeSidekick&#x2014;generates commonly used, intuitive graphs with interactive information retrieval. Moreover, we wrapped data visualization features for each individual experiment (RNA-seq, ATAC-seq, and ChIP-seq) into individual tabs to make switching between experiments easier. We also provide a tab to integrate RNA-seq, ATAC-seq, and ChIP-seq datasets, so modulation of the transcriptome and epigenome can be examined based on multiple criteria from independent experiments. Lastly, we provide links to external tools (and offer to perform small analyses locally) to facilitate Gene Ontology analysis and PubMed searches.</p>
<p>Freely available on GitHub (<ext-link ext-link-type="uri" xlink:href="https://www.github.com/dchapski/genomeSidekick">https://www.github.com/dchapski/genomeSidekick</ext-link>), genomeSidekick also contains extensive user-friendly documentation in a README markdown file with informational links so that most novice bioinformaticians can achieve results quickly. Lastly, genomeSidekick is a customizable tool that allows for code editing to support a shared collaboration between bioinformaticians and non-computational personnel in the biological research setting, thereby promoting increased computational engagement by non-bioinformaticians.</p>
</sec>
<sec sec-type="methods" id="s2">
<title>Methods</title>
<p>To run genomeSidekick, users should download the software from the repository on GitHub (<ext-link ext-link-type="uri" xlink:href="https://www.github.com/dchapski/genomeSidekick">https://www.github.com/dchapski/genomeSidekick</ext-link>), open the app.R file using RStudio, and click the &#x201c;Run&#x201d; button in the upper right corner of the script. Alternatively, users can download the code and run the app directly from the terminal using <monospace>R -e &#x27;shiny::runApp(&quot;/path/to/app.R&quot;)&#x27;</monospace>. Comma-separated or tab-delimited input RNA-seq data should include gene names (either identifiers or common names) with an adjusted <italic>p</italic>-value and log2FoldChange (preferably from a tool such as DESeq2 (<xref ref-type="bibr" rid="B8">Love et al., 2014</xref>) or edgeR (<xref ref-type="bibr" rid="B12">Robinson et al., 2010</xref>), which correct <italic>p</italic>-values for multiple testing and provide fold change information). Comma-separated or tab-delimited input ATAC-seq data should include adjusted <italic>p</italic>-values and log2FoldChange information about accessibility peaks [the output from DiffBind (<xref ref-type="bibr" rid="B13">Ross-Innes et al., 2012</xref>) works well], in addition to either the closest gene or an overlapping gene for each feature. Gene names should be included for the ATAC-seq data as they are required for merging the RNA-seq and ATAC-seq datasets; however, independent analysis of ATAC-seq data alone does not require gene names. Importantly, other epigenomic experiments such as ChIP-seq outputs containing log2FoldChanges and adjusted <italic>p</italic>-values can be used on the genomeSidekick platform, either alone or in combination with RNA-seq data as described above for the ATAC-seq tab. We provide test data on GitHub and a hyperlink to the data directly within the app.</p>
<p>Extensive documentation regarding installation of R, RStudio, and dependencies for genomeSidekick is provided on the GitHub page. This documentation is also provided within the software so users can directly find information on how to run the software within the app. We also provide an install.R script on the GitHub page to facilitate installation of dependencies. To run the app in a password protected location online, a Shiny subscription can be purchased from the RStudio website (pricing starts at $9 USD/month in 2021). For exploration, we also provide online access at <ext-link ext-link-type="uri" xlink:href="https://genomesidekick.shinyapps.io/genomesidekick/">https://genomesidekick.shinyapps.io/genomesidekick/</ext-link>.</p>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<p>The genomeSidekick software, written in R, can be run on a laptop and requires few dependencies to analyze RNA-seq, ATAC-seq, ChIP-seq, and any other epigenomics datasets that contain <italic>p</italic>-values and fold changes. The utility of genomeSidekick comes from its interface built on the Shiny framework in R (<xref ref-type="bibr" rid="B16">Team, R. C, 2020</xref>). This genomics dashboard allows separation of experimental strategies <italic>via</italic> individual tabs in the GUI (<xref ref-type="fig" rid="F1">Figure 1</xref>). Inputs include processed data tables that can be loaded directly into the app&#x2014;for example, an output from DESeq2 (<xref ref-type="bibr" rid="B8">Love et al., 2014</xref>) that contains the log2FoldChange and adjusted <italic>p</italic>-value information required for the visualizations. Other inputs include the output from DiffBind (<xref ref-type="bibr" rid="B13">Ross-Innes et al., 2012</xref>), a tool that statistically evaluates differentially bound or accessible genomic regions in the case of chromatin immunoprecipitation followed by sequencing (ChIP-seq) or ATAC-seq data, respectively.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Flow chart detailing genomeSidekick functionalities. After raw sequencing data is obtained from the sequencing core, alignment, and statistical analyses on genes or chromatin features should be performed by a bioinformatician, with at least three columns as a result: feature name, adjusted <italic>p</italic>-values, and log2FoldChange (blue boxes). Then, genomeSidekick users can upload these data tables (.csv or tab-delimited files) as inputs to their corresponding tabs within the Shiny app (green boxes), which will contain functions for generation of independent interactive volcano plots (yellow boxes) as well as integrated ATAC-seq and RNA-seq graphs (orange boxes), for example. Note that in addition to ATAC-seq, users may analyze ChIP-seq and other epigenomics data inputs containing <italic>p</italic>-values and log2FoldChanges. Uploading an RNA-seq data set will also allow the application to perform g:Profiler GO analysis (magenta box) and generate downloadable lists of up- and downregulated genes (purple box). HPA: Human Protein Atlas.</p>
</caption>
<graphic xlink:href="fbinf-02-831025-g001.tif"/>
</fig>
<p>Visualizations for volcano plots are coded using ggplot2 (<xref ref-type="bibr" rid="B19">Wickham, 2016</xref>) based code and visualized using ggplotly (<xref ref-type="bibr" rid="B14">Sievert, 2020</xref>), an open-source R package that allows for interactive inspection of graphs (<xref ref-type="fig" rid="F2">Figures 2A&#x2013;C</xref>). The ggplotly visualizations allow for truly interactive point-by-point investigation to reveal individual metrics about each data point (gene name, adjusted <italic>p</italic>-value, log2FoldChange, and other custom information within the table). Superimposed on these visualizations are gene names highlighted by small lines [visualized using ggrepel (<xref ref-type="bibr" rid="B15">Slowikowski, 2021</xref>)] to indicate the <italic>n</italic> most significant points in the dataset. Notably, when a gene point is clicked within a volcano plot, genomeSidekick links the user to either the NCBI database, the UniProt (<xref ref-type="bibr" rid="B18">UniProt, 2021</xref>) website, or the Human Protein Atlas (<xref ref-type="bibr" rid="B17">Uhlen et al., 2015</xref>) for further investigation of candidate gene functions. Some genes are not available for data visualization since many tools that calculate differential expression/accessibility only statistically evaluate loci containing experimental data, thereby resulting in unmeasured regions without a <italic>p</italic>-value (<xref ref-type="fig" rid="F2">Figure 2D</xref> shows an example of this phenomenon). When RNA-seq and ATAC-seq (or ChIP-seq) inputs include common genomic feature information (for example, gene names), genomeSidekick can merge and filter these tables to produce a list of the <italic>n</italic> most upregulated and downregulated genes with accessibility information. 
In addition, the merge computation is performed in a way that gene names do not become corrupted from loading data in Excel [for more, see Introduction above and (<xref ref-type="bibr" rid="B22">Ziemann et al., 2016</xref>)]. This merged dataset can then be visualized as a volcano plot: one dataset (RNA-seq, for example) is plotted along the axes and the other dataset visualized using different point size and coloring to show additional information (<xref ref-type="fig" rid="F2">Figure 2E</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Example visualizations of sample RNA-seq and ATAC-seq data using genomeSidekick. <bold>(A)</bold> Volcano plot of RNA-seq data shows downregulated genes in blue, upregulated genes in red, and non-significant genes in gray. Thresholds for <italic>p</italic>-value and log2FoldChange can be adjusted within genomeSidekick. <bold>(B)</bold> Searching for a particular gene(s) will highlight the target gene in red and mark the non-target genes in blue. In the example query for three genes, only one is found and visualized on the graph. <bold>(C)</bold> The non-target genes can be hidden by clicking on the Non-Target label in the legend (resulting in a gray label) and un-hidden by clicking the label a second time. Details about any gene shown on the interactive volcano plot can be shown by hovering over the dot on the plot. <bold>(D)</bold> Looking at the RNA-seq data upload page, genes without a <italic>p</italic>-value are not plotted on the volcano plot and therefore will not return any results when queried on the graph (see red lines). Common RNA-seq analysis packages do not evaluate all genes in the genome due to low detection, and this varies by experiment. Panels <bold>(A)</bold> through <bold>(D)</bold> use RNA-seq data as an example, but the functionalities are exactly the same for ATAC-seq data. <bold>(E)</bold> Once both RNA-seq and ATAC-seq data sets are uploaded, they can be integrated into one graph using either the RNA-seq or ATAC-seq data as the base. Example data shown have padj &#x3c;0.05 and FDR &#x3c;0.1 in the RNA-seq and ATAC-seq datasets, respectively.</p>
</caption>
<graphic xlink:href="fbinf-02-831025-g002.tif"/>
</fig>
<p>To test the ease of dataset integration in a setting outside our institution, a collaborator provided a use case for custom analysis of RNA-seq and ATAC-seq data from <xref ref-type="bibr" rid="B2">Chapski et al. (2021)</xref>. Specifically, this collaborator sought to determine how the expression and chromatin accessibility at gene loci change with 3&#xa0;days of cardiac pressure overload (a pathological model that eventually leads to heart failure) in mice. Interestingly, the integrated RNA-seq and ATAC-seq output of genomeSidekick showed a significant increase in transcription and chromatin accessibility at the <italic>Xirp2</italic> gene locus (<xref ref-type="fig" rid="F3">Figure 3</xref>), consistent with an earlier study showing that the cardiac stressor angiotensin II elicits an increase in <italic>Xirp2</italic> transcription mediated by the transcription factor MEF2A (<xref ref-type="bibr" rid="B9">McCalmon et al., 2010</xref>). This exercise, performed on the collaborator&#x2019;s first exploration of the software, suggests that genomeSidekick is useful for quick exploration of datasets and can provide meaningful scientific insights to first-time users.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Example integrated RNA-seq/ATAC-seq visualization from a collaborator&#x2019;s first time using genomeSidekick. A collaborator recently visualized change in transcription and promoter accessibility at the murine <italic>Xirp2</italic> locus after 3&#xa0;days of cardiac pressure overload. This gene, which is also upregulated at the transcriptional level after angiotensin II treatment, becomes more accessible with pressure overload, suggesting that the observed increase in transcription is a consequence of increased accessibility. Dot size and color scale indicate ATAC-seq &#x2212;log10(adjusted <italic>p</italic>-value) and log2FoldChange, respectively. Example data shown have padj and FDR less than 0.05 in the RNA-seq and ATAC-seq datasets, respectively.</p>
</caption>
<graphic xlink:href="fbinf-02-831025-g003.tif"/>
</fig>
<p>Importantly, the gene list outputs from each genomeSidekick tab are displayed for direct use as inputs for other software. For example, genomeSidekick includes a feature to perform local Gene Ontology analysis on smaller gene list outputs using the gprofiler2 (<xref ref-type="bibr" rid="B5">Kolberg et al., 2020</xref>) package in R in addition to a link to the g:Profiler website (<xref ref-type="bibr" rid="B11">Raudvere et al., 2019</xref>) for analyses of larger output gene lists from genomeSidekick that might take longer on a local machine. Lastly, we include a feature for quick PubMed searches of genes of interest that outputs query results directly in the app. This feature is based on the easyPubMed package in R (<xref ref-type="bibr" rid="B4">Fantini, 2019</xref>) and is designed to keep users&#x2019; eyes on their data instead of opening a new tab to perform queries on data points of interest. Taken together, these features allow a non-bioinformatician to increase their computational fluency without having to learn how to code.</p>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>We built a tool called genomeSidekick to facilitate inclusion of non-bioinformaticians into computational workflows for RNA-seq, ATAC-seq, ChIP-seq, or any other datasets that undergo statistical testing. This GUI-based software written in R allows individuals to focus their efforts on biological inference without having to frontload the bioinformatics training required to maneuver the command line. Specifically, genomeSidekick facilitates integration of gene expression and chromatin accessibility data, for example, to narrow down gene lists for further analyses. In addition, the software provides an opportunity for non-bioinformaticians to perform small edits to the code to customize their visualizations and filtering criteria without having to learn R. Overall, genomeSidekick will bring wet lab scientists onto a more level playing field for common data analysis questions, thereby reducing dependence on bioinformaticians.</p>
<p>Additional software exists for analysis of gene expression and epigenomics data and may be useful for more computationally versed individuals. For example, DEApp can be used to perform differential expression testing and data visualization (<xref ref-type="bibr" rid="B7">Li and Andrade, 2017</xref>), although a significant hurdle to using this tool is knowing which statistical approach to use for differential expression within the software. In addition, DEBrowser (<xref ref-type="bibr" rid="B6">Kucukural et al., 2019</xref>) and VisRseq (<xref ref-type="bibr" rid="B21">Younesy et al., 2015</xref>) are useful for performing end-to-end bioinformatics analyses of datasets, and both programs complete complicated tasks such as heatmap generation and principal component analysis. Importantly, these tools may require knowledge of data transformations at each step of a given analysis and/or training in statistics. In contrast, genomeSidekick provides a platform for users to explore and integrate processed transcriptomics and epigenomics data and create figures without the complexity seen in other tools (<xref ref-type="table" rid="T1">Table 1</xref>).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Example graphical user interfaces for genomics tasks (genomeSidekick in bold).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Software</th>
<th align="left">Advantages and considerations</th>
<th align="left">Reference</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">DEApp</td>
<td align="left">Differential expression and data visualization in one tool, many options to calculate statistics</td>
<td align="left">
<xref ref-type="bibr" rid="B7">Li and Andrade, (2017)</xref>
</td>
</tr>
<tr>
<td align="left">DEBrowser</td>
<td align="left">End-to-end analysis (filtering, heatmaps, dimensionality reduction), may require more than basic knowledge of statistics</td>
<td align="left">
<xref ref-type="bibr" rid="B6">Kucukural et al. (2019)</xref>
</td>
</tr>
<tr>
<td align="left">VisRseq</td>
<td align="left">End-to-end analysis (filtering, heatmaps, dimensionality reduction), requires knowledge of JavaScript Object Notation (JSON)</td>
<td align="left">
<xref ref-type="bibr" rid="B21">Younesy et al. (2015)</xref>
</td>
</tr>
<tr>
<td align="left">
<bold>genomeSidekick</bold>
</td>
<td align="left">
<bold>Volcano plots, experiment integration, Gene Ontology analysis, PubMed search, suitable for early beginners</bold>
</td>
<td align="left">
<bold>This paper</bold>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The simplicity of genomeSidekick allows researchers with no bioinformatics background to investigate their own datasets after initial mapping, quantification, and differential testing by a bioinformatician. Thresholding of <italic>p</italic>-values for individual experiments can be edited for custom stringency, which allows wet lab researchers to perform independent analyses without requesting individual gene lists from a bioinformatician. Moreover, extensive documentation providing explanations of individual functions and links to learning resources is condensed into a README file on GitHub with an intuitive interface and examples.</p>
<p>The genomeSidekick application allows bioinformaticians to send data to collaborators and then have them interact with multiple datasets independently. Importantly, the app can be hosted online for a small monthly fee using <ext-link ext-link-type="uri" xlink:href="https://www.shinyapps.io/">https://www.shinyapps.io</ext-link>, thereby facilitating longer distance collaborations. Accordingly, for simple data exploration, we provide genomeSidekick online at <ext-link ext-link-type="uri" xlink:href="https://genomesidekick.shinyapps.io/genomesidekick/">https://genomesidekick.shinyapps.io/genomesidekick/</ext-link>. Overall, genomeSidekick will help bring wet lab researchers into the computational realm by fostering creativity with data visualization and integrative analyses in a user-friendly format.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. These data can be found here: <ext-link ext-link-type="uri" xlink:href="https://github.com/dchapski/genomeSidekick">https://github.com/dchapski/genomeSidekick</ext-link>.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>JC and DC conceived of the study. JC, AZ, and DC wrote the software. RP and TV provided infrastructure. DC supervised the project and wrote the manuscript. All authors read and approved the final manuscript.</p>
</sec>
<sec sec-type="COI-statement" id="s7">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s8">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ack>
<p>The authors would like to thank collaborator Dr. Christoph D. Rau for software testing and members of the Vondriska Lab for comments and suggestions. Research in the Vondriska Lab is supported by the NIH, UCLA Clinical and Translational Science Institute, the Department of Anesthesiology and Perioperative Medicine, and the David Geffen School of Medicine at UCLA. RP is supported by VA Merit BX004558 and the UCLA Cardiovascular Discovery Fund/Lauren B. Leichtman and Arthur E. Levine Investigator Award.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Chang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Allaire</surname>
<given-names>J. J.</given-names>
</name>
<name>
<surname>Sievert</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Schloerke</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <source>Shiny: web application framework for R</source>. </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chapski</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Cabaj</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Morselli</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mason</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Soehalim</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Early adaptive chromatin remodeling events precede pathologic phenotypes and are reinforced in the failing heart</article-title>. <source>J. Mol. Cell. Cardiol.</source> <volume>160</volume>, <fpage>73</fpage>&#x2013;<lpage>86</lpage>. <pub-id pub-id-type="doi">10.1016/j.yjmcc.2021.07.002</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chapski</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Vondriska</surname>
<given-names>T. M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Taking data science to heart: Next scale of gene regulation</article-title>. <source>Curr. Cardiol. Rep.</source> <volume>23</volume> (<issue>5</issue>), <fpage>46</fpage>. <pub-id pub-id-type="doi">10.1007/s11886-021-01467-6</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Fantini</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2019</year>). <source>easyPubMed: search and retrieve scientific publication records from PubMed</source>. </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kolberg</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Raudvere</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Kuzmin</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Vilo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Peterson</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>gprofiler2 &#x2013; an R package for gene list functional enrichment analysis and namespace conversion toolset g:Profiler</article-title>. <source>F1000Res.</source> <volume>9</volume> (<issue>ELIXIR</issue>), <fpage>709</fpage>. <pub-id pub-id-type="doi">10.12688/f1000research.24956.1</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kucukural</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Yukselen</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Ozata</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Garber</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>DEBrowser: interactive differential expression analysis and visualization tool for count data</article-title>. <source>BMC Genomics</source> <volume>20</volume> (<issue>1</issue>), <fpage>6</fpage>. <pub-id pub-id-type="doi">10.1186/s12864-018-5362-x</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Andrade</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>DEApp: An interactive web interface for differential expression analysis of next generation sequence data</article-title>. <source>Source Code Biol. Med.</source> <volume>12</volume>, <fpage>2</fpage>. <pub-id pub-id-type="doi">10.1186/s13029-017-0063-4</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Love</surname>
<given-names>M. I.</given-names>
</name>
<name>
<surname>Huber</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Anders</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2</article-title>. <source>Genome Biol.</source> <volume>15</volume> (<issue>12</issue>), <fpage>550</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-014-0550-8</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McCalmon</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Desjardins</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Ahmad</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Davidoff</surname>
<given-names>K. S.</given-names>
</name>
<name>
<surname>Snyder</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Sato</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>Modulation of angiotensin II-mediated cardiac remodeling by the MEF2A target gene Xirp2</article-title>. <source>Circ. Res.</source> <volume>106</volume> (<issue>5</issue>), <fpage>952</fpage>&#x2013;<lpage>960</lpage>. <pub-id pub-id-type="doi">10.1161/CIRCRESAHA.109.209007</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mulder</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Schwartz</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Brazas</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>Brooksbank</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Gaeta</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Morgan</surname>
<given-names>S. L.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>The development and application of bioinformatics core competencies to improve bioinformatics training and education</article-title>. <source>PLoS Comput. Biol.</source> <volume>14</volume> (<issue>2</issue>), <fpage>e1005772</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1005772</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raudvere</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Kolberg</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Kuzmin</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Arak</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Adler</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Peterson</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>g:Profiler: a web server for functional enrichment analysis and conversions of gene lists (2019 update)</article-title>. <source>Nucleic Acids Res.</source> <volume>47</volume> (<issue>W1</issue>), <fpage>W191</fpage>&#x2013;<lpage>W198</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz369</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Robinson</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>McCarthy</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Smyth</surname>
<given-names>G. K.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>edgeR: a bioconductor package for differential expression analysis of digital gene expression data</article-title>. <source>Bioinformatics</source> <volume>26</volume> (<issue>1</issue>), <fpage>139</fpage>&#x2013;<lpage>140</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp616</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ross-Innes</surname>
<given-names>C. S.</given-names>
</name>
<name>
<surname>Stark</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Teschendorff</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Holmes</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Ali</surname>
<given-names>H. R.</given-names>
</name>
<name>
<surname>Dunning</surname>
<given-names>M. J.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Differential oestrogen receptor binding is associated with clinical outcome in breast cancer</article-title>. <source>Nature</source> <volume>481</volume> (<issue>7381</issue>), <fpage>389</fpage>&#x2013;<lpage>393</lpage>. <pub-id pub-id-type="doi">10.1038/nature10730</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Sievert</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2020</year>). <source>Interactive web-based data visualization with R, plotly, and shiny</source>. <publisher-name>Chapman and Hall/CRC</publisher-name>. </citation>
</ref>
<ref id="B15">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Slowikowski</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2021</year>). <source>ggrepel: Automatically position non-overlapping text labels with &#x27;ggplot2&#x27;</source>. </citation>
</ref>
<ref id="B16">
<citation citation-type="book">
<collab>R Core Team</collab> (<year>2020</year>). <source>R: A language and environment for statistical computing</source>. </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Uhlen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fagerberg</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hallstrom</surname>
<given-names>B. M.</given-names>
</name>
<name>
<surname>Lindskog</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Oksvold</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Mardinoglu</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Proteomics. Tissue-based map of the human proteome</article-title>. <source>Science</source> <volume>347</volume> (<issue>6220</issue>), <fpage>1260419</fpage>. <pub-id pub-id-type="doi">10.1126/science.1260419</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<collab>UniProt Consortium</collab>
<name>
<surname>Martin</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Orchard</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Magrane</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Agivetova</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Ahmad</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>UniProt: the universal protein knowledgebase in 2021</article-title>. <source>Nucleic Acids Res.</source> <volume>49</volume> (<issue>D1</issue>), <fpage>D480</fpage>&#x2013;<lpage>D489</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaa1100</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wickham</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2016</year>). <source>ggplot2: Elegant graphics for data analysis</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Springer-Verlag</publisher-name>. </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yoshida</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lareau</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Ramirez</surname>
<given-names>R. N.</given-names>
</name>
<name>
<surname>Rose</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Maier</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wroblewska</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>The cis-regulatory atlas of the mouse immune system</article-title>. <source>Cell</source> <volume>176</volume> (<issue>4</issue>), <fpage>897</fpage>&#x2013;<lpage>912</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2018.12.036</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Younesy</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Moller</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Lorincz</surname>
<given-names>M. C.</given-names>
</name>
<name>
<surname>Karimi</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Jones</surname>
<given-names>S. J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>VisRseq: R-based visual framework for analysis of sequencing data</article-title>. <source>BMC Bioinforma.</source> <volume>16</volume> (<issue>Suppl. 11</issue>), <fpage>S2</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-16-S11-S2</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ziemann</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Eren</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>El-Osta</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Gene name errors are widespread in the scientific literature</article-title>. <source>Genome Biol.</source> <volume>17</volume> (<issue>1</issue>), <fpage>177</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-016-1044-7</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>