<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="review-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Big Data</journal-id>
<journal-title>Frontiers in Big Data</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Big Data</abbrev-journal-title>
<issn pub-type="epub">2624-909X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdata.2018.00002</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Big Data</subject>
<subj-group>
<subject>Mini Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Data Analytics Applications for Streaming Data From Social Media: What to Predict?</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Emmert-Streib</surname> <given-names>Frank</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/37376/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Yli-Harja</surname> <given-names>Olli P.</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/470408/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Dehmer</surname> <given-names>Matthias</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/37754/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Predictive Medicine and Data Analytics Lab, Department of Signal Processing, Tampere University of Technology</institution>, <addr-line>Tampere</addr-line>, <country>Finland</country></aff>
<aff id="aff2"><sup>2</sup><institution>Institute of Biosciences and Medical Technology</institution>, <addr-line>Tampere</addr-line>, <country>Finland</country></aff>
<aff id="aff3"><sup>3</sup><institution>Institute for Systems Biology</institution>, <addr-line>Seattle, WA</addr-line>, <country>United States</country></aff>
<aff id="aff4"><sup>4</sup><institution>Department for Biomedical Computer Science and Mechatronics, UMIT - The Health and Lifesciences University</institution>, <addr-line>Hall in Tyrol</addr-line>, <country>Austria</country></aff>
<aff id="aff5"><sup>5</sup><institution>Faculty for Management, Institute for Intelligent Production, University of Applied Sciences Upper Austria</institution>, <addr-line>Steyr</addr-line>, <country>Austria</country></aff>
<aff id="aff6"><sup>6</sup><institution>College of Computer and Control Engineering, Nankai University</institution>, <addr-line>Tianjin</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Dongwon Lee, Pennsylvania State University, United States</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Seungwon Yang, Louisiana State University, United States; Lei Li, Hefei University of Technology, China; Jingrui He, Arizona State University, United States</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Frank Emmert-Streib <email>v&#x00040;bio-complexity.com</email></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Data Mining and Management, a section of the journal Frontiers in Big Data</p></fn></author-notes>
<pub-date pub-type="epub">
<day>11</day>
<month>09</month>
<year>2018</year>
</pub-date>
<pub-date pub-type="collection">
<year>2018</year>
</pub-date>
<volume>1</volume>
<elocation-id>2</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>06</month>
<year>2018</year>
</date>
<date date-type="accepted">
<day>02</day>
<month>08</month>
<year>2018</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2018 Emmert-Streib, Yli-Harja and Dehmer.</copyright-statement>
<copyright-year>2018</copyright-year>
<copyright-holder>Emmert-Streib, Yli-Harja and Dehmer</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Social media in general provide great opportunities for mining massive amounts of text, image, and video-based data. However, what questions can be addressed from analyzing such data? In this review, we are focusing on microblogging services and discuss applications of streaming data from the scientific literature. We will focus on text-based approaches because they represent by far the largest cohort of studies and we present a taxonomy of studied problems.</p>
</abstract>
<kwd-group>
<kwd>social media</kwd>
<kwd>data analytics</kwd>
<kwd>prediction model</kwd>
<kwd>forecasting</kwd>
<kwd>big data</kwd>
<kwd>computational social science</kwd>
<kwd>scientometrics</kwd>
<kwd>data science</kwd>
</kwd-group>
<counts>
<fig-count count="2"/>
<table-count count="1"/>
<equation-count count="0"/>
<ref-count count="61"/>
<page-count count="7"/>
<word-count count="5356"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1. Introduction</title>
<p>The establishment of the World Wide Web (WWW) in the 1990s revolutionized the communication between people in many different and profound ways affecting our professional and social life alike. One particular consequence of the WWW has been the creation of social media that provide a forum for the direct exchange of digital information in the form of texts, photos, or videos, e.g., via blogs, microblogs, photo sharing, video sharing, social bookmarking, virtual worlds, social gaming, or social networking web pages. The top sites such as Twitter, Facebook, LinkedIn, and Google&#x0002B; are used by hundreds of millions of active users worldwide. In the following, we will focus on text-based social networking services for microblogging that are publicly accessible. This excludes Instagram (image-based) and Youtube (video-based) but also Whatsapp (not publicly accessible chats) from our considerations.</p>
<p>Due to the relatively brief history of the WWW and the social networking services there is still a severe lack of understanding what, e.g., the information provided by microblogs can be used for. For this reason, we provide a review of the literature with a focus on application areas of prediction models that have been developed so far for analyzing data from microblogging services.</p>
<p>By prediction models we mean methods that aim at forecasting new events rather than merely summarizing or describing information contained in data. For instance, among the first studied questions of social media were investigations related to the topological structure of social networks. Specifically, the degree distribution, the community structure and motifs of acquaintance networks representing the &#x0201C;friendships&#x0201D; among members of social networking services, corresponding to nodes in such graphs, have been investigated (Java et al., <xref ref-type="bibr" rid="B28">2007</xref>; Aparicio et al., <xref ref-type="bibr" rid="B4">2015</xref>). Such studies are more descriptive in nature. Instead, in this review we present an overview of the literature that uses social media data for classification, regression, or time series prediction problems.</p>
</sec>
<sec id="s2">
<title>2. General application fields and number of publications</title>
<p>We are starting our review by demonstrating that the field of social media analytics is of great interdisciplinary interest occupying already today a large share in the literature.</p>
<p>In order to show this, we are using the Web of Science (WoS) (Clarivate Analytics, <xref ref-type="bibr" rid="B14">2009</xref>) database, which is an online subscription-based citation indexing service operated by Clarivate Analytics. WoS contains comprehensive information about published scientific articles in all areas. Searching WoS for articles containing the name of a microblog either in the title, abstract, or as a keyword, we found: Twitter: 16614, Facebook: 15483, Tumblr: 175, GNU social (previously known as StatusNet and Laconica): 72, Plurk: 56. From this we conclude that the by far most frequently investigated microblogs in the literature are Twitter and Facebook. For this reason, we will focus on these in the following.</p>
<p>In Figure <xref ref-type="fig" rid="F1">1A</xref>, an overview of scientific fields is shown as tagged to published articles containing the keyword Twitter or Facebook, either in the title, the abstract, or as a keyword. It is not surprising that most publications are computer science or social science related. However, also quite a large fraction of papers comes from medicine, management &#x00026; business, and even arts &#x00026; humanities. Interestingly, the fraction of psychology related publications is rather low despite the fact that intuitively one would name this field first due to the personal nature of tweets and Facebook postings. One reason for this underrepresentation may be related to computational obstacles psychologists need to overcome when they want to analyze social media data because available tools may not allow them to tackle targeted research questions as conceived by psychologists.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p><bold>(A)</bold> Scientific fields of published articles investigating Twitter (TW) or Facebook (FB). <bold>(B)</bold> The number of published articles containing the keywords Twitter, Facebook, &#x0201C;machine learning&#x0201D; (ML) or &#x0201C;artificial intelligence&#x0201D; (AI). The numbers ML &#x0003D; &#x0002B;3266 and AI &#x0003D; &#x0002B;12560 indicate the baseline shift for ML and AI. <bold>(C)</bold> Scatter plot comparing articles containing the social media (SM) keyword Twitter or Facebook with articles containing additionally &#x0201C;prediction&#x0201D; or &#x0201C;forecast.&#x0201D; The shown percentages are for Twitter giving the fraction of prediction related publications referred to all publications. <bold>(D)</bold> Similar to <bold>(C)</bold>, but now containing additionally the keywords &#x0201C;cross validation&#x0201D; (CV) or &#x0201C;resampling&#x0201D;.</p></caption>
<graphic xlink:href="fdata-01-00002-g0001.tif"/>
</fig>
<p>In Figure <xref ref-type="fig" rid="F1">1B</xref>, we show the number of published articles containing the keywords Twitter, Facebook, &#x0201C;machine learning&#x0201D; or &#x0201C;artificial intelligence.&#x0201D; For papers containing the words Twitter or Facebook these numbers are total numbers, for &#x0201C;machine learning&#x0201D; and &#x0201C;artificial intelligence&#x0201D; these numbers are subtracted by the minimal number of published papers in these fields between 2006 and 2016. For &#x0201C;machine learning&#x0201D; this number is 3266 and for &#x0201C;artificial intelligence&#x0201D; it is 12560. By subtracting these numbers we shifted both curves downward (baseline shift) to make all four curves comparable with each other due to the fact that articles investigating Twitter or Facebook commenced only around 2008 whereas the work in machine learning and artificial intelligence goes much further back. In this sense, the curves shown for machine learning and artificial intelligence provide only information about <italic>new research directions</italic> as started around 2008. From this comparison we learn that the proportion of social media related publications compared to all articles involving machine learning or artificial intelligence is amazingly high, making it about 1/4 in 2016. Another tendency we can observe is that the number of Twitter related publications is overtaking Facebook since 2013. We did not include the years 2017 and 2018 in Figure <xref ref-type="fig" rid="F1">1B</xref> because the counts in WoS are still incomplete but also for these years we find this trend to continue (data not shown).</p>
</sec>
<sec id="s3">
<title>3. Applications</title>
<sec>
<title>3.1. Specific scientific application fields</title>
<p>The idea of utilizing data from social media for making predictions has generated great interest (Kalampokis et al., <xref ref-type="bibr" rid="B31">2013</xref>; Schoen et al., <xref ref-type="bibr" rid="B50">2013</xref>). The question is what can one predict based on such data? Prominent examples for such studies are prediction models that investigated the emotional constitution of people (Fernandez et al., <xref ref-type="bibr" rid="B20">2012</xref>; Kross et al., <xref ref-type="bibr" rid="B34">2013</xref>; Ortigosa et al., <xref ref-type="bibr" rid="B44">2014</xref>), personal traits and characters (Kosinski et al., <xref ref-type="bibr" rid="B33">2013</xref>), stock market behavior (Bollen et al., <xref ref-type="bibr" rid="B9">2011</xref>; Siganos et al., <xref ref-type="bibr" rid="B51">2014</xref>), election results (Alonso and Vilares, <xref ref-type="bibr" rid="B3">2016</xref>; Tumasjan et al., <xref ref-type="bibr" rid="B53">2011</xref>).</p>
<p>Further examples are consumer behavior (Ringelhan et al., <xref ref-type="bibr" rid="B46">2015</xref>), public health (Sinnenberg et al., <xref ref-type="bibr" rid="B52">2017</xref>), opinion flow (Wu et al., <xref ref-type="bibr" rid="B58">2014</xref>), sharing cascades (Kupavskii et al., <xref ref-type="bibr" rid="B35">2012</xref>; Cheng et al., <xref ref-type="bibr" rid="B10">2014</xref>), account classification (Chu et al., <xref ref-type="bibr" rid="B11">2010</xref>, <xref ref-type="bibr" rid="B12">2012</xref>; Dickerson et al., <xref ref-type="bibr" rid="B18">2014</xref>), conflicts among friends (Liu and Weber, <xref ref-type="bibr" rid="B40">2014</xref>), demographics of users (Culotta et al., <xref ref-type="bibr" rid="B16">2015</xref>), mental health (Guntuku et al., <xref ref-type="bibr" rid="B23">2017</xref>), heart disease (Eichstaedt et al., <xref ref-type="bibr" rid="B19">2015</xref>), tourism (information search and decision-making behaviors) (Zeng and Gerritsen, <xref ref-type="bibr" rid="B60">2014</xref>), word-of-mouth (WOM) or consumer reviews (Zhang et al., <xref ref-type="bibr" rid="B61">2012</xref>), box-office revenue of movies (Asur and Huberman, <xref ref-type="bibr" rid="B6">2010</xref>), levels of rainfall (Lampos and Cristianini, <xref ref-type="bibr" rid="B36">2012</xref>), earthquakes (Sakaki et al., <xref ref-type="bibr" rid="B48">2010</xref>), theoretical implications introduced by social media (Kane et al., <xref ref-type="bibr" rid="B32">2014</xref>). In Table <xref ref-type="table" rid="T1">1</xref> we provide a comprehensive overview of many important questions that have been studied using social media data. We would like to note that here we emphasized the &#x0201C;What to predict&#x0201D; aspect of these studies by highlighting the questions that have been addressed.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>An overview of questions addressing &#x0201C;What to predict&#x0201D; with social media data.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>&#x0201C;What to predict&#x0201D;</bold></th>
<th valign="top" align="left"><bold>References</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Bot detection (account classification)</td>
<td valign="top" align="left">Chu et al. (<xref ref-type="bibr" rid="B11">2010</xref>, <xref ref-type="bibr" rid="B12">2012</xref>); Dickerson et al. (<xref ref-type="bibr" rid="B18">2014</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Box-office revenue of movies</td>
<td valign="top" align="left">Asur and Huberman (<xref ref-type="bibr" rid="B6">2010</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Company value</td>
<td valign="top" align="left">Luo and Zhang (<xref ref-type="bibr" rid="B41">2013</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Conflicts among friends</td>
<td valign="top" align="left">Liu and Weber (<xref ref-type="bibr" rid="B40">2014</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Consumer behavior</td>
<td valign="top" align="left">Ringelhan et al. (<xref ref-type="bibr" rid="B46">2015</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Crime incidents</td>
<td valign="top" align="left">Gerber (<xref ref-type="bibr" rid="B21">2014</xref>); Aghababaei and Makrehchi (<xref ref-type="bibr" rid="B1">2016</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Demographics of users</td>
<td valign="top" align="left">Culotta et al. (<xref ref-type="bibr" rid="B16">2015</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Earthquakes</td>
<td valign="top" align="left">Sakaki et al. (<xref ref-type="bibr" rid="B48">2010</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Election results</td>
<td valign="top" align="left">Alonso and Vilares (<xref ref-type="bibr" rid="B3">2016</xref>); Tumasjan et al. (<xref ref-type="bibr" rid="B53">2011</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Emotional constitution of people</td>
<td valign="top" align="left">Fernandez et al. (<xref ref-type="bibr" rid="B20">2012</xref>); Kross et al. (<xref ref-type="bibr" rid="B34">2013</xref>); Ortigosa et al. (<xref ref-type="bibr" rid="B44">2014</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Epidemic of infectious disease</td>
<td valign="top" align="left">Santillana et al. (<xref ref-type="bibr" rid="B49">2015</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Fake news</td>
<td valign="top" align="left">Gupta et al. (<xref ref-type="bibr" rid="B24">2013</xref>); Conroy et al. (<xref ref-type="bibr" rid="B15">2015</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Heart disease</td>
<td valign="top" align="left">Eichstaedt et al. (<xref ref-type="bibr" rid="B19">2015</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Mental health</td>
<td valign="top" align="left">De Choudhury et al. (<xref ref-type="bibr" rid="B17">2013</xref>); Guntuku et al. (<xref ref-type="bibr" rid="B23">2017</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Popularity of news</td>
<td valign="top" align="left">Bandari et al. (<xref ref-type="bibr" rid="B7">2012</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Movie ratings</td>
<td valign="top" align="left">Oghina et al. (<xref ref-type="bibr" rid="B42">2012</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Opinion flow</td>
<td valign="top" align="left">Wu et al. (<xref ref-type="bibr" rid="B58">2014</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Personal traits and characters</td>
<td valign="top" align="left">Kosinski et al. (<xref ref-type="bibr" rid="B33">2013</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Public health</td>
<td valign="top" align="left">Robillard et al. (<xref ref-type="bibr" rid="B47">2013</xref>); Sinnenberg et al. (<xref ref-type="bibr" rid="B52">2017</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Sharing cascades</td>
<td valign="top" align="left">Kupavskii et al. (<xref ref-type="bibr" rid="B35">2012</xref>); Cheng et al. (<xref ref-type="bibr" rid="B10">2014</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Stock market behavior</td>
<td valign="top" align="left">Bollen et al. (<xref ref-type="bibr" rid="B9">2011</xref>); Siganos et al. (<xref ref-type="bibr" rid="B51">2014</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Rainfall levels</td>
<td valign="top" align="left">Lampos and Cristianini (<xref ref-type="bibr" rid="B36">2012</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Suicide rates</td>
<td valign="top" align="left">Won et al. (<xref ref-type="bibr" rid="B57">2013</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Tourism</td>
<td valign="top" align="left">Zeng and Gerritsen (<xref ref-type="bibr" rid="B60">2014</xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Word-of-mouth (WOM) or consumer reviews</td>
<td valign="top" align="left">Zhang et al. (<xref ref-type="bibr" rid="B61">2012</xref>)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As one can see from Table <xref ref-type="table" rid="T1">1</xref> there are many different questions studied so far. In order to organize these publications, we introduce a taxonomy to categorize these publications according to a few major variables. In Figure <xref ref-type="fig" rid="F2">2</xref> we give a graphical summary of our taxonomy. Overall, these questions fall into seven different fields (E, Economy; G, Geophysics; H, Health; M, Management; S, Sociology; Ps, Psychology; Po, Politology) covering almost all science areas. In this figure, we provide furthermore information about four additional layers, namely (I) the time horizon of the prediction (horizon) for making predictions about the future (F) or the present (P), (II) the level of prediction (level) for macro (Ma) and micro (Mi) level predictions, (III) the time of prediction (time) for batch (Ba) and real-time (Rt) predictions, and for (IV) making spatial (Sp) or non-spatial (Ns) predictions. Each of these layers will be discussed in the following sections.</p>
<p>One area missing from the above (see Figure <xref ref-type="fig" rid="F2">2</xref>) is the humanities. By performing a WoS search looking for articles containing the words Twitter/Facebook, humanities, and prediction/forecast we found no results. However, we found articles (54) searching for Twitter/Facebook and humanities. Interestingly, these articles are descriptive rather than predictive in nature. Examples for such studies are (Lee et al., <xref ref-type="bibr" rid="B39">2017</xref>; Vainio and Holmberg, <xref ref-type="bibr" rid="B54">2017</xref>). In Vainio and Holmberg (<xref ref-type="bibr" rid="B54">2017</xref>) the authors studied who tweeted scientific articles with at least one Finnish author/co-author and that had high altmetric counts on Twitter and in Lee et al. (<xref ref-type="bibr" rid="B39">2017</xref>) the use of Twitter by scholars in the digital humanities was studied for informal scholarly communication. Those and similar papers performed a descriptive statistical analysis but no predictions were made.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Taxonomy of questions that have been investigated so far by prediction models. Overall, these questions fall into seven different applications. E, Economy; G, Geophysics; H, Health; M, Management; S, Sociology; Ps, Psychology; Po, Politology. In addition, distinctions are made regarding the horizon, level, time, and spatial nature of the predictions (see main text for details).</p></caption>
<graphic xlink:href="fdata-01-00002-g0002.tif"/>
</fig>
</sec>
<sec>
<title>3.2. Time horizon of the forecasting</title>
<p>There are two different types of prediction models used in the literature with respect to the prediction itself. The first type predicts the future and the second prediction type predicts the present. The former type is naturally understood because this is what is usually implied by a prediction or a forecast, namely that it should tell us something about the near or far future. For this reason, almost all of the above studies are of this type. However, the second type is unconventional because neither in classical statistics nor machine learning such predictions are made. An example in our context is the prediction of rainfall levels (Lampos and Cristianini, <xref ref-type="bibr" rid="B36">2012</xref>). Here the idea is to use Twitter users as sort of <italic>social sensors</italic> that report real-world events instantaneously. Another example is the prediction of earthquakes (Sakaki et al., <xref ref-type="bibr" rid="B48">2010</xref>). In the literature such predictions are called <italic>nowcasting</italic> or <italic>predicting the present</italic> (Schoen et al., <xref ref-type="bibr" rid="B50">2013</xref>).</p>
</sec>
<sec>
<title>3.3. Macro- vs. micro-level predictions</title>
<p>Another distinction in the predictions is with respect to the level of the prediction. The majority of articles makes predictions on a macro-level for which individual Twitter or Facebook users are irrelevant. Instead, what is important is the aggregation of users into categories. Examples of this are, e.g., predicting the outcome of elections or the box-office success of movies (Asur and Huberman, <xref ref-type="bibr" rid="B6">2010</xref>; Alonso and Vilares, <xref ref-type="bibr" rid="B3">2016</xref>; Tumasjan et al., <xref ref-type="bibr" rid="B53">2011</xref>). In contrast, predictions on the micro-level make predictions for Twitter or Facebook users themselves. Examples are predicting the personality (Golbeck et al., <xref ref-type="bibr" rid="B22">2011</xref>; Quercia et al., <xref ref-type="bibr" rid="B45">2011</xref>; Hughes et al., <xref ref-type="bibr" rid="B27">2012</xref>; Youyou et al., <xref ref-type="bibr" rid="B59">2015</xref>) or human mobility (Jurdak et al., <xref ref-type="bibr" rid="B30">2015</xref>).</p>
</sec>
<sec>
<title>3.4. Batch vs. real-time predictions</title>
<p>The difference between batch and real-time models is that in the former case data are gathered off-line and then one prediction is made. In the latter case this process is iterated multiple times and data are generated on-line. Examples for batch predictions are election forecasts whereas real-time predictions forecast the political opinion continuously (Alonso and Vilares, <xref ref-type="bibr" rid="B3">2016</xref>; Tumasjan et al., <xref ref-type="bibr" rid="B53">2011</xref>). In general, the need for developing a real-time model depends on the application one is aiming at. For instance, if one intends to predict the outbreak of an epidemic of an infectious disease this needs to be done in a real-time manner because there is no single scheduled event that one wants to predict; instead, an outbreak can happen at any time (Robillard et al., <xref ref-type="bibr" rid="B47">2013</xref>; Santillana et al., <xref ref-type="bibr" rid="B49">2015</xref>). Another example is the prediction of stock market values (Bollen et al., <xref ref-type="bibr" rid="B9">2011</xref>; Siganos et al., <xref ref-type="bibr" rid="B51">2014</xref>).</p>
</sec>
<sec>
<title>3.5. Non-spatial vs. spatial predictions</title>
<p>A final distinction of prediction models relates to non-spatial vs spatial predictions. A non-spatial prediction makes a forecast for the population as a whole, e.g., the outcome of an election (Alonso and Vilares, <xref ref-type="bibr" rid="B3">2016</xref>; Tumasjan et al., <xref ref-type="bibr" rid="B53">2011</xref>). In contrast, a spatial prediction makes a forecast for, e.g., all municipalities of a country. In this sense predictions in the former case can be considered as <italic>scalar</italic> whereas in the latter case they are <italic>multivariate</italic>. In order to accomplish a spatial prediction, usually information about the geolocation of the users is utilized. This information may be either directly available, or needs to be inferred from the content of the microblogs.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>4. Discussion</title>
<p>As we have shown in Figure <xref ref-type="fig" rid="F1">1B</xref>, the interest in studying data from social media increases every year. However, also the proportion of prediction related publications increases every year. In order to see this we show Figure <xref ref-type="fig" rid="F1">1C</xref>. In this scatter plot we show results we obtained from a WoS search for articles containing the social media (SM) keyword Twitter or Facebook (x-axis) and for articles containing additionally the keywords &#x0201C;prediction&#x0201D; or &#x0201C;forecast&#x0201D; (y-axis). The fraction of the values on the y-axis to the values on the x-axis, i.e., <italic>y</italic><sub><italic>i</italic></sub>/<italic>x</italic><sub><italic>i</italic></sub>, gives the percentage of prediction related publications compared to all publications. In Figure <xref ref-type="fig" rid="F1">1C</xref>, the shown values are for Twitter (values for Facebook are similar). Due to the fact that the number of publications increases every year, as can be seen from Figure <xref ref-type="fig" rid="F1">1B</xref>, the x-axis in this figure is proportional to the publication year and, hence, one can see that the fraction of prediction related publications increases over the years reaching currently well over 60%.</p>
<sec>
<title>4.1. Gaps in the literature</title>
<p>When collecting the articles for this review we noticed that despite the fact that all considered publications utilize prediction models, only a small fraction of these make an attempt to ensure the statistical soundness of the models. As a simple indicator for this omission we searched the WoS for articles containing the keywords Twitter or Facebook and for articles that contain the keywords Twitter and cross validation or Twitter and resampling (similarly for Facebook). The result of these searches is shown as a scatter plot in Figure <xref ref-type="fig" rid="F1">1D</xref>. The shown pairs correspond to the same publication year and the y-axis label SM &#x00026; CV is an abbreviation for our second search query. This figure confirms our perception indicating that only a small fraction of all articles applies resampling methods in order to quantify the uncertainty in the data and to guard against overfitting. Given the fact that the analyzed social media data are &#x0201C;big,&#x0201D; resampling methods can always be applied. Overall, this indicates a possible problem that would require further analysis.</p>
</sec>
<sec>
<title>4.2. Potential future developments</title>
<sec>
<title>4.2.1. Data integration</title>
<p>The vast majority of studies analyzed only data from social media. However, a combination of such data with external data would allow to address further questions. For instance, health related studies could benefit from <italic>integrating data</italic> from disease databases, e.g., Online Mendelian Inheritance in Man (OMIM) (OMI, <xref ref-type="bibr" rid="B43">2007</xref>), Gene Ontology (Ashburner et al., <xref ref-type="bibr" rid="B5">2000</xref>), or DrugBank (Wishart et al., <xref ref-type="bibr" rid="B56">2007</xref>). This approach enables also in a natural way the extension of text mining approaches because the external information may be utilized in form of dictionaries, e.g., lists of words from a specific category, that can be used to perform a guided sentiment analysis.</p>
<p>Support for our argument for using external information is provided by Ciulla et al. (<xref ref-type="bibr" rid="B13">2012</xref>). The authors found that information provided by tweets alone is not sufficient in order to predict the outcome of a social event (the winner of American Idol) but tweets need to be complemented with information about the geographic location of the tweets.</p>
<p>Another purpose for data integration could be for increasing prediction accuracy and reducing prediction errors. This could be accomplished by utilizing different, independent sources of social media data. In this way one could also naturally obtain quantitative estimates for the variability in the data.</p>
</sec>
<sec>
<title>4.2.2. Social networks</title>
<p>A further direction to explore could be the utilization of social networks (Wasserman and Faust, <xref ref-type="bibr" rid="B55">1994</xref>). An example area where this could be of relevance is studies about infectious outbreaks. The reason for this is that an infection can only spread by human contacts. However, usually, this human contact network is not known. As an approximation for such a human contact network one could utilize data from social media to infer such a network. The simplest way to do this could be by utilizing the information &#x0201C;who is a follower of whom&#x0201D; which can be directly extracted from Twitter. However, one can go beyond these follower networks by also constructing semantic networks. The semantic networks could be constructed from estimating the similarity, e.g., among Twitter users based on the content of their tweets and conditioned on metadata. As a result, the information from these different networks could be integrated leading to characteristic spatial scores of the Twitter activity and content in a specific area.</p>
</sec>
<sec>
<title>4.2.3. Deep learning</title>
<p>Finally, it will be interesting to see if new machine learning and artificial intelligence methods, above all deep learning methods (Hinton et al., <xref ref-type="bibr" rid="B26">2006</xref>; Bengio, <xref ref-type="bibr" rid="B8">2009</xref>; LeCun et al., <xref ref-type="bibr" rid="B37">2015</xref>), e.g., deep neural networks, deep decision trees or deep belief networks, will change the <italic>type of questions</italic> addressed with social media data. So far, deep learning methods have found ample applications in image recognition, audio classification, genomics and text mining, e.g., (Lee et al., <xref ref-type="bibr" rid="B38">2009</xref>; Alipanahi et al., <xref ref-type="bibr" rid="B2">2015</xref>; Jiang et al., <xref ref-type="bibr" rid="B29">2015</xref>; He et al., <xref ref-type="bibr" rid="B25">2016</xref>); however, for social media mining we cannot observe from the current literature that new &#x0201C;What to predict&#x0201D; questions have emerged. Instead, familiar questions are studied with these new methodologies focusing on &#x0201C;How to predict.&#x0201D; Maybe, more experience is needed until scientists find new questions that can be raised with such computer- and data-intensive approaches.</p>
</sec>
</sec>
</sec>
<sec sec-type="conclusions" id="s5">
<title>5. Conclusions</title>
<p>In this paper we surveyed the literature of prediction models for social media with a focus on the questions that have been addressed so far. Since we are observing a transition from descriptive to predictive studies in recent years (see Figure <xref ref-type="fig" rid="F1">1C</xref>), a taxonomy of such questions is a natural first step in understanding the capabilities of social media. We anticipate this trend to continue and the diversity of questions to increase. However, a necessity for the latter is a better comprehension of the data social media provide by exploring their limitations and possibilities with respect to statistical models.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>All authors listed have made a substantial, direct and intellectual contribution to the work, and approved it for publication.</p>
<sec>
<title>Conflict of interest statement</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
</sec>
</body>
<back>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Aghababaei</surname> <given-names>S.</given-names></name> <name><surname>Makrehchi</surname> <given-names>M.</given-names></name></person-group> (<year>2016</year>). <article-title>Mining social media content for crime prediction</article-title>, in <source>Web Intelligence (WI), 2016 IEEE/WIC/ACM International Conference on</source> (<publisher-loc>Omaha, NE: IEEE</publisher-loc>), <fpage>526</fpage>&#x02013;<lpage>531</lpage>.</citation></ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alipanahi</surname> <given-names>B.</given-names></name> <name><surname>Delong</surname> <given-names>A.</given-names></name> <name><surname>Weirauch</surname> <given-names>M. T.</given-names></name> <name><surname>Frey</surname> <given-names>B. J.</given-names></name></person-group> (<year>2015</year>). <article-title>Predicting the sequence specificities of dna-and rna-binding proteins by deep learning</article-title>. <source>Nat. Biotechnol.</source> <volume>33</volume>, <fpage>831</fpage>&#x02013;<lpage>838</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.3300</pub-id><pub-id pub-id-type="pmid">26213851</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alonso</surname> <given-names>M.</given-names></name> <name><surname>Vilares</surname> <given-names>D.</given-names></name></person-group> (<year>2016</year>). <article-title>A review on political analysis and social media</article-title>. <source>Procesamiento Leng. Nat.</source> <volume>56</volume>, <fpage>13</fpage>&#x02013;<lpage>23</lpage>.</citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Aparicio</surname> <given-names>S.</given-names></name> <name><surname>Villaz&#x000F3;n-Terrazas</surname> <given-names>J.</given-names></name> <name><surname>&#x000C1;lvarez</surname> <given-names>G.</given-names></name></person-group> (<year>2015</year>). <article-title>A model for scale-free networks: application to twitter</article-title>. <source>Entropy</source> <volume>17</volume>, <fpage>5848</fpage>&#x02013;<lpage>5867</lpage>. <pub-id pub-id-type="doi">10.3390/e17085848</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ashburner</surname> <given-names>M.</given-names></name> <name><surname>Ball</surname> <given-names>C. A.</given-names></name> <name><surname>Blake</surname> <given-names>J. A.</given-names></name> <name><surname>Botstein</surname> <given-names>D.</given-names></name> <name><surname>Butler</surname> <given-names>H.</given-names></name> <name><surname>Cherry</surname> <given-names>J. M.</given-names></name> <etal/></person-group>. (<year>2000</year>). <article-title>Gene ontology: tool for the unification of biology. The Gene Ontology Consortium</article-title>. <source>Nat. Genet.</source> <volume>25</volume>, <fpage>25</fpage>&#x02013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1038/75556</pub-id><pub-id pub-id-type="pmid">10802651</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Asur</surname> <given-names>S.</given-names></name> <name><surname>Huberman</surname> <given-names>B. A.</given-names></name></person-group> (<year>2010</year>). <article-title>Predicting the future with social media</article-title>, in <source>Proceedings of the 2010 IEEE/WIC/ACM International Conference on Web Intelligence and Intelligent Agent Technology - Volume 01; WI-IAT &#x00027;10</source> (<publisher-loc>Washington, DC: IEEE Computer Society</publisher-loc>), <fpage>492</fpage>&#x02013;<lpage>499</lpage>. <pub-id pub-id-type="doi">10.1109/WI-IAT.2010.63</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bandari</surname> <given-names>R.</given-names></name> <name><surname>Asur</surname> <given-names>S.</given-names></name> <name><surname>Huberman</surname> <given-names>B. A.</given-names></name></person-group> (<year>2012</year>). <article-title>The pulse of news in social media: Forecasting popularity</article-title>, in <source>ICWSM, Vol. 12</source>, <fpage>26</fpage>&#x02013;<lpage>33</lpage>.</citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bengio</surname> <given-names>Y.</given-names></name></person-group> (<year>2009</year>). <article-title>Learning deep architectures for AI</article-title>. <source>Found. Trends Mach. Learn.</source> <volume>2</volume>, <fpage>1</fpage>&#x02013;<lpage>127</lpage>. <pub-id pub-id-type="doi">10.1561/2200000006</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bollen</surname> <given-names>J.</given-names></name> <name><surname>Mao</surname> <given-names>H.</given-names></name> <name><surname>Zeng</surname> <given-names>X.</given-names></name></person-group> (<year>2011</year>). <article-title>Twitter mood predicts the stock market</article-title>. <source>J. Comput. Sci.</source> <volume>2</volume>, <fpage>1</fpage>&#x02013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1016/j.jocs.2010.12.007</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Cheng</surname> <given-names>J.</given-names></name> <name><surname>Adamic</surname> <given-names>L.</given-names></name> <name><surname>Dow</surname> <given-names>P. A.</given-names></name> <name><surname>Kleinberg</surname> <given-names>J. M.</given-names></name> <name><surname>Leskovec</surname> <given-names>J.</given-names></name></person-group> (<year>2014</year>). <article-title>Can cascades be predicted?</article-title>, in <source>Proceedings of the 23rd International Conference on World Wide Web</source> (<publisher-loc>Seoul</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>925</fpage>&#x02013;<lpage>936</lpage>.</citation></ref>
<ref id="B11">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Chu</surname> <given-names>Z.</given-names></name> <name><surname>Gianvecchio</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name> <name><surname>Jajodia</surname> <given-names>S.</given-names></name></person-group> (<year>2010</year>). <article-title>Who is tweeting on twitter: human, bot, or cyborg?</article-title>, in <source>Proceedings of the 26th Annual Computer Security Applications Conference</source> (<publisher-loc>Austin, TX</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>21</fpage>&#x02013;<lpage>30</lpage>.</citation></ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chu</surname> <given-names>Z.</given-names></name> <name><surname>Gianvecchio</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name> <name><surname>Jajodia</surname> <given-names>S.</given-names></name></person-group> (<year>2012</year>). <article-title>Detecting automation of twitter accounts: are you a human, bot, or cyborg?</article-title> <source>IEEE Trans. Depend. Secure Comput.</source> <volume>9</volume>, <fpage>811</fpage>&#x02013;<lpage>824</lpage>. <pub-id pub-id-type="doi">10.1109/TDSC.2012.75</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ciulla</surname> <given-names>F.</given-names></name> <name><surname>Mocanu</surname> <given-names>D.</given-names></name> <name><surname>Baronchelli</surname> <given-names>A.</given-names></name> <name><surname>Gon&#x000E7;alves</surname> <given-names>B.</given-names></name> <name><surname>Perra</surname> <given-names>N.</given-names></name> <name><surname>Vespignani</surname> <given-names>A.</given-names></name></person-group> (<year>2012</year>). <article-title>Beating the news using social media: the case study of american idol</article-title>. <source>EPJ Data Sci.</source> <volume>1</volume>:<fpage>8</fpage>. <pub-id pub-id-type="doi">10.1140/epjds8</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="web"><person-group person-group-type="author"><collab>Clarivate Analytics</collab></person-group> (<year>2009</year>). <source>Web of Science</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://en.wikipedia.org/wiki/Clarivate_Analytics">https://en.wikipedia.org/wiki/Clarivate_Analytics</ext-link></citation></ref>
<ref id="B15">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Conroy</surname> <given-names>N. J.</given-names></name> <name><surname>Rubin</surname> <given-names>V. L.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name></person-group> (<year>2015</year>). <article-title>Automatic deception detection: methods for finding fake news</article-title>, in <source>Proceedings of the 78th ASIS&#x00026;T Annual Meeting: Information Science with Impact: Research in and for the Community</source> (<publisher-loc>St. Louis, MO: American Society for Information Science</publisher-loc>), <fpage>82</fpage>.</citation></ref>
<ref id="B16">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Culotta</surname> <given-names>A.</given-names></name> <name><surname>Kumar</surname> <given-names>N. R.</given-names></name> <name><surname>Cutler</surname> <given-names>J.</given-names></name></person-group> (<year>2015</year>). <article-title>Predicting the demographics of twitter users from website traffic data</article-title>, in <source>AAAI</source> (<publisher-loc>Austin, TX</publisher-loc>), <fpage>72</fpage>&#x02013;<lpage>78</lpage>.</citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>De Choudhury</surname> <given-names>M.</given-names></name> <name><surname>Gamon</surname> <given-names>M.</given-names></name> <name><surname>Counts</surname> <given-names>S.</given-names></name> <name><surname>Horvitz</surname> <given-names>E.</given-names></name></person-group> (<year>2013</year>). <article-title>Predicting depression via social media</article-title>, in <source>ICWSM, Vol. 13</source>, <fpage>1</fpage>&#x02013;<lpage>10</lpage>.</citation></ref>
<ref id="B18">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Dickerson</surname> <given-names>J. P.</given-names></name> <name><surname>Kagan</surname> <given-names>V.</given-names></name> <name><surname>Subrahmanian</surname> <given-names>V.</given-names></name></person-group> (<year>2014</year>). <article-title>Using sentiment to detect bots on twitter: Are humans more opinionated than bots?</article-title>, in <source>Advances in Social Networks Analysis and Mining (ASONAM), 2014 IEEE/ACM International Conference on</source> (<publisher-loc>IEEE</publisher-loc>), <fpage>620</fpage>&#x02013;<lpage>627</lpage>.</citation></ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Eichstaedt</surname> <given-names>J. C.</given-names></name> <name><surname>Schwartz</surname> <given-names>H. A.</given-names></name> <name><surname>Kern</surname> <given-names>M. L.</given-names></name> <name><surname>Park</surname> <given-names>G.</given-names></name> <name><surname>Labarthe</surname> <given-names>D. R.</given-names></name> <name><surname>Merchant</surname> <given-names>R. M.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>Psychological language on twitter predicts county-level heart disease mortality</article-title>. <source>Psychol. Sci.</source> <volume>26</volume>, <fpage>159</fpage>&#x02013;<lpage>169</lpage>. <pub-id pub-id-type="doi">10.1177/0956797614557867</pub-id><pub-id pub-id-type="pmid">25605707</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fernandez</surname> <given-names>K. C.</given-names></name> <name><surname>Levinson</surname> <given-names>C. A.</given-names></name> <name><surname>Rodebaugh</surname> <given-names>T. L.</given-names></name></person-group> (<year>2012</year>). <article-title>Profiling: predicting social anxiety from facebook profiles</article-title>. <source>Soc. Psychol. Pers. Sci.</source> <volume>3</volume>, <fpage>706</fpage>&#x02013;<lpage>713</lpage>. <pub-id pub-id-type="doi">10.1177/1948550611434967</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gerber</surname> <given-names>M. S.</given-names></name></person-group> (<year>2014</year>). <article-title>Predicting crime using twitter and kernel density estimation</article-title>. <source>Decis. Support Syst.</source> <volume>61</volume>, <fpage>115</fpage>&#x02013;<lpage>125</lpage>. <pub-id pub-id-type="doi">10.1016/j.dss.2014.02.003</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Golbeck</surname> <given-names>J.</given-names></name> <name><surname>Robles</surname> <given-names>C.</given-names></name> <name><surname>Edmondson</surname> <given-names>M.</given-names></name> <name><surname>Turner</surname> <given-names>K.</given-names></name></person-group> (<year>2011</year>). <article-title>Predicting personality from twitter</article-title>, in <source>2011 IEEE Third International Conference on Privacy, Security, Risk and Trust and 2011 IEEE Third International Conference on Social Computing</source> (<publisher-loc>Boston, MA</publisher-loc>), <fpage>149</fpage>&#x02013;<lpage>156</lpage>. <pub-id pub-id-type="doi">10.1109/PASSAT/SocialCom.2011.33</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guntuku</surname> <given-names>S.</given-names></name> <name><surname>Yaden</surname> <given-names>D.</given-names></name> <name><surname>Kern</surname> <given-names>M.</given-names></name> <name><surname>Ungar</surname> <given-names>L.</given-names></name> <name><surname>Eichstaedt</surname> <given-names>J.</given-names></name></person-group> (<year>2017</year>). <article-title>Detecting depression and mental illness on social media: an integrative review</article-title>. <source>Curr. Opin. Behav. Sci.</source> <volume>18</volume>, <fpage>43</fpage>&#x02013;<lpage>49</lpage>. <pub-id pub-id-type="doi">10.1016/j.cobeha.2017.07.005</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Gupta</surname> <given-names>A.</given-names></name> <name><surname>Lamba</surname> <given-names>H.</given-names></name> <name><surname>Kumaraguru</surname> <given-names>P.</given-names></name> <name><surname>Joshi</surname> <given-names>A.</given-names></name></person-group> (<year>2013</year>). <article-title>Faking sandy: characterizing and identifying fake images on twitter during hurricane sandy</article-title>, in <source>Proceedings of the 22nd International Conference on World Wide Web</source> (<publisher-loc>Rio de Janeiro: ACM</publisher-loc>), <fpage>729</fpage>&#x02013;<lpage>736</lpage>.</citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>He</surname> <given-names>K.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Ren</surname> <given-names>S.</given-names></name> <name><surname>Sun</surname> <given-names>J.</given-names></name></person-group> (<year>2016</year>). <article-title>Deep residual learning for image recognition</article-title>, in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source>, <fpage>770</fpage>&#x02013;<lpage>778</lpage>.</citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hinton</surname> <given-names>G. E.</given-names></name> <name><surname>Osindero</surname> <given-names>S.</given-names></name> <name><surname>Teh</surname> <given-names>Y.-W.</given-names></name></person-group> (<year>2006</year>). <article-title>A fast learning algorithm for deep belief nets</article-title>. <source>Neural Comput.</source> <volume>18</volume>, <fpage>1527</fpage>&#x02013;<lpage>1554</lpage>. <pub-id pub-id-type="doi">10.1162/neco.2006.18.7.1527</pub-id><pub-id pub-id-type="pmid">16764513</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hughes</surname> <given-names>D. J.</given-names></name> <name><surname>Rowe</surname> <given-names>M.</given-names></name> <name><surname>Batey</surname> <given-names>M.</given-names></name> <name><surname>Lee</surname> <given-names>A.</given-names></name></person-group> (<year>2012</year>). <article-title>A tale of two sites: twitter vs. facebook and the personality predictors of social media usage</article-title>. <source>Comput. Hum. Behav.</source> <volume>28</volume>, <fpage>561</fpage>&#x02013;<lpage>569</lpage>. <pub-id pub-id-type="doi">10.1016/j.chb.2011.11.001</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Java</surname> <given-names>A.</given-names></name> <name><surname>Song</surname> <given-names>X.</given-names></name> <name><surname>Finin</surname> <given-names>T.</given-names></name> <name><surname>Tseng</surname> <given-names>B.</given-names></name></person-group> (<year>2007</year>). <article-title>Why we twitter: understanding microblogging usage and communities</article-title>, in <source>Proceedings of the 9th WebKDD and 1st SNA-KDD 2007 Workshop on Web Mining and Social Network Analysis</source> (<publisher-loc>San Jose, CA: ACM</publisher-loc>), <fpage>56</fpage>&#x02013;<lpage>65</lpage>.</citation></ref>
<ref id="B29">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Jiang</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>L.</given-names></name> <name><surname>Huang</surname> <given-names>D.</given-names></name> <name><surname>Jin</surname> <given-names>L.</given-names></name></person-group> (<year>2015</year>). <article-title>Training word embeddings for deep learning in biomedical text mining tasks</article-title>, in <source>Bioinformatics and Biomedicine (BIBM), 2015 IEEE International Conference on</source> (<publisher-loc>Washington, DC: IEEE</publisher-loc>), <fpage>625</fpage>&#x02013;<lpage>628</lpage>.</citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jurdak</surname> <given-names>R.</given-names></name> <name><surname>Zhao</surname> <given-names>K.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>AbouJaoude</surname> <given-names>M.</given-names></name> <name><surname>Cameron</surname> <given-names>M.</given-names></name> <name><surname>Newth</surname> <given-names>D.</given-names></name></person-group> (<year>2015</year>). <article-title>Understanding human mobility from twitter</article-title>. <source>PLOS ONE</source> <volume>10</volume>:<fpage>e0131469</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0131469</pub-id><pub-id pub-id-type="pmid">26154597</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kalampokis</surname> <given-names>E.</given-names></name> <name><surname>Tambouris</surname> <given-names>E.</given-names></name> <name><surname>Tarabanis</surname> <given-names>K.</given-names></name></person-group> (<year>2013</year>). <article-title>Understanding the predictive power of social media</article-title>. <source>Inter. Res.</source> <volume>23</volume>, <fpage>544</fpage>&#x02013;<lpage>559</lpage>. <pub-id pub-id-type="doi">10.1108/IntR-06-2012-0114</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kane</surname> <given-names>G.</given-names></name> <name><surname>Alavi</surname> <given-names>M.</given-names></name> <name><surname>Labianca</surname> <given-names>G.</given-names></name> <name><surname>Borgatti</surname> <given-names>S.</given-names></name></person-group> (<year>2014</year>). <article-title>What&#x00027;s different about social media networks? a framework and research agenda</article-title>. <source>MIS Q.</source> <volume>38</volume>, <fpage>275</fpage>&#x02013;<lpage>304</lpage>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://misq.org/what-s-different-about-social-media-networks-a-framework-and-research-agenda.html">https://misq.org/what-s-different-about-social-media-networks-a-framework-and-research-agenda.html</ext-link></citation></ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kosinski</surname> <given-names>M.</given-names></name> <name><surname>Stillwell</surname> <given-names>D.</given-names></name> <name><surname>Graepel</surname> <given-names>T.</given-names></name></person-group> (<year>2013</year>). <article-title>Private traits and attributes are predictable from digital records of human behavior</article-title>. <source>Proc. Natl. Acad. Sci. U.S.A.</source> <volume>110</volume>, <fpage>5802</fpage>&#x02013;<lpage>5805</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1218772110</pub-id><pub-id pub-id-type="pmid">23479631</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kross</surname> <given-names>E.</given-names></name> <name><surname>Verduyn</surname> <given-names>P.</given-names></name> <name><surname>Demiralp</surname> <given-names>E.</given-names></name> <name><surname>Park</surname> <given-names>J.</given-names></name> <name><surname>Lee</surname> <given-names>D. S.</given-names></name> <name><surname>Lin</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>Facebook use predicts declines in subjective well-being in young adults</article-title>. <source>PLOS ONE</source> <volume>8</volume>:<fpage>e69841</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0069841</pub-id><pub-id pub-id-type="pmid">23967061</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kupavskii</surname> <given-names>A.</given-names></name> <name><surname>Ostroumova</surname> <given-names>L.</given-names></name> <name><surname>Umnov</surname> <given-names>A.</given-names></name> <name><surname>Usachev</surname> <given-names>S.</given-names></name> <name><surname>Serdyukov</surname> <given-names>P.</given-names></name> <name><surname>Gusev</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>Prediction of retweet cascade size over time</article-title>, in <source>Proceedings of the 21st ACM International Conference on Information Knowledge Management</source> (<publisher-loc>Maui, HI</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>2335</fpage>&#x02013;<lpage>2338</lpage>.</citation></ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lampos</surname> <given-names>V.</given-names></name> <name><surname>Cristianini</surname> <given-names>N.</given-names></name></person-group> (<year>2012</year>). <article-title>Nowcasting events from the social web with statistical learning</article-title>. <source>ACM Trans. Intell. Syst. Technol.</source> <volume>72</volume>, <fpage>1</fpage>&#x02013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1145/2337542.2337557</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>LeCun</surname> <given-names>Y.</given-names></name> <name><surname>Bengio</surname> <given-names>Y.</given-names></name> <name><surname>Hinton</surname> <given-names>G.</given-names></name></person-group> (<year>2015</year>). <article-title>Deep learning</article-title>. <source>Nature</source> <volume>521</volume>, <fpage>436</fpage>. <pub-id pub-id-type="pmid">26017442</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>H.</given-names></name> <name><surname>Pham</surname> <given-names>P.</given-names></name> <name><surname>Largman</surname> <given-names>Y.</given-names></name> <name><surname>Ng</surname> <given-names>A. Y.</given-names></name></person-group> (<year>2009</year>). <article-title>Unsupervised feature learning for audio classification using convolutional deep belief networks</article-title>, in <source>Advances in Neural Information Processing Systems</source> (<publisher-loc>Vancouver, BC</publisher-loc>), <fpage>1096</fpage>&#x02013;<lpage>1104</lpage>.</citation></ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>M.</given-names></name> <name><surname>Yoon</surname> <given-names>H.</given-names></name> <name><surname>Smith</surname> <given-names>M.</given-names></name> <name><surname>Park</surname> <given-names>H.</given-names></name> <name><surname>Park</surname> <given-names>H.</given-names></name></person-group> (<year>2017</year>). <article-title>Mapping a twitter scholarly communication network: a case of the association of internet researchers&#x00027; conference</article-title>. <source>Scientometrics</source> <volume>112</volume>, <fpage>767</fpage>&#x02013;<lpage>797</lpage>. <pub-id pub-id-type="doi">10.1007/s11192-017-2413-z</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Weber</surname> <given-names>I.</given-names></name></person-group> (<year>2014</year>). <article-title>Predicting ideological friends and foes in twitter conflicts</article-title>, in <source>Proceedings of the 23rd International Conference on World Wide Web</source> (<publisher-loc>Seoul</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>575</fpage>&#x02013;<lpage>576</lpage>.</citation></ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name></person-group> (<year>2013</year>). <article-title>How do consumer buzz and traffic in social media marketing predict the value of the firm?</article-title> <source>J. Manage. Inform. Syst.</source> <volume>30</volume>, <fpage>213</fpage>&#x02013;<lpage>238</lpage>. <pub-id pub-id-type="doi">10.2753/MIS0742-1222300208</pub-id></citation></ref>
<ref id="B42">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Oghina</surname> <given-names>A.</given-names></name> <name><surname>Breuss</surname> <given-names>M.</given-names></name> <name><surname>Tsagkias</surname> <given-names>M.</given-names></name> <name><surname>de Rijke</surname> <given-names>M.</given-names></name></person-group> (<year>2012</year>). <article-title>Predicting imdb movie ratings using social media</article-title>, in <source>European Conference on Information Retrieval</source> (<publisher-loc>Barcelona</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>503</fpage>&#x02013;<lpage>507</lpage>.</citation></ref>
<ref id="B43">
<citation citation-type="other"><person-group person-group-type="author"><collab>OMI</collab></person-group> (<year>2007</year>). <source>Online Mendelian Inheritance in Man, OMIM (TM)</source>.</citation></ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ortigosa</surname> <given-names>A.</given-names></name> <name><surname>Carro</surname> <given-names>R. M.</given-names></name> <name><surname>Quiroga</surname> <given-names>J. I.</given-names></name></person-group> (<year>2014</year>). <article-title>Predicting user personality by mining social interactions in Facebook</article-title>. <source>J. Comput. Syst. Sci.</source> <volume>80</volume>, <fpage>57</fpage>&#x02013;<lpage>71</lpage>. <pub-id pub-id-type="doi">10.1016/j.jcss.2013.03.008</pub-id></citation></ref>
<ref id="B45">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Quercia</surname> <given-names>D.</given-names></name> <name><surname>Kosinski</surname> <given-names>M.</given-names></name> <name><surname>Stillwell</surname> <given-names>D.</given-names></name> <name><surname>Crowcroft</surname> <given-names>J.</given-names></name></person-group> (<year>2011</year>). <article-title>Our twitter profiles, our selves: predicting personality with twitter</article-title>, in <source>Privacy, Security, Risk and Trust (PASSAT) and 2011 IEEE Third International Conference on Social Computing (SocialCom), 2011 IEEE Third International Conference on</source> (<publisher-name>IEEE</publisher-name>), <fpage>180</fpage>&#x02013;<lpage>185</lpage>.</citation></ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ringelhan</surname> <given-names>S.</given-names></name> <name><surname>Wollersheim</surname> <given-names>J.</given-names></name> <name><surname>Welpe</surname> <given-names>I. M.</given-names></name></person-group> (<year>2015</year>). <article-title>I Like, I Cite? Do facebook likes predict the impact of scientific work?</article-title> <source>PLoS ONE</source> <volume>10</volume>:<fpage>e0134389</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0134389</pub-id><pub-id pub-id-type="pmid">26244779</pub-id></citation></ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Robillard</surname> <given-names>J. M.</given-names></name> <name><surname>Johnson</surname> <given-names>T. W.</given-names></name> <name><surname>Hennessey</surname> <given-names>C.</given-names></name> <name><surname>Beattie</surname> <given-names>B. L.</given-names></name> <name><surname>Illes</surname> <given-names>J.</given-names></name></person-group> (<year>2013</year>). <article-title>Aging 2.0: health information about dementia on twitter</article-title>. <source>PLoS ONE</source> <volume>8</volume>:<fpage>e69861</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0069861</pub-id><pub-id pub-id-type="pmid">23922827</pub-id></citation></ref>
<ref id="B48">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Sakaki</surname> <given-names>T.</given-names></name> <name><surname>Okazaki</surname> <given-names>M.</given-names></name> <name><surname>Matsuo</surname> <given-names>Y.</given-names></name></person-group> (<year>2010</year>). <article-title>Earthquake shakes twitter users: Real-time event detection by social sensors</article-title>, in <source>Proceedings of the 19th International Conference on World Wide Web, WWW &#x00027;10</source> (<publisher-loc>Raleigh, NC</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>851</fpage>&#x02013;<lpage>860</lpage>. <pub-id pub-id-type="doi">10.1145/1772690.1772777</pub-id></citation></ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Santillana</surname> <given-names>M.</given-names></name> <name><surname>Nguyen</surname> <given-names>A. T.</given-names></name> <name><surname>Dredze</surname> <given-names>M.</given-names></name> <name><surname>Paul</surname> <given-names>M. J.</given-names></name> <name><surname>Nsoesie</surname> <given-names>E. O.</given-names></name> <name><surname>Brownstein</surname> <given-names>J. S.</given-names></name></person-group> (<year>2015</year>). <article-title>Combining search, social media, and traditional data sources to improve influenza surveillance</article-title>. <source>PLoS Comput. Biol.</source> <volume>11</volume>:<fpage>e1004513</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1004513</pub-id><pub-id pub-id-type="pmid">26513245</pub-id></citation></ref>
<ref id="B50">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schoen</surname> <given-names>H.</given-names></name> <name><surname>Gayo-Avello</surname> <given-names>D.</given-names></name> <name><surname>Takis Metaxas</surname> <given-names>P.</given-names></name> <name><surname>Mustafaraj</surname> <given-names>E.</given-names></name> <name><surname>Strohmaier</surname> <given-names>M.</given-names></name> <name><surname>Gloor</surname> <given-names>P.</given-names></name></person-group> (<year>2013</year>). <article-title>The power of prediction with social media</article-title>. <source>Internet Res.</source> <volume>23</volume>, <fpage>528</fpage>&#x02013;<lpage>543</lpage>. <pub-id pub-id-type="doi">10.1108/IntR-06-2013-0115</pub-id></citation></ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Siganos</surname> <given-names>A.</given-names></name> <name><surname>Vagenas-Nanos</surname> <given-names>E.</given-names></name> <name><surname>Verwijmeren</surname> <given-names>P.</given-names></name></person-group> (<year>2014</year>). <article-title>Facebook&#x00027;s daily sentiment and international stock markets</article-title>. <source>J. Econ. Behav. Organ.</source> <volume>107</volume>, <fpage>730</fpage>&#x02013;<lpage>743</lpage>. <pub-id pub-id-type="doi">10.1016/j.jebo.2014.06.004</pub-id></citation></ref>
<ref id="B52">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sinnenberg</surname> <given-names>L.</given-names></name> <name><surname>Buttenheim</surname> <given-names>A.</given-names></name> <name><surname>Padrez</surname> <given-names>K.</given-names></name> <name><surname>Mancheno</surname> <given-names>C.</given-names></name> <name><surname>Ungar</surname> <given-names>L.</given-names></name> <name><surname>Merchant</surname> <given-names>R.</given-names></name></person-group> (<year>2017</year>). <article-title>Twitter as a tool for health research: A systematic review</article-title>. <source>Am. J. Public Health</source> <volume>107</volume>, <fpage>e1</fpage>&#x02013;<lpage>e8</lpage>. <pub-id pub-id-type="doi">10.2105/AJPH.2016.303512</pub-id><pub-id pub-id-type="pmid">27854532</pub-id></citation></ref>
<ref id="B53">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tumasjan</surname> <given-names>A.</given-names></name> <name><surname>Sprenger</surname> <given-names>T. O.</given-names></name> <name><surname>Sandner</surname> <given-names>P. G.</given-names></name> <name><surname>Welpe</surname> <given-names>I. M.</given-names></name></person-group> (<year>2011</year>). <article-title>Election forecasts with twitter: how 140 characters reflect the political landscape</article-title>. <source>Soc. Sci. Comput. Rev.</source> <volume>29</volume>, <fpage>402</fpage>&#x02013;<lpage>418</lpage>. <pub-id pub-id-type="doi">10.1177/0894439310386557</pub-id></citation></ref>
<ref id="B54">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vainio</surname> <given-names>J.</given-names></name> <name><surname>Holmberg</surname> <given-names>K.</given-names></name></person-group> (<year>2017</year>). <article-title>Highly tweeted science articles: who tweets them? an analysis of twitter user profile descriptions</article-title>. <source>Scientometrics</source> <volume>112</volume>, <fpage>345</fpage>&#x02013;<lpage>366</lpage>. <pub-id pub-id-type="doi">10.1007/s11192-017-2368-0</pub-id></citation></ref>
<ref id="B55">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Wasserman</surname> <given-names>S.</given-names></name> <name><surname>Faust</surname> <given-names>K.</given-names></name></person-group> (<year>1994</year>). <source>Social Network Analysis</source>. <publisher-loc>Cambridge; New York, NY</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>.</citation></ref>
<ref id="B56">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wishart</surname> <given-names>D. S.</given-names></name> <name><surname>Knox</surname> <given-names>C.</given-names></name> <name><surname>Guo</surname> <given-names>A. C.</given-names></name> <name><surname>Cheng</surname> <given-names>D.</given-names></name> <name><surname>Shrivastava</surname> <given-names>S.</given-names></name> <name><surname>Tzur</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2007</year>). <article-title>Drugbank: a knowledgebase for drugs, drug actions and drug targets</article-title>. <source>Nucleic Acids Res.</source> <volume>36</volume>, <fpage>D901</fpage>&#x02013;<lpage>D906</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkm958</pub-id><pub-id pub-id-type="pmid">18048412</pub-id></citation></ref>
<ref id="B57">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Won</surname> <given-names>H.-H.</given-names></name> <name><surname>Myung</surname> <given-names>W.</given-names></name> <name><surname>Song</surname> <given-names>G.-Y.</given-names></name> <name><surname>Lee</surname> <given-names>W.-H.</given-names></name> <name><surname>Kim</surname> <given-names>J.-W.</given-names></name> <name><surname>Carroll</surname> <given-names>B. J.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>Predicting national suicide numbers with social media data</article-title>. <source>PLoS ONE</source> <volume>8</volume>:<fpage>e61809</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0061809</pub-id><pub-id pub-id-type="pmid">23630615</pub-id></citation></ref>
<ref id="B58">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>S.</given-names></name> <name><surname>Yan</surname> <given-names>K.</given-names></name> <name><surname>Liu</surname> <given-names>M.</given-names></name> <name><surname>Wu</surname> <given-names>F.</given-names></name></person-group> (<year>2014</year>). <article-title>Opinionflow: visual analysis of opinion diffusion on social media</article-title>. <source>IEEE Trans. Vis. Comput. Graph.</source> <volume>20</volume>, <fpage>1763</fpage>&#x02013;<lpage>1772</lpage>. <pub-id pub-id-type="doi">10.1109/TVCG.2014.2346920</pub-id><pub-id pub-id-type="pmid">26356890</pub-id></citation></ref>
<ref id="B59">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Youyou</surname> <given-names>W.</given-names></name> <name><surname>Kosinski</surname> <given-names>M.</given-names></name> <name><surname>Stillwell</surname> <given-names>D.</given-names></name></person-group> (<year>2015</year>). <article-title>Computer-based personality judgments are more accurate than those made by humans</article-title>. <source>Proc. Natl. Acad. Sci. U.S.A.</source> <volume>112</volume>, <fpage>1036</fpage>&#x02013;<lpage>1040</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1418680112</pub-id><pub-id pub-id-type="pmid">25583507</pub-id></citation></ref>
<ref id="B60">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zeng</surname> <given-names>B.</given-names></name> <name><surname>Gerritsen</surname> <given-names>R.</given-names></name></person-group> (<year>2014</year>). <article-title>What do we know about social media in tourism? a review</article-title>. <source>Tour. Manage. Perspect.</source> <volume>10</volume>, <fpage>27</fpage>&#x02013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1016/j.tmp.2014.01.001</pub-id></citation></ref>
<ref id="B61">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name></person-group> (<year>2012</year>). <article-title>Deciphering word-of-mouth in social media: Text-based metrics of consumer reviews</article-title>. <source>ACM Trans. Manage. Inform. Syst.</source> <volume>3</volume>:<fpage>23</fpage>. <pub-id pub-id-type="doi">10.1145/2151163.2151168</pub-id></citation></ref>
</ref-list>
<fn-group>
<fn fn-type="financial-disclosure"><p><bold>Funding.</bold> MD thanks the Austrian Science Fund for supporting this work (project P30031).</p>
</fn>
</fn-group>
</back>
</article>