<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="article-commentary">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Big Data</journal-id>
<journal-title>Frontiers in Big Data</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Big Data</abbrev-journal-title>
<issn pub-type="epub">2624-909X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdata.2020.00008</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Big Data</subject>
<subj-group>
<subject>General Commentary</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Commentary: A robust data-driven approach identifies four personality types across four large data sets</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Katahira</surname> <given-names>Kentaro</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/31069/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Kunisato</surname> <given-names>Yoshihiko</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/573483/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Yamashita</surname> <given-names>Yuichi</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/13049/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Suzuki</surname> <given-names>Shinsuke</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/78389/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Psychological and Cognitive Sciences, Graduate School of Informatics, Nagoya University</institution>, <addr-line>Nagoya</addr-line>, <country>Japan</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Psychology, Senshu University</institution>, <addr-line>Kawasaki</addr-line>, <country>Japan</country></aff>
<aff id="aff3"><sup>3</sup><institution>Department of Information Medicine, National Institute of Neuroscience, National Center of Neurology and Psychiatry</institution>, <addr-line>Tokyo</addr-line>, <country>Japan</country></aff>
<aff id="aff4"><sup>4</sup><institution>Brain, Mind and Markets Laboratory, Department of Finance, Faculty of Business and Economics, The University of Melbourne</institution>, <addr-line>Parkville, VIC</addr-line>, <country>Australia</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Nikolaos Vasiloglou, Relational AI, Atlanta, Georgia, United States</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Ilias Fountalis, Relational AI, Berkeley, California, United States</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Kentaro Katahira <email>katahira.kentaro&#x00040;b.mbox.nagoya-u.ac.jp</email></corresp>
<corresp id="c002">Shinsuke Suzuki <email>shinsuke.szk&#x00040;gmail.com</email></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Machine Learning and Artificial Intelligence, a section of the journal Frontiers in Big Data</p></fn></author-notes>
<pub-date pub-type="epub">
<day>25</day>
<month>02</month>
<year>2020</year>
</pub-date>
<pub-date pub-type="collection">
<year>2020</year>
</pub-date>
<volume>3</volume>
<elocation-id>8</elocation-id>
<history>
<date date-type="received">
<day>04</day>
<month>11</month>
<year>2019</year>
</date>
<date date-type="accepted">
<day>10</day>
<month>02</month>
<year>2020</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2020 Katahira, Kunisato, Yamashita and Suzuki.</copyright-statement>
<copyright-year>2020</copyright-year>
<copyright-holder>Katahira, Kunisato, Yamashita and Suzuki</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<related-article id="RA1" related-article-type="commentary-article" journal-id="Nat Hum Behav" journal-id-type="nlm-ta" vol="2" page="735" xlink:href="31406291" ext-link-type="pubmed">A Commentary on <article-title>A robust data-driven approach identifies four personality types across four large data sets</article-title> by Gerlach, M., Farb, B., Revelle, W., and Amaral, L. A. N. (2018). Nat. Hum. Behav. 2, 735&#x02013;742. doi: <object-id>10.1038/s41562-018-0419-z</object-id></related-article>
<kwd-group>
<kwd>personality types</kwd>
<kwd>cluster</kwd>
<kwd>Gaussian mixture models</kwd>
<kwd>skewness</kwd>
<kwd>statistical modeling</kwd>
</kwd-group>
<contract-num rid="cn001">16H05957</contract-num>
<contract-num rid="cn001">JP17H05933</contract-num>
<contract-num rid="cn001">JP17H05946</contract-num>
<contract-num rid="cn001">JP17H06039</contract-num>
<contract-num rid="cn001">JP19H04902</contract-num>
<contract-num rid="cn002">JPMJCR16E2</contract-num>
<contract-sponsor id="cn001">Japan Society for the Promotion of Science<named-content content-type="fundref-id">10.13039/501100001691</named-content></contract-sponsor>
<contract-sponsor id="cn002">Japan Science and Technology Agency<named-content content-type="fundref-id">10.13039/501100002241</named-content></contract-sponsor>
<counts>
<fig-count count="1"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="9"/>
<page-count count="3"/>
<word-count count="1825"/>
</counts>
</article-meta>
</front>
<body>
<p>What kinds of personalities do humans have? Can these personalities be classified into several discrete types? These issues have been of considerable concern as they could potentially provide a deeper understanding of the nature of human individuality and mental disorders. Recently, Gerlach et al. (<xref ref-type="bibr" rid="B3">2018</xref>) addressed these issues by applying established machine-learning techniques to big data (more than 1.5 million respondents in total). They found four &#x0201C;meaningful clusters&#x0201D; in personality dimensions, suggesting the existence of at least four personality types. Here, we propose an alternative interpretation of their result: a skewed distribution with no cluster structures in personality space can erroneously give rise to seemingly meaningful clusters.</p>
<sec id="s1">
<title>Distribution of Personality</title>
<p>It is now widely accepted that human personality is characterized by five dimensions (traits or factors), which consist of neuroticism, extraversion, openness, agreeableness, and conscientiousness (Goldberg, <xref ref-type="bibr" rid="B4">1990</xref>). Yet an understanding of how human personalities are distributed in this five-dimensional (5D) space remains elusive. There exist at least two major views: the <italic>dimensional</italic> view and the <italic>categorical</italic> view. The dimensional view supposes that the distribution is unimodal and that individuals&#x00027; personalities are continuously distributed in the 5D space. The categorical view posits that there are multiple clusters (dense regions) in personality space (i.e., the distribution is multimodal) and that each individual can be classified into one of these clusters. In personality theory, such clusters are referred to as personality &#x0201C;types.&#x0201D; While common analytical tools of personality (e.g., factor analysis) are constructed based on the dimensional view, some researchers have considered the categorical view and claimed the existence of personality types (e.g., Robins et al., <xref ref-type="bibr" rid="B8">1996</xref>).</p>
<p>A recent study by Gerlach et al. (<xref ref-type="bibr" rid="B3">2018</xref>) aimed to identify personality types in a highly robust manner based on four large data sets. Their analyses identified four meaningful clusters deemed as personality types. However, in the present study, we suggest that Gerlach et al.&#x00027;s analysis cannot necessarily exclude the dimensional view. In particular, we demonstrate that a skewed distribution without a cluster structure can lead to spurious clusters that are deemed &#x0201C;meaningful clusters&#x0201D; or &#x0201C;types&#x0201D; by Gerlach et al.&#x00027;s analysis.</p>
</sec>
<sec id="s2">
<title>Procedure of Analysis and Its Pitfall</title>
<p>The core part of Gerlach et al.&#x00027;s analysis is fitting Gaussian mixture models (GMMs) to the five factor scores that provide the positions of individuals in the 5D space (the procedure adopted in Gerlach et al. is briefly described in <xref ref-type="supplementary-material" rid="SM1">Supplementary Text 1</xref>). A GMM represents a given distribution as a weighted sum of a finite number of Gaussian (normal) distributions. If there are indeed cluster structures and each cluster can be represented by a single Gaussian distribution, each Gaussian component may correspond to a single cluster (<xref ref-type="fig" rid="F1">Figures 1A,B</xref>). To examine whether each Gaussian component is a truly meaningful cluster, they performed a statistical test based on the null model that assumes the five factors are distributed independently of each other. As a result, they identified four Gaussian components as meaningful clusters.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>How clustering based on a Gaussian mixture model works for two types of distributions. Comparison of a case with a clear cluster structure <bold>(A,B)</bold> and a case with a skewed distribution <bold>(C,D)</bold>. <bold>(A,C)</bold> The right panel for each shows the scatter plot of synthesized data and the left shows the estimated probability density from the samples. <bold>(B,D)</bold> Result of fitting Gaussian mixture models. The left panel shows the probability density functions of the fitted GMM. The right panel indicates the one standard deviation contours and the centers of the estimated Gaussian components (overlaid on the same scatter plots as in the panels above). The Gaussian components deemed &#x0201C;meaningful clusters&#x0201D; by Gerlach et al.&#x00027;s procedure are indicated by green arrows (see <xref ref-type="supplementary-material" rid="SM1">Supplementary Text 2</xref> and <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 1</xref> for details).</p></caption>
<graphic xlink:href="fdata-03-00008-g0001.tif"/>
</fig>
<p>However, even if the target distribution is unimodal and there is no cluster structure, similar results (i.e., emergence of meaningful clusters) can be obtained when the distribution has skewness (<xref ref-type="fig" rid="F1">Figures 1C,D</xref>). In the simulation, we applied the procedure to 2D data artificially generated from a unimodal, skewed distribution (see <xref ref-type="supplementary-material" rid="SM1">Supplementary Text 2</xref>). A GMM can fit a non-Gaussian distribution by combining multiple Gaussian components (Roeder and Wasserman, <xref ref-type="bibr" rid="B9">1997</xref>; Bauer and Curran, <xref ref-type="bibr" rid="B1">2004</xref>). In this case, the best-fitting GMM used seven components to represent the skewed distribution (<xref ref-type="fig" rid="F1">Figure 1D</xref>). Among these components, three were deemed &#x0201C;meaningful clusters&#x0201D; given that the density at each component center was significantly higher than that under the null model (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 1</xref>). It should be noted that, in addition to the skewness of the marginal distributions (the distribution of each variable with the other variable marginalized out), dependence among the factors is necessary for the emergence of spurious meaningful clusters. This is because the null model has the same marginals as the original distribution (as indicated by the comparison of <xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 1B,C</xref>).</p>
<p>This mechanism could have influenced Gerlach et al.&#x00027;s results. The distributions of their factor scores were found to be skewed (<xref ref-type="supplementary-material" rid="SM1">Supplementary Text 3</xref>), and to some extent there appears to be a statistical dependence between different factors, i.e., the shapes of 2D joint distributions of two factors differ from the product of the marginals (Supplementary Figure 2 of Gerlach et al., <xref ref-type="bibr" rid="B3">2018</xref>). Based on these considerations, we suggest that the results of Gerlach et al. do not necessarily reflect cluster structures and instead could reflect skewness of the distribution. The distribution of factor scores can be skewed, for example, by range restriction (discretization) of responses to questionnaire items (see Rice and Richardson, <xref ref-type="bibr" rid="B7">2014</xref>). The dependence among factor scores can arise due to a rotation procedure in the factor analysis; non-linear dependence between dimensions arises by rotating non-Gaussian variables (Hyv&#x000E4;rinen and Oja, <xref ref-type="bibr" rid="B5">2000</xref>).</p>
<p>Our discussion is closely related to another commentary on Gerlach et al. (<xref ref-type="bibr" rid="B3">2018</xref>) by Freudenstein et al. (<xref ref-type="bibr" rid="B2">2019</xref>). By reanalyzing the Johnson-300 data set (Johnson, <xref ref-type="bibr" rid="B6">2014</xref>), Freudenstein et al. (<xref ref-type="bibr" rid="B2">2019</xref>) pointed out that fewer than half (42%) of the respondents were classified into the four meaningful clusters. The mechanism that we suggest provides a natural explanation for this result. That is, if meaningful clusters just represent the edge of the skewed distribution rather than a higher density region in the fitted model (as in <xref ref-type="fig" rid="F1">Figure 1D</xref>), the majority of the samples are not necessarily classified into such clusters. Indeed, only 45.5% of the samples in <xref ref-type="fig" rid="F1">Figure 1D</xref> are classified into one of the three &#x0201C;meaningful clusters&#x0201D; (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 1G</xref>).</p>
<p>In conclusion, we have demonstrated the possibility that the skewness of the distribution can influence the personality types reported by Gerlach et al. (<xref ref-type="bibr" rid="B3">2018</xref>), although we did not formally evaluate how much their results were actually affected by this skewness. A formal evaluation may require novel statistical methods that can represent and quantify the skewness of a multivariate distribution appropriately. Our demonstration suggests that, despite the seminal work by Gerlach et al. (<xref ref-type="bibr" rid="B3">2018</xref>), it is still an open question whether the distribution of personality should be characterized as categorical, dimensional, or something intermediate between the two.</p>
</sec>
<sec id="s3">
<title>Data Availability Statement</title>
<p>The R script used for the simulation is available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/kkatahira/personality_skewness">https://github.com/kkatahira/personality_skewness</ext-link>.</p>
</sec>
<sec id="s4">
<title>Author Contributions</title>
<p>KK, YK, YY, and SS designed the research. KK conducted simulations and analyzed the data. KK and SS drafted the manuscript. YK and YY provided critical revisions.</p>
<sec>
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
</sec>
</body>
<back>
<sec sec-type="supplementary-material" id="s5">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fdata.2020.00008/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fdata.2020.00008/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bauer</surname> <given-names>D. J.</given-names></name> <name><surname>Curran</surname> <given-names>P. J.</given-names></name></person-group> (<year>2004</year>). <article-title>The integration of continuous and discrete latent variable models: potential problems and promising opportunities</article-title>. <source>Psychol. Methods</source> <volume>9</volume>, <fpage>3</fpage>&#x02013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1037/1082-989X.9.1.3</pub-id><pub-id pub-id-type="pmid">15053717</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Freudenstein</surname> <given-names>J.-P.</given-names></name> <name><surname>Strauch</surname> <given-names>C.</given-names></name> <name><surname>Mussel</surname> <given-names>P.</given-names></name> <name><surname>Ziegler</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>Four personality types may be neither robust nor exhaustive</article-title>. <source>Nat. Hum. Behav.</source> <volume>3</volume>, <fpage>1045</fpage>&#x02013;<lpage>1046</lpage>. <pub-id pub-id-type="doi">10.1038/s41562-019-0721-4</pub-id><pub-id pub-id-type="pmid">31527680</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gerlach</surname> <given-names>M.</given-names></name> <name><surname>Farb</surname> <given-names>B.</given-names></name> <name><surname>Revelle</surname> <given-names>W.</given-names></name> <name><surname>Nunes Amaral</surname> <given-names>L. A.</given-names></name></person-group> (<year>2018</year>). <article-title>A robust data-driven approach identifies four personality types across four large data sets</article-title>. <source>Nat. Hum. Behav.</source> <volume>2</volume>, <fpage>735</fpage>&#x02013;<lpage>742</lpage>. <pub-id pub-id-type="doi">10.1038/s41562-018-0419-z</pub-id><pub-id pub-id-type="pmid">31406291</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Goldberg</surname> <given-names>L. R.</given-names></name></person-group> (<year>1990</year>). <article-title>An alternative description of personality: the big-five factor structure</article-title>. <source>J. Pers. Soc. Psychol.</source> <volume>59</volume>, <fpage>1216</fpage>&#x02013;<lpage>1229</lpage>.</citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hyv&#x000E4;rinen</surname> <given-names>A.</given-names></name> <name><surname>Oja</surname> <given-names>E.</given-names></name></person-group> (<year>2000</year>). <article-title>Independent component analysis: algorithms and applications</article-title>. <source>Neural Netw.</source> <volume>13</volume>, <fpage>411</fpage>&#x02013;<lpage>430</lpage>. <pub-id pub-id-type="doi">10.1016/S0893-6080(00)00026-5</pub-id><pub-id pub-id-type="pmid">10946390</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>J. A.</given-names></name></person-group> (<year>2014</year>). <article-title>Measuring thirty facets of the Five Factor Model with a 120-item public domain inventory: development of the IPIP-NEO-120</article-title>. <source>J. Res. Pers.</source> <volume>51</volume>, <fpage>78</fpage>&#x02013;<lpage>89</lpage>. <pub-id pub-id-type="doi">10.1016/j.jrp.2014.05.003</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rice</surname> <given-names>K. G.</given-names></name> <name><surname>Richardson</surname> <given-names>C. M. E.</given-names></name></person-group> (<year>2014</year>). <article-title>Classification challenges in perfectionism</article-title>. <source>J. Couns. Psychol.</source> <volume>61</volume>, <fpage>641</fpage>&#x02013;<lpage>648</lpage>. <pub-id pub-id-type="doi">10.1037/cou0000040</pub-id><pub-id pub-id-type="pmid">25111705</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Robins</surname> <given-names>R. W.</given-names></name> <name><surname>John</surname> <given-names>O. P.</given-names></name> <name><surname>Caspi</surname> <given-names>A.</given-names></name> <name><surname>Moffitt</surname> <given-names>T. E.</given-names></name> <name><surname>Stouthamer-Loeber</surname> <given-names>M.</given-names></name></person-group> (<year>1996</year>). <article-title>Resilient, overcontrolled, and undercontrolled boys: three replicable personality types</article-title>. <source>J. Pers. Soc. Psychol.</source> <volume>70</volume>, <fpage>157</fpage>&#x02013;<lpage>171</lpage>. <pub-id pub-id-type="doi">10.1037//0022-3514.70.1.157</pub-id><pub-id pub-id-type="pmid">8558407</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Roeder</surname> <given-names>K.</given-names></name> <name><surname>Wasserman</surname> <given-names>L.</given-names></name></person-group> (<year>1997</year>). <article-title>Practical Bayesian density estimation using mixtures of normals</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>92</volume>, <fpage>894</fpage>&#x02013;<lpage>902</lpage>.</citation></ref>
</ref-list>
<fn-group>
<fn fn-type="financial-disclosure"><p><bold>Funding.</bold> This work was supported by JSPS KAKENHI Grant Numbers JP17H05946 and JP19H04902 (KK), 16H05957 (YK), JP17H06039 (YY) and JP17H05933 (SS), and JST CREST Grant Number JPMJCR16E2 (YY).</p>
</fn>
</fn-group>
</back>
</article>