<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Behav. Neurosci.</journal-id>
<journal-title>Frontiers in Behavioral Neuroscience</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Behav. Neurosci.</abbrev-journal-title>
<issn pub-type="epub">1662-5153</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fnbeh.2020.581154</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Behavioral Neuroscience</subject>
<subj-group>
<subject>Technology Report</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>MacaquePose: A Novel &#x0201C;In the Wild&#x0201D; Macaque Monkey Pose Dataset for Markerless Motion Capture</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Labuguen</surname> <given-names>Rollyn</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/867323/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Matsumoto</surname> <given-names>Jumpei</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/198757/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Negrete</surname> <given-names>Salvador Blanco</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1180415/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Nishimaru</surname> <given-names>Hiroshi</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/367616/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Nishijo</surname> <given-names>Hisao</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/26080/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Takada</surname> <given-names>Masahiko</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Go</surname> <given-names>Yasuhiro</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Inoue</surname> <given-names>Ken-ichi</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/104205/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Shibata</surname> <given-names>Tomohiro</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c003"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2424/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Human Intelligence Systems, Graduate School of Life Science and Systems Engineering, Kyushu Institute of Technology</institution>, <addr-line>Kitakyushu</addr-line>, <country>Japan</country></aff>
<aff id="aff2"><sup>2</sup><institution>Systems Emotional Science, University of Toyama</institution>, <addr-line>Toyama</addr-line>, <country>Japan</country></aff>
<aff id="aff3"><sup>3</sup><institution>Systems Neuroscience Section, Department of Neuroscience, Primate Research Institute, Kyoto University</institution>, <addr-line>Inuyama</addr-line>, <country>Japan</country></aff>
<aff id="aff4"><sup>4</sup><institution>Cognitive Genomics Research Group, Exploratory Research Center on Life and Living Systems (ExCELLS) National Institutes of Natural Sciences</institution>, <addr-line>Okazaki</addr-line>, <country>Japan</country></aff>
<aff id="aff5"><sup>5</sup><institution>Department of System Neuroscience, National Institute for Physiological Sciences</institution>, <addr-line>Okazaki</addr-line>, <country>Japan</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Gernot Riedel, University of Aberdeen, United Kingdom</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Joshua C. Brumberg, Queens College (CUNY), United States; Valeria Manera, University of Nice Sophia Antipolis, France</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Rollyn Labuguen <email>labuguen-rollyn&#x00040;edu.brain.kyutech.ac.jp</email></corresp>
<corresp id="c002">Jumpei Matsumoto <email>jm&#x00040;med.u-toyama.ac.jp</email></corresp>
<corresp id="c003">Tomohiro Shibata <email>tom&#x00040;brain.kyutech.ac.jp</email></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Pathological Conditions, a section of the journal Frontiers in Behavioral Neuroscience</p></fn></author-notes>
<pub-date pub-type="epub">
<day>18</day>
<month>01</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2020</year>
</pub-date>
<volume>14</volume>
<elocation-id>581154</elocation-id>
<history>
<date date-type="received">
<day>08</day>
<month>07</month>
<year>2020</year>
</date>
<date date-type="accepted">
<day>15</day>
<month>12</month>
<year>2020</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2021 Labuguen, Matsumoto, Negrete, Nishimaru, Nishijo, Takada, Go, Inoue and Shibata.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Labuguen, Matsumoto, Negrete, Nishimaru, Nishijo, Takada, Go, Inoue and Shibata</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract><p>Video-based markerless motion capture permits quantification of an animal&#x00027;s pose and motion, with a high spatiotemporal resolution in a naturalistic context, and is a powerful tool for analyzing the relationship between the animal&#x00027;s behaviors and its brain functions. Macaque monkeys are excellent non-human primate models, especially for studying neuroscience. Due to the lack of a dataset allowing training of a deep neural network for the macaque&#x00027;s markerless motion capture in the naturalistic context, it has been challenging to apply this technology for macaque-based studies. In this study, we created MacaquePose, a novel open dataset with manually labeled body part positions (keypoints) for macaques in naturalistic scenes, consisting of &#x0003E;13,000 images. We also validated the application of the dataset by training and evaluating an artificial neural network with the dataset. The results indicated that the keypoint estimation performance of the trained network was close to that of a human. The dataset will be instrumental to train/test the neural networks for markerless motion capture of the macaques and developments of the algorithms for the networks, contributing to the establishment of an innovative platform for behavior analysis for non-human primates for neuroscience and medicine, as well as other fields using macaques as a model organism.</p></abstract>
<kwd-group>
<kwd>non-human primate</kwd>
<kwd>deep learning</kwd>
<kwd>pose estimation</kwd>
<kwd>large-scale dataset</kwd>
<kwd>behavior analysis</kwd>
</kwd-group>
<contract-num rid="cn001">16H06534</contract-num>
<contract-num rid="cn001">19H04984</contract-num>
<contract-sponsor id="cn001">Japan Society for the Promotion of Science<named-content content-type="fundref-id">10.13039/501100001691</named-content></contract-sponsor>
<counts>
<fig-count count="3"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="30"/>
<page-count count="8"/>
<word-count count="4797"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>Behavior analyses are fundamental for understanding brain functions and malfunctions (Datta et al., <xref ref-type="bibr" rid="B9">2019</xref>). Motion capture technologies allow the quantification of animal&#x00027;s pose and motion with a high spatiotemporal resolution enabling the study of the relationship between various brain functions and behaviors (Vargas-Irwin et al., <xref ref-type="bibr" rid="B26">2008</xref>; Nagasaka et al., <xref ref-type="bibr" rid="B21">2011</xref>; Mathis and Mathis, <xref ref-type="bibr" rid="B20">2020</xref>). However, attaching the physical markers for the motion capture is often not practical for animal studies, as the markers themselves disturb/change the subject&#x00027;s behavior (Nakamura et al., <xref ref-type="bibr" rid="B22">2016</xref>; Mathis et al., <xref ref-type="bibr" rid="B18">2018</xref>; Berger et al., <xref ref-type="bibr" rid="B2">2020</xref>). Thanks to recent advances in machine vision using deep learning, the video-based markerless motion capture has been developed to a level permitting practical use (Mathis and Mathis, <xref ref-type="bibr" rid="B20">2020</xref>), in which an artificial neural network predicts the location of body parts in a video without the requirement for physical markers, and enabled successful behavioral studies in rodents (e.g., Cregg et al., <xref ref-type="bibr" rid="B8">2020</xref>; Dooley et al., <xref ref-type="bibr" rid="B11">2020</xref>; Mathis and Mathis, <xref ref-type="bibr" rid="B20">2020</xref>). Macaque monkeys are an important non-human primate model, particularly in the field of neuroscience (Kalin and Shelton, <xref ref-type="bibr" rid="B16">2006</xref>; Capitanio and Emborg, <xref ref-type="bibr" rid="B6">2008</xref>; Nelson and Winslow, <xref ref-type="bibr" rid="B24">2008</xref>; Watson and Platt, <xref ref-type="bibr" rid="B28">2012</xref>). 
The robust markerless motion capture using deep learning will allow studying various complex naturalistic behaviors in detail, and permit investigation of relationship between naturalistic behaviors and brain functions (Datta et al., <xref ref-type="bibr" rid="B9">2019</xref>; Mathis and Mathis, <xref ref-type="bibr" rid="B20">2020</xref>). Analyzing naturalistic behavior is crucial in brain-science, since the brain evolved from natural behaviors, and various behaviors, such as complex social behaviors, can be observed only in the natural situations (Datta et al., <xref ref-type="bibr" rid="B9">2019</xref>; Mathis and Mathis, <xref ref-type="bibr" rid="B20">2020</xref>). The deep neural networks usually require manually labeled body parts positions in thousands of pictures to learn prediction of the body parts positions in an arbitrary picture. However, such a large labeled dataset for macaque monkeys in the naturalistic scene has not been developed. The lack of this dataset limits the markerless motion capture technology applications for macaque studies (Bala et al., <xref ref-type="bibr" rid="B1">2020</xref>; Berger et al., <xref ref-type="bibr" rid="B2">2020</xref>).</p>
<p>To overcome this limitation, we created a novel open dataset of the manually labeled body part positions (keypoints) for macaques in naturalistic scenes, consisting of &#x0003E;13,000 pictures. We also validated the usefulness of the dataset by training and evaluating an artificial neural network with the dataset. The results revealed that the keypoint estimation performance of the trained network was close to that of a human level. Our dataset will provide a basis for markerless motion capture of naturalistic behaviors.</p>
</sec>
<sec sec-type="materials and methods" id="s2">
<title>Materials and Methods</title>
<sec>
<title>Image Data Collection</title>
<p>A total of 13,083 images of macaque monkeys were obtained from the internet or were captured in zoos or the Primate Research Institute of Kyoto University. Images on the internet were obtained through Google Open Images (<ext-link ext-link-type="uri" xlink:href="https://storage.googleapis.com/openimages/web/index.html">https://storage.googleapis.com/openimages/web/index.html</ext-link>) by searching for images with a &#x0201C;macaque&#x0201D; tag. Pictures in zoos were acquired from outside of the breeding areas, with permission granted by the zoos. Images in the Primate Research Institute of Kyoto University were taken in the breeding fields without causing any specific interventions to the monkeys. The photo capturing in the institute was approved by the Animal Welfare and Animal Care Committee of the Primate Research Institute of Kyoto University and conducted in accordance with the Guidelines for the Care and Use of Animals of the Primate Research Institute, Kyoto University.</p>
</sec>
<sec>
<title>Image Data Annotation</title>
<p>The positions of 17 keypoints (nose and left and right ears, eyes, shoulders, elbows, wrists, hips, knees, and ankles) and instance segmentation for each monkey in each of the pictures were first annotated by non-researchers employed by Baobab Inc. (Chiyoda-ku, Japan). As further expertise was required for high-quality monkey annotation, the keypoint labels were then further refined by eight researchers working with macaques at Kyoto University and the University of Toyama, using a custom-made Python script. The keypoints were labeled according to the following guidelines: (1) The keypoints of the limbs (shoulder, elbow, wrist, hip, knee, and ankle) should be located at the center of the joint rotation. (2) Ear, eye, and nose keypoints should be located at the entrance of the ear canal, the center of the eyeball, and the middle position between the entrances of the two nostrils, respectively. (3) A keypoint was annotated if its position was predictable despite being occluded, except for ears, eyes, and nose facing the back side of the picture. The resultant labels were compatible with the Microsoft COCO Keypoint Dataset (Lin et al., <xref ref-type="bibr" rid="B17">2014</xref>).</p>
</sec>
<sec>
<title>Performance Evaluation of an Artificial Neural Network Trained With the Present Dataset</title>
<p>To validate the present dataset, we trained an artificial neural network estimating keypoint positions by using the DeepLabCut algorithm proposed for markerless pose estimation in animals (Mathis et al., <xref ref-type="bibr" rid="B18">2018</xref>). Briefly, DeepLabCut is a versatile and straightforward algorithm in which the 50-layer ResNet pre-trained for the ImageNet object recognition task (He et al., <xref ref-type="bibr" rid="B14">2016</xref>) is transferred for the keypoint estimation by replacing the classification layer at the output of the ResNet with the deconvolutional layers (see <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 1</xref> for the network architecture of the DeepLabCut). The utilization of transfer learning allows the DeepLabCut algorithm to require a relatively small number of training data (Nath et al., <xref ref-type="bibr" rid="B23">2019</xref>). The accuracy of keypoint prediction with the DeepLabCut algorithm has been shown to be comparable or superior to similar algorithms recently suggested for the animal pose estimation (Graving et al., <xref ref-type="bibr" rid="B13">2019</xref>). DeepLabCut is a widely used algorithm in the field of neuroscience, because of its user-friendly interface and documentation, and a well-established community, as well as its good performance. Due to DeepLabCut (version 2.1.6) currently not supporting the estimation of keypoints in multiple animals in a picture, we first generated single monkey images by masking the monkeys in the images except for one monkey and used these masked images as the input. Some monkey images in the dataset were excluded due to technical reasons (e.g., a keypoint of one monkey is covered by the mask of the other monkeys). Then, the images were resized to adjust the length to 640 pixels while maintaining the image&#x00027;s aspect ratio, before inputting them into the network. In total, 15,476 single monkey images were generated. 
Among the images, 14,697 single monkey images were used to train the network and the rest (779 images) were used to evaluate the trained network. The network model was implemented using Python scripts with Tensorflow support. The network was trained for up to a million iterations. The training took 20 h to complete on an Nvidia GTX 1080 Ti graphics processing unit workstation.</p>
<p>The keypoint prediction by the trained network was evaluated. A predicted keypoint with confidence level &#x0003E; 0.4 was defined to be detected. First, minor cases showing the keypoint(s) detected outside the monkey segment were eliminated. True positive, true negative, false positive, and false negative detections were counted. A keypoint was defined as a correct detection by the network (true positive detection) if there was the corresponding ground truth keypoint in the same image, regardless of its location in the image. For true positive cases, the Euclidean distance between the predicted and ground truth position was calculated as the error of position estimation. The error value represented the normalized value with respect to the length of the monkey&#x00027;s bounding box due to variations in the size of the monkey in the images. To check the accuracy of the predicted pose, the root-mean-square error (RMSE) was also calculated with all keypoints in each image (Mathis et al., <xref ref-type="bibr" rid="B18">2018</xref>). To evaluate the error values of the keypoint position predictions, we investigated human variability by calculating the errors between the keypoint positions annotated by two humans. Finally, among the true positive cases, numbers of limb keypoints misattributed as the homologous keypoint on another limb (e.g., left wrist misattributed as right wrist, left ankle, or right ankle) were also counted. Specifically, the <italic>i</italic>-th keypoint was defined as being misattributed to a homologous <italic>j</italic>-th keypoint on another limb, if the keypoint satisfied both of the following two conditions: (1) the normalized position error of the <italic>i</italic>-th keypoint was &#x0003E;20%; (2) the ground truth position of the <italic>j</italic>-th keypoint was closest to the predicted position of the <italic>i</italic>-th keypoint among the ground truth positions of homologous keypoints. 
Note that these keypoint predictions obtained with the trained network were evaluated on a set of test images that were not included during training of the network.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<p>In total, the present dataset contains keypoints and instance segmentation of 16,393 monkeys in 13,083 pictures. Each picture captures 1&#x02013;5 monkeys; 10,630 pictures with a single monkey and 2,453 pictures with multiple monkeys (<xref ref-type="fig" rid="F1">Figure 1</xref>).</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Examples of pictures and labels in the present dataset.</p></caption>
<graphic xlink:href="fnbeh-14-581154-g0001.tif"/>
</fig>
<p>To validate the dataset, we trained an artificial network with 14,697 single monkey images in the dataset using the DeepLabCut algorithm (Mathis et al., <xref ref-type="bibr" rid="B18">2018</xref>). The performance of the keypoint prediction of the trained network was evaluated on 779 test images unseen during training. <xref ref-type="fig" rid="F2">Figure 2</xref> shows examples of the keypoint predictions (see <xref ref-type="supplementary-material" rid="SM4">Supplementary Video 1</xref> for keypoint prediction for movies). Among 779 images, 24 images had keypoint(s) detected outside the target monkey. Most of them (17 images) were due to imperfect masks of the other monkeys in the picture (<xref ref-type="supplementary-material" rid="SM2">Supplementary Figure 2</xref>). The &#x0201C;out of monkey&#x0201D; cases were removed from the analysis.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Examples of test image predictions. Test images (left), the ground truth keypoint positions (center) and the position predicted by the artificial neural network trained with the present dataset using the DeepLabCut algorithm (right; <bold>a&#x02013;h</bold>). The inset (top right corner) shows color codes of the keypoints. Red arrows in <bold>(h)</bold> indicate a misattribution error.</p></caption>
<graphic xlink:href="fnbeh-14-581154-g0002.tif"/>
</fig>
<p>We investigated the performance of keypoint detection (judging whether a keypoint exists anywhere in the picture or not) of the trained network (<xref ref-type="supplementary-material" rid="SM5">Supplementary Table 1</xref>). Both precision and recall of the keypoint detection were approximately 90% in most of the keypoints, suggesting good detection performance.</p>
<p>To further investigate the accuracy of the detected keypoints, the error of predicted position was calculated for each keypoint (<xref ref-type="fig" rid="F3">Figure 3</xref>, gray bar). The prediction&#x00027;s RMSE values (6.02 &#x000B1; 0.18%; mean &#x000B1; s.e.m) were comparable to those between the positions manually labeled by two different people (5.74 &#x000B1; 0.16%; <italic>p</italic> = 0.250, Student&#x00027;s <italic>t</italic>-test), suggesting that the trained network&#x00027;s performance in the keypoint position estimation was close to the human level. The effect of the label refinement by researchers was also examined. The error values for the dataset before the refinement were calculated as previously mentioned. The analyses revealed that the averaged RMSE values after the refinement (6.02 &#x000B1; 0.18%) were significantly smaller than those before the refinement (7.83 &#x000B1; 0.23%; <italic>p</italic> = 9.13 &#x000D7; 10<sup>&#x02212;10</sup>, Student&#x00027;s <italic>t</italic>-test; see <xref ref-type="supplementary-material" rid="SM3">Supplementary Figure 3</xref> for the error value of each keypoint). The result suggests that the network trained with the dataset refined by the researchers predicted the keypoint more consistently.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Averaged error of predicted (gray) and manual labeled (white) positions of each keypoint comparing with the ground truth positions. Error bars represent standard error of the mean (s.e.m).</p></caption>
<graphic xlink:href="fnbeh-14-581154-g0003.tif"/>
</fig>
<p>In some cases, we observed that the predicted positions of monkey&#x00027;s keypoints on a limb were located on homologous keypoints on another limb (<xref ref-type="fig" rid="F2">Figure 2h</xref>, see also <xref ref-type="supplementary-material" rid="SM4">Supplementary Video 1</xref>).</p>
<p>We then quantified the frequency of such misattribution errors (<xref ref-type="table" rid="T2">Table 2</xref>). The misattribution errors were relatively frequent in the distal keypoints (elbow, knee, wrist, and ankle), especially on the hind limbs. The total number of images having at least one misattribution error was 114 (15%). The result shows that there is still room for improvement, although the RMSE indicates human-level performance.</p>
</sec>
<sec id="s4">
<title>Discussion</title>
<p>In this study, we created a novel large dataset of labeled keypoints of macaque monkeys (<xref ref-type="fig" rid="F1">Figure 1</xref>, <xref ref-type="table" rid="T1">Table 1</xref>). The keypoint estimation performance of the neural network trained with the dataset was close to that of human level (<xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref>; <xref ref-type="supplementary-material" rid="SM4">Supplementary Video 1</xref>), demonstrating the usefulness of the present dataset. We also found a significant improvement of the network prediction after the label refinement by researchers working with macaques (<xref ref-type="supplementary-material" rid="SM3">Supplementary Figure 3</xref>), suggesting that the refinement successfully enhanced the quality of the dataset. Although we tested only single monkey images due to the limitation of the algorithm, the present dataset should be useful to train/test the network for multi-animal motion capture. The label formats in the present dataset are compatible with those used in the COCO dataset for humans (Lin et al., <xref ref-type="bibr" rid="B17">2014</xref>), allowing users to try a direct application of algorithms developed for human motion capture. A recent study also proposed a similarly sized labeled dataset of rhesus monkeys (Bala et al., <xref ref-type="bibr" rid="B1">2020</xref>). In the study, they captured freely moving monkeys in a 2.5 m cubic cage with 62 cameras surrounding the cage. The multi-camera system allows reconstruction of the 3D pose after manually labeling images simultaneously captured from 3 to 4 views. Interestingly, the reconstructed 3D pose is projected to the around 60 other views and enables automatic labeling of the images from all the views. This cross-view data augmentation allowed them to get labels of around 200,000 monkey images with 33,192 images labeled manually. 
The critical difference between the two datasets is that pictures in their dataset were taken in a single laboratory environment, whereas our dataset consists of pictures taken in many different naturalistic environments. Thanks to the &#x0201C;in-the-wild&#x0201D; aspect of the collected pictures, the present dataset has rich variations in pose, body shape, lighting, and background in naturalistic contexts. The rich variation will help to train and test artificial neural networks with high generalizability (Mathis et al., <xref ref-type="bibr" rid="B19">2019</xref>). Thus, the two datasets will complement each other to train or test better neural networks in future studies. As the dataset formats (i.e., which keypoints are labeled) were slightly different between the two datasets, some additional efforts are necessary to combine or compare these two datasets directly.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>The number of pictures and monkeys in the present dataset from each source.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Source</bold></th>
<th valign="top" align="left"><bold>Monkey Species</bold></th>
<th valign="top" align="center"><bold>No. of Pictures</bold></th>
<th valign="top" align="center"><bold>No. of Monkeys</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Toyama Municipal Family Park Zoo</td>
<td valign="top" align="left">Japanese Macaque</td>
<td valign="top" align="center">3,784</td>
<td valign="top" align="center">4,952</td>
</tr>
<tr>
<td valign="top" align="left">Itozu no Mori Zoological Park</td>
<td valign="top" align="left">Japanese Macaque</td>
<td valign="top" align="center">1,312</td>
<td valign="top" align="center">1,622</td>
</tr>
<tr>
<td valign="top" align="left">Primate Research Institute</td>
<td valign="top" align="left">Japanese Macaque</td>
<td valign="top" align="center">1,641</td>
<td valign="top" align="center">2,131</td>
</tr>
<tr>
<td valign="top" align="left">Inokashira Park Zoo</td>
<td valign="top" align="left">Rhesus Macaque</td>
<td valign="top" align="center">2,747</td>
<td valign="top" align="center">3,203</td>
</tr>
<tr>
<td valign="top" align="left">Tobu Zoo</td>
<td valign="top" align="left">Rhesus Macaque</td>
<td valign="top" align="center">2,461</td>
<td valign="top" align="center">2,755</td>
</tr>
<tr style="border-bottom: thin solid #000000;">
<td valign="top" align="left">Google Open Images</td>
<td valign="top" align="left">Various</td>
<td valign="top" align="center">1,138</td>
<td valign="top" align="center">1,730</td>
</tr> <tr>
<td valign="top" align="left">Total</td>
<td/>
<td valign="top" align="center">13,083</td>
<td valign="top" align="center">16,393</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To understand how the brain generates our behavior, analyzing naturalistic behaviors is crucial. The brain evolved from natural behaviors, and various behaviors, such as complex social behaviors, can be observed only in the natural situations (Datta et al., <xref ref-type="bibr" rid="B9">2019</xref>; Mathis and Mathis, <xref ref-type="bibr" rid="B20">2020</xref>). The high-resolution spatiotemporal data obtained with the markerless motion capture will also aid in understanding brain dynamics underlying the behavior (Berger et al., <xref ref-type="bibr" rid="B2">2020</xref>). Specific posture and motion are informative for studying animals&#x00027; emotions and intention (Nakamura et al., <xref ref-type="bibr" rid="B22">2016</xref>), and the motor functions (Berger et al., <xref ref-type="bibr" rid="B2">2020</xref>). Furthermore, the automatic and long-term analyses of naturalistic behavior from a large number of subjects permit new data-driven approaches to find unusual behaviors, personalities, and underlying genetic and neural mechanisms (Vogelstein et al., <xref ref-type="bibr" rid="B27">2014</xref>; De Chaumont et al., <xref ref-type="bibr" rid="B10">2019</xref>). For instance, the recently discovered autistic traits exhibited by macaque monkeys (Yoshida et al., <xref ref-type="bibr" rid="B29">2016</xref>) were identified by such a behavioral observation. Thus, the markerless motion capture for macaque monkeys developed based on the present dataset will be of great use for many neuroscience studies.</p>
<p>The performance evaluation of the network trained with the present dataset revealed that there is still room for improvement regarding the misattribution of the limb keypoints (<xref ref-type="fig" rid="F2">Figure 2h</xref>, <xref ref-type="table" rid="T2">Table 2</xref>), although the RMSE indicates the human-level performance (<xref ref-type="fig" rid="F3">Figure 3</xref>). The DeepLabCut algorithm (Mathis et al., <xref ref-type="bibr" rid="B18">2018</xref>) used in the present evaluation does not explicitly utilize the prior knowledge about the animal&#x00027;s body, whereas the other algorithms were suggested to use the connection between keypoints (Insafutdinov et al., <xref ref-type="bibr" rid="B15">2016</xref>; Cao et al., <xref ref-type="bibr" rid="B5">2017</xref>) or 3D shape of the subject (Biggs et al., <xref ref-type="bibr" rid="B4">2018</xref>; Zuffi et al., <xref ref-type="bibr" rid="B30">2019</xref>). Such utilization of the prior knowledge may help to improve the estimation. However, even the state-of-the-art human motion capture algorithms also have difficulties in analyzing the pictures with severe occlusion or crowded people (Mathis and Mathis, <xref ref-type="bibr" rid="B20">2020</xref>). Due to severe occlusions more frequently being observed in naturalistic behaviors in monkeys than in humans, better algorithms may be required in the future. An alternative approach for the improvement will be enriching the dataset itself. Although we tried to capture many different poses in various contexts, the sampling was biased to the frequently observed poses. Adding data selectively for the rarely observed poses may improve the performance of the trained network. 
Combining with the other monkey datasets made for laboratory environments (Bala et al., <xref ref-type="bibr" rid="B1">2020</xref>; Berger et al., <xref ref-type="bibr" rid="B2">2020</xref>) or transfer learning of the network trained with the human dataset (Sanakoyeu et al., <xref ref-type="bibr" rid="B25">2020</xref>) are also interesting approaches. Nevertheless, in practice, the performance of the network shown in the present study may be sufficient for many applications, after appropriate temporal filtering of the motion data (Berman et al., <xref ref-type="bibr" rid="B3">2014</xref>; Nath et al., <xref ref-type="bibr" rid="B23">2019</xref>) and additional training with the labels made on the pictures in the target experiment (Mathis et al., <xref ref-type="bibr" rid="B19">2019</xref>).</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Number of the misattribution errors.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Keypoint pairs</bold></th>
<th valign="top" align="left"><bold>Correct</bold></th>
<th valign="top" align="center"><bold>L-R incorrect</bold></th>
<th valign="top" align="center"><bold>F-H incorrect</bold></th>
<th valign="top" align="center"><bold>L-R and F-H incorrect</bold></th>
<th valign="top" align="center"><bold>Total</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Shoulder</td>
<td valign="top" align="left">1,225</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">9</td>
</tr>
<tr>
<td valign="top" align="left">Hip</td>
<td valign="top" align="left">1,062</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">12</td>
</tr>
<tr>
<td valign="top" align="left">Elbow</td>
<td valign="top" align="left">1,134</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">20</td>
</tr>
<tr>
<td valign="top" align="left">Knee</td>
<td valign="top" align="left">1,066</td>
<td valign="top" align="center">45</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">59</td>
</tr>
<tr>
<td valign="top" align="left">Wrist</td>
<td valign="top" align="left">1,049</td>
<td valign="top" align="center">14</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">27</td>
</tr>
<tr>
<td valign="top" align="left">Ankle</td>
<td valign="top" align="left">1,045</td>
<td valign="top" align="center">29</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">45</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>L-R incorrect, the left or right label of the predicted keypoint was incorrect; F-H incorrect, the forelimb or hindlimb label was incorrect</italic>.</p>
</table-wrap-foot>
</table-wrap>
<p>In the present study, we evaluated the keypoint estimation in 2D images by the neural network. However, for the next step of behavior analysis, the researchers would need to reconstruct the 3D pose and motion of the animals (Nath et al., <xref ref-type="bibr" rid="B23">2019</xref>; Bala et al., <xref ref-type="bibr" rid="B1">2020</xref>) then label the behaviors that the animals are exhibiting based on the estimated pose and motion (Datta et al., <xref ref-type="bibr" rid="B9">2019</xref>). The post-processing methods for converting the high-dimensional motion data into meaningful and interpretable behavioral events and parameters of a single animal or interacting animals are still under active development (Berman et al., <xref ref-type="bibr" rid="B3">2014</xref>; Datta et al., <xref ref-type="bibr" rid="B9">2019</xref>; Dviwedi et al., <xref ref-type="bibr" rid="B12">2020</xref>). The present dataset will permit simple access to motion data of macaques in various environments, and this could accelerate the development of post-processing methods by accumulating the motion data associated with various natural behaviors. It is also interesting to add labels of monkey behavior (e.g., running, eating, sleeping, grooming, fighting, etc.) engaged in each picture in the present dataset, for the development of the behavioral event detection methods.</p>
</sec>
<sec sec-type="conclusions" id="s5">
<title>Conclusion</title>
<p>We created a novel large open dataset of keypoint labels of macaques in naturalistic scenes. The dataset will be instrumental to train/test the neural networks for markerless motion capture of the macaques and developments of the algorithms for the networks, contributing to the establishment of an innovative platform of behavior analysis for non-human primates for neuroscience and medicine, as well as the other fields using macaques (Carlsson et al., <xref ref-type="bibr" rid="B7">2004</xref>).</p>
</sec>
<sec sec-type="data-availability-statement" id="s6">
<title>Data Availability Statement</title>
<p>The dataset for this study is publicly available on the website of Primate Research Institute, Kyoto University (<ext-link ext-link-type="uri" xlink:href="http://www.pri.kyoto-u.ac.jp/datasets/">http://www.pri.kyoto-u.ac.jp/datasets/</ext-link>). The trained network model described in the present paper is readily available through DeepLabCut Model Zoo (<ext-link ext-link-type="uri" xlink:href="http://www.mousemotorlab.org/dlc-modelzoo">http://www.mousemotorlab.org/dlc-modelzoo</ext-link>). The other raw data supporting the conclusions of this article will be made available by the authors upon request.</p>
</sec>
<sec id="s7">
<title>Ethics Statement</title>
<p>The animal study was reviewed and approved by Animal Welfare and Animal Care Committee of the Primate Research Institute of Kyoto University.</p>
</sec>
<sec id="s8">
<title>Author Contributions</title>
<p>RL, TS, JM, KI, YG, HNishij, and HNishim designed this research. JM, KI, TS, RL, and MT created the dataset. RL, SBN, JM, and TS evaluated the performance of the neural network trained with the dataset. All the authors discussed the results and commented on the manuscript, read and approved the final manuscript.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
</body>
<back>
<ack><p>We would like to thank Yu Yang, Yang Meng, Kei Kimura, Yukiko Otsuka, Andi Zheng, Jungmin Oh, Gaoge Yan, and Yuki Kinoshita for manually labeling and refining the keypoints.</p></ack>
<sec sec-type="supplementary-material" id="s9">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fnbeh.2020.581154/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fnbeh.2020.581154/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Image_1.PDF" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image_2.PDF" id="SM2" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image_3.PDF" id="SM3" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Video_1.MP4" id="SM4" mimetype="video/mp4" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_1.DOCX" id="SM5" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bala</surname> <given-names>P. C.</given-names></name> <name><surname>Eisenreich</surname> <given-names>B. R.</given-names></name> <name><surname>Yoo</surname> <given-names>S. B. M.</given-names></name> <name><surname>Hayden</surname> <given-names>B. Y.</given-names></name> <name><surname>Park</surname> <given-names>H. S.</given-names></name> <name><surname>Zimmermann</surname> <given-names>J.</given-names></name></person-group> (<year>2020</year>). <article-title>Automated markerless pose estimation in freely moving macaques with OpenMonkeyStudio</article-title>. <source>Nat. Commun.</source> <volume>11</volume>:<fpage>4560</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-020-18441-5</pub-id><pub-id pub-id-type="pmid">32917899</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Berger</surname> <given-names>M.</given-names></name> <name><surname>Agha</surname> <given-names>N. S.</given-names></name> <name><surname>Gail</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>Wireless recording from unrestrained monkeys reveals motor goal encoding beyond immediate reach in frontoparietal cortex</article-title>. <source>Elife</source> <volume>9</volume>:<fpage>e51322</fpage>. <pub-id pub-id-type="doi">10.7554/eLife.51322</pub-id><pub-id pub-id-type="pmid">32364495</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Berman</surname> <given-names>G. J.</given-names></name> <name><surname>Choi</surname> <given-names>D. M.</given-names></name> <name><surname>Bialek</surname> <given-names>W.</given-names></name> <name><surname>Shaevitz</surname> <given-names>J. W.</given-names></name></person-group> (<year>2014</year>). <article-title>Mapping the stereotyped behaviour of freely moving fruit flies</article-title>. <source>J. R. Soc. Interface</source> <volume>11</volume>:<fpage>20140672</fpage>. <pub-id pub-id-type="doi">10.1098/rsif.2014.0672</pub-id><pub-id pub-id-type="pmid">25142523</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Biggs</surname> <given-names>B.</given-names></name> <name><surname>Roddick</surname> <given-names>T.</given-names></name> <name><surname>Fitzgibbon</surname> <given-names>A.</given-names></name> <name><surname>Cipolla</surname> <given-names>R.</given-names></name></person-group> (<year>2018</year>). <article-title>Creatures great and SMAL: recovering the shape and motion of animals from video</article-title>, in <source>ACCV 2018: 14th Asian Conference on Computer Vision</source> (<publisher-loc>Perth, WA</publisher-loc>), <fpage>3</fpage>&#x02013;<lpage>19</lpage>.</citation></ref>
<ref id="B5">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Cao</surname> <given-names>Z.</given-names></name> <name><surname>Simon</surname> <given-names>T.</given-names></name> <name><surname>Wei</surname> <given-names>S. E.</given-names></name> <name><surname>Sheikh</surname> <given-names>Y.</given-names></name></person-group> (<year>2017</year>). <article-title>Realtime multi-person 2d pose estimation using part affinity fields</article-title>, in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source> (<publisher-loc>Honolulu, HI</publisher-loc>), <fpage>7291</fpage>&#x02013;<lpage>7299</lpage>. <pub-id pub-id-type="pmid">31331883</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Capitanio</surname> <given-names>J. P.</given-names></name> <name><surname>Emborg</surname> <given-names>M. E.</given-names></name></person-group> (<year>2008</year>). <article-title>Contributions of non-human primates to neuroscience research</article-title>. <source>Lancet</source> <volume>371</volume>, <fpage>1126</fpage>&#x02013;<lpage>1135</lpage>. <pub-id pub-id-type="doi">10.1016/S0140-6736(08)60489-4</pub-id><pub-id pub-id-type="pmid">18374844</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Carlsson</surname> <given-names>H. E.</given-names></name> <name><surname>Schapiro</surname> <given-names>S. J.</given-names></name> <name><surname>Farah</surname> <given-names>I.</given-names></name> <name><surname>Hau</surname> <given-names>J.</given-names></name></person-group> (<year>2004</year>). <article-title>Use of primates in research: a global overview</article-title>. <source>Am. J. Primatol.</source> <volume>63</volume>, <fpage>225</fpage>&#x02013;<lpage>237</lpage>. <pub-id pub-id-type="doi">10.1002/ajp.20054</pub-id><pub-id pub-id-type="pmid">15300710</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cregg</surname> <given-names>J. M.</given-names></name> <name><surname>Leiras</surname> <given-names>R.</given-names></name> <name><surname>Montalant</surname> <given-names>A.</given-names></name> <name><surname>Wanken</surname> <given-names>P.</given-names></name> <name><surname>Wickersham</surname> <given-names>I. R.</given-names></name> <name><surname>Kiehn</surname> <given-names>O.</given-names></name></person-group> (<year>2020</year>). <article-title>Brainstem neurons that command mammalian locomotor asymmetries</article-title>. <source>Nat. Neurosci.</source> <volume>23</volume>, <fpage>730</fpage>&#x02013;<lpage>740</lpage>. <pub-id pub-id-type="doi">10.1038/s41593-020-0633-7</pub-id><pub-id pub-id-type="pmid">32393896</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Datta</surname> <given-names>S. R.</given-names></name> <name><surname>Anderson</surname> <given-names>D. J.</given-names></name> <name><surname>Branson</surname> <given-names>K.</given-names></name> <name><surname>Perona</surname> <given-names>P.</given-names></name> <name><surname>Leifer</surname> <given-names>A.</given-names></name></person-group> (<year>2019</year>). <article-title>Computational neuroethology: a call to action</article-title>. <source>Neuron</source> <volume>104</volume>, <fpage>11</fpage>&#x02013;<lpage>24</lpage>. <pub-id pub-id-type="doi">10.1016/j.neuron.2019.09.038</pub-id><pub-id pub-id-type="pmid">31600508</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>De Chaumont</surname> <given-names>F.</given-names></name> <name><surname>Ey</surname> <given-names>E.</given-names></name> <name><surname>Torquet</surname> <given-names>N.</given-names></name> <name><surname>Lagache</surname> <given-names>T.</given-names></name> <name><surname>Dallongeville</surname> <given-names>S.</given-names></name> <name><surname>Imbert</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Live mouse tracker: real-time behavioral analysis of groups of mice</article-title>. <source>Nat. Biomed. Eng.</source> <volume>3</volume>, <fpage>930</fpage>&#x02013;<lpage>942</lpage>. <pub-id pub-id-type="doi">10.1038/s41551-019-0396-1</pub-id><pub-id pub-id-type="pmid">31110290</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dooley</surname> <given-names>J. C.</given-names></name> <name><surname>Glanz</surname> <given-names>R. M.</given-names></name> <name><surname>Sokoloff</surname> <given-names>G.</given-names></name> <name><surname>Blumberg</surname> <given-names>M. S.</given-names></name></person-group> (<year>2020</year>). <article-title>Self-generated whisker movements drive state-dependent sensory input to developing barrel cortex</article-title>. <source>Curr. Biol.</source> <volume>30</volume>, <fpage>2404</fpage>&#x02013;<lpage>2410.e4</lpage>. <pub-id pub-id-type="doi">10.1016/j.cub.2020.04.045</pub-id><pub-id pub-id-type="pmid">32413304</pub-id></citation></ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dviwedi</surname> <given-names>S. K.</given-names></name> <name><surname>Ngeo</surname> <given-names>J. G.</given-names></name> <name><surname>Shibata</surname> <given-names>T.</given-names></name></person-group> (<year>2020</year>). <article-title>Extraction of Nonlinear synergies for proportional and simultaneous estimation of finger kinematics</article-title>. <source>IEEE Trans. Biomed. Eng.</source> <volume>67</volume>, <fpage>2646</fpage>&#x02013;<lpage>2658</lpage>. <pub-id pub-id-type="doi">10.1109/tbme.2020.2967154</pub-id><pub-id pub-id-type="pmid">31976877</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Graving</surname> <given-names>J. M.</given-names></name> <name><surname>Chae</surname> <given-names>D.</given-names></name> <name><surname>Naik</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>L.</given-names></name> <name><surname>Koger</surname> <given-names>B.</given-names></name> <name><surname>Costelloe</surname> <given-names>B. R.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning</article-title>. <source>Elife</source> <volume>8</volume>:<fpage>e47994</fpage>. <pub-id pub-id-type="doi">10.7554/eLife.47994.sa2</pub-id><pub-id pub-id-type="pmid">31570119</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>He</surname> <given-names>K.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Ren</surname> <given-names>S.</given-names></name> <name><surname>Sun</surname> <given-names>J.</given-names></name></person-group> (<year>2016</year>). <article-title>Deep residual learning for imagerecognition</article-title>, in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source> (<publisher-loc>Piscataway, NJ</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>770</fpage>&#x02013;<lpage>778</lpage>.</citation></ref>
<ref id="B15">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Insafutdinov</surname> <given-names>E.</given-names></name> <name><surname>Pishchulin</surname> <given-names>L.</given-names></name> <name><surname>Andres</surname> <given-names>B.</given-names></name> <name><surname>Andriluka</surname> <given-names>M.</given-names></name> <name><surname>Schiele</surname> <given-names>B.</given-names></name></person-group> (<year>2016</year>). <article-title>DeeperCut: a deeper, stronger, and faster multi-person pose estimation model</article-title>, in <source>European Conference on Computer Vision</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>34</fpage>&#x02013;<lpage>50</lpage>.</citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kalin</surname> <given-names>N. H.</given-names></name> <name><surname>Shelton</surname> <given-names>S. E.</given-names></name></person-group> (<year>2006</year>). <article-title>Nonhuman primate models to study anxiety, emotion regulation, and psychopathology</article-title>. <source>Ann. N.Y. Acad. Sci.</source> <volume>1008</volume>, <fpage>189</fpage>&#x02013;<lpage>200</lpage>. <pub-id pub-id-type="doi">10.1196/annals.1301.021</pub-id><pub-id pub-id-type="pmid">14998885</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>T. Y.</given-names></name> <name><surname>Maire</surname> <given-names>M.</given-names></name> <name><surname>Belongie</surname> <given-names>S.</given-names></name> <name><surname>Hays</surname> <given-names>J.</given-names></name> <name><surname>Perona</surname> <given-names>P.</given-names></name> <name><surname>Ramanan</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>Microsoft coco: common objects in context</article-title>, in <source>European Conference on Computer Vision</source> (<publisher-loc>Zurich</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>740</fpage>&#x02013;<lpage>755</lpage>.</citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mathis</surname> <given-names>A.</given-names></name> <name><surname>Mamidanna</surname> <given-names>P.</given-names></name> <name><surname>Cury</surname> <given-names>K. M.</given-names></name> <name><surname>Abe</surname> <given-names>T.</given-names></name> <name><surname>Murthy</surname> <given-names>V. N.</given-names></name> <name><surname>Mathis</surname> <given-names>M. W.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>DeepLabCut: markerless pose estimation of user-defined body parts with deep learning</article-title>. <source>Nat. Neurosci.</source> <volume>21</volume>, <fpage>1281</fpage>&#x02013;<lpage>1289</lpage>. <pub-id pub-id-type="doi">10.1038/s41593-018-0209-y</pub-id><pub-id pub-id-type="pmid">30127430</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Mathis</surname> <given-names>A.</given-names></name> <name><surname>Y&#x000FC;ksekg&#x000F6;n&#x000FC;l</surname> <given-names>M.</given-names></name> <name><surname>Rogers</surname> <given-names>B.</given-names></name> <name><surname>Bethge</surname> <given-names>M.</given-names></name> <name><surname>Mathis</surname> <given-names>M. W.</given-names></name></person-group> (<year>2019</year>). <source>Pretraining Boosts Out-of-Domain Robustness for Pose Estimation</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/1909.11229">http://arxiv.org/abs/1909.11229</ext-link> (accessed November 12, 2020).</citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mathis</surname> <given-names>M. W.</given-names></name> <name><surname>Mathis</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>Deep learning tools for the measurement of animal behavior in neuroscience</article-title>. <source>Curr. Opin. Neurobiol.</source> <volume>60</volume>, <fpage>1</fpage>&#x02013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1016/j.conb.2019.10.008</pub-id><pub-id pub-id-type="pmid">31791006</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nagasaka</surname> <given-names>Y.</given-names></name> <name><surname>Shimoda</surname> <given-names>K.</given-names></name> <name><surname>Fujii</surname> <given-names>N.</given-names></name></person-group> (<year>2011</year>). <article-title>Multidimensional recording (MDR) and data sharing: an ecological open research and educational platform for neuroscience</article-title>. <source>PLoS ONE</source> <volume>6</volume>:<fpage>e22561</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0022561</pub-id><pub-id pub-id-type="pmid">21811633</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nakamura</surname> <given-names>T.</given-names></name> <name><surname>Matsumoto</surname> <given-names>J.</given-names></name> <name><surname>Nishimaru</surname> <given-names>H.</given-names></name> <name><surname>Bretas</surname> <given-names>R. V.</given-names></name> <name><surname>Takamura</surname> <given-names>Y.</given-names></name> <name><surname>Hori</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>A markerless 3D computerized motion capture system incorporating a skeleton model for monkeys</article-title>. <source>PLoS ONE</source>. <volume>11</volume>:<fpage>e0166154</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0166154</pub-id><pub-id pub-id-type="pmid">27812205</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nath</surname> <given-names>T.</given-names></name> <name><surname>Mathis</surname> <given-names>A.</given-names></name> <name><surname>Chen</surname> <given-names>A. C.</given-names></name> <name><surname>Patel</surname> <given-names>A.</given-names></name> <name><surname>Bethge</surname> <given-names>M.</given-names></name> <name><surname>Mathis</surname> <given-names>M. W.</given-names></name></person-group> (<year>2019</year>). <article-title>Using DeepLabCut for 3D markerless pose estimation across species and behaviors</article-title>. <source>Nat. Protoc.</source> <volume>14</volume>, <fpage>2152</fpage>&#x02013;<lpage>2176</lpage>. <pub-id pub-id-type="doi">10.1038/s41596-019-0176-0</pub-id><pub-id pub-id-type="pmid">31227823</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nelson</surname> <given-names>E. E.</given-names></name> <name><surname>Winslow</surname> <given-names>J. T.</given-names></name></person-group> (<year>2008</year>). <article-title>Non-human primates: model animals for developmental psychopathology</article-title>. <source>Neuropsychopharmacology</source> <volume>34</volume>, <fpage>90</fpage>&#x02013;<lpage>105</lpage>. <pub-id pub-id-type="doi">10.1038/npp.2008.150</pub-id><pub-id pub-id-type="pmid">18800061</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Sanakoyeu</surname> <given-names>A.</given-names></name> <name><surname>Khalidov</surname> <given-names>V.</given-names></name> <name><surname>McCarthy</surname> <given-names>M. S.</given-names></name> <name><surname>Vedaldi</surname> <given-names>A.</given-names></name> <name><surname>Neverova</surname> <given-names>N.</given-names></name></person-group> (<year>2020</year>). <article-title>Transferring dense pose to proximal animal classes</article-title>, in <source>2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</source> (<publisher-loc>Seattle, WA</publisher-loc>), <fpage>5232</fpage>&#x02013;<lpage>5241</lpage>.</citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vargas-Irwin</surname> <given-names>C. E.</given-names></name> <name><surname>Shakhnarovich</surname> <given-names>G.</given-names></name> <name><surname>Yadollahpour</surname> <given-names>P.</given-names></name> <name><surname>Mislow</surname> <given-names>J. M.</given-names></name> <name><surname>Black</surname> <given-names>M. J.</given-names></name> <name><surname>Donoghue</surname> <given-names>J. P.</given-names></name></person-group> (<year>2008</year>). <article-title>Decoding complete reach and grasp actions from local primary motor cortex populations</article-title>. <source>J. Neurosci.</source> <volume>30</volume>, <fpage>9659</fpage>&#x02013;<lpage>9669</lpage>. <pub-id pub-id-type="doi">10.1523/JNEUROSCI.5443-09.2010</pub-id><pub-id pub-id-type="pmid">20660249</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vogelstein</surname> <given-names>J. T.</given-names></name> <name><surname>Park</surname> <given-names>Y.</given-names></name> <name><surname>Ohyama</surname> <given-names>T.</given-names></name> <name><surname>Kerr</surname> <given-names>R. A.</given-names></name> <name><surname>Truman</surname> <given-names>J. W.</given-names></name> <name><surname>Priebe</surname> <given-names>C. E.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>Discovery of brainwide neural-behavioral maps via multiscale unsupervised structure learning</article-title>. <source>Science</source> <volume>344</volume>, <fpage>386</fpage>&#x02013;<lpage>92</lpage>. <pub-id pub-id-type="doi">10.1126/science.1250298</pub-id><pub-id pub-id-type="pmid">24674869</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Watson</surname> <given-names>K. K.</given-names></name> <name><surname>Platt</surname> <given-names>M. L.</given-names></name></person-group> (<year>2012</year>). <article-title>Of mice and monkeys: using non-human primate models to bridge mouse- and human-based investigations of autism spectrum disorders</article-title>. <source>J. Neurodev. Disord.</source> <volume>4</volume>:<fpage>21</fpage>. <pub-id pub-id-type="doi">10.1186/1866-1955-4-21</pub-id><pub-id pub-id-type="pmid">22958282</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yoshida</surname> <given-names>K.</given-names></name> <name><surname>Go</surname> <given-names>Y.</given-names></name> <name><surname>Kushima</surname> <given-names>I.</given-names></name> <name><surname>Toyoda</surname> <given-names>A.</given-names></name> <name><surname>Fujiyama</surname> <given-names>A.</given-names></name> <name><surname>Imai</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Single-neuron and genetic correlates of autistic behavior in macaque</article-title>. <source>Sci. Adv.</source> <volume>2</volume>:<fpage>e1600558</fpage>. <pub-id pub-id-type="doi">10.1126/sciadv.1600558</pub-id><pub-id pub-id-type="pmid">27679817</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Zuffi</surname> <given-names>S.</given-names></name> <name><surname>Kanazawa</surname> <given-names>A.</given-names></name> <name><surname>Berger-Wolf</surname> <given-names>T.</given-names></name> <name><surname>Black</surname> <given-names>M. J.</given-names></name></person-group> (<year>2019</year>). <article-title>Three-D safari: learning to estimate zebra pose, shape, and texture from images &#x0201C;in the wild&#x0201D;</article-title>, in <source>International Conference on Computer Vision</source> (<publisher-loc>Seoul</publisher-loc>).</citation></ref>
</ref-list>
<fn-group>
<fn fn-type="financial-disclosure"><p><bold>Funding.</bold> This work was supported by the Cooperative Research Program of Primate Research Institute, Kyoto University, the Grant-in-Aid for Scientific Research from Japan Society for the Promotion of Science (Nos. 16H06534, 19H04984, and 19H05467), and the grant of Joint Research by the National Institutes of Natural Sciences (NINS) (NINS Program No. 01111901).</p></fn>
</fn-group>
</back>
</article>
