<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Neurorobot.</journal-id>
<journal-title>Frontiers in Neurorobotics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Neurorobot.</abbrev-journal-title>
<issn pub-type="epub">1662-5218</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fnbot.2023.1320251</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Neuroscience</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Peg-in-hole assembly skill imitation learning method based on ProMPs under task geometric representation</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Zang</surname> <given-names>Yajing</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1362289/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Pengfei</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zha</surname> <given-names>Fusheng</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/799501/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Guo</surname> <given-names>Wei</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Zheng</surname> <given-names>Chao</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Sun</surname> <given-names>Lining</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>School of Mechatronics Engineering, State Key Laboratory of Robotics and System, Harbin Institute of Technology</institution>, <addr-line>Harbin</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Wuhan Second Ship Design and Research Institute</institution>, <addr-line>Wuhan</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Alois C. Knoll, Technical University of Munich, Germany</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Weida Li, Soochow University, China; Chen Chen, Harbin University of Science and Technology, China</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Fusheng Zha <email>zhafusheng&#x00040;hit.edu.cn</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>09</day>
<month>11</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>17</volume>
<elocation-id>1320251</elocation-id>
<history>
<date date-type="received">
<day>12</day>
<month>10</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>18</day>
<month>10</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2023 Zang, Wang, Zha, Guo, Zheng and Sun.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Zang, Wang, Zha, Guo, Zheng and Sun</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Behavioral Cloning (BC) is a common imitation learning method which utilizes neural networks to approximate the demonstration action samples for task manipulation skill learning. However, in the real world, the demonstration trajectories from humans are often sparse and imperfect, which makes it challenging to comprehensively learn directly from the demonstration action samples. Therefore, in this paper, we propose a streamlined imitation learning method under the terse geometric representation to take good advantage of the demonstration data, and then realize the manipulation skill learning of assembly tasks.</p></sec>
<sec>
<title>Methods</title>
<p>We map the demonstration trajectories into the geometric feature space. Then we align the demonstration trajectories by the Dynamic Time Warping (DTW) method to get the unified data sequence so we can segment them into several time stages. The Probabilistic Movement Primitives (ProMPs) of the demonstration trajectories are then extracted, so we can generate a lot of task trajectories to be the global strategy action samples for training the neural networks. Notably, we regard the current state of the assembly task as the via point of the ProMPs model to get the generated trajectories, while the time point of the via point is calculated according to the probability model of the different time stages. And we get the action of the current state according to the target position of the next time state. Finally, we train the neural network to obtain the global assembly strategy by Behavioral Cloning.</p></sec>
<sec>
<title>Results</title>
<p>We applied the proposed method to the peg-in-hole assembly task in the simulation environment based on Pybullet &#x0002B; Gym to test its task skill learning performance. And the learned assembly strategy was also executed on a real robotic platform to verify the feasibility of the method further.</p></sec>
<sec>
<title>Discussion</title>
<p>According to the result of the experiment, the proposed method achieves higher success rates compared to traditional imitation learning methods while exhibiting reasonable generalization capabilities. It shows that the ProMPs under geometric representation can help the BC method make better use of the demonstration trajectory and thus better learn the task skills.</p></sec></abstract>
<kwd-group>
<kwd>peg-in-hole assembly</kwd>
<kwd>imitation learning</kwd>
<kwd>Behavioral Cloning</kwd>
<kwd>probabilistic movement primitives</kwd>
<kwd>robot manipulation planning</kwd>
</kwd-group>
<counts>
<fig-count count="13"/>
<table-count count="0"/>
<equation-count count="14"/>
<ref-count count="27"/>
<page-count count="12"/>
<word-count count="6391"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1. Introduction</title>
<p>Assembly operations are a critical process in manufacturing, involving the connection and assembly of various components to create products, and they encompass nearly every aspect of the manufacturing industry (Zhao et al., <xref ref-type="bibr" rid="B27">2023</xref>). Industrial robots, serving as the primary execution devices in assembly production, face the crucial challenge of rapidly acquiring assembly operation skills, which significantly impact the development of the manufacturing sector.</p>
<p>In order to equip the robot with manipulation skills effectively, a variety of robot task learning methods have emerged in recent years (Bing et al., <xref ref-type="bibr" rid="B2">2022a</xref>,<xref ref-type="bibr" rid="B3">b</xref>). Among these methods, reinforcement learning (Luo et al., <xref ref-type="bibr" rid="B17">2019</xref>; Bing et al., <xref ref-type="bibr" rid="B5">2023b</xref>) and imitation learning (Ehlers et al., <xref ref-type="bibr" rid="B9">2019</xref>; Huang et al., <xref ref-type="bibr" rid="B10">2019</xref>) are the most common methods. Reinforcement learning methods are designed to give robots the ability to learn autonomously, which enables the robot to learn the unknown tasks completely independently through neural network agents. As for the tasks for which humans have sufficient operational experience, imitation learning methods can make use of human operational experience to achieve rapid learning of task skills and are usually more widely used.</p>
<p>Demonstration learning is a common method for robot skill acquisition. This approach extracts operation features from perceptual data obtained during human-demonstrated tasks. It then models operational skills using mathematical techniques and, finally, replicates and generalizes actions based on skill parameters. This methodology mimics the human learning process, enabling robots to effectively inherit assembly experiential knowledge already possessed by humans.</p>
<p>Traditional demonstration learning methods mainly fall into two categories: those based on mathematical models and those based on neural networks. Mathematical model-based methods, such as Dynamic Movement Primitives (DMP) (Chen et al., <xref ref-type="bibr" rid="B7">2017</xref>; Liu et al., <xref ref-type="bibr" rid="B16">2020</xref>), ProMPs (Paraschos et al., <xref ref-type="bibr" rid="B18">2018</xref>), and Task-Parameterized Gaussian Mixture Model (TP-GMM) (Rozo et al., <xref ref-type="bibr" rid="B20">2015</xref>, <xref ref-type="bibr" rid="B21">2016</xref>; Silv&#x000E9;rio et al., <xref ref-type="bibr" rid="B22">2019</xref>), model demonstration data by establishing trajectory models. These methods learn operational knowledge by adjusting and optimizing the parameters of mathematical models, similar to how humans adjust and refine their modeling process based on relevant experience. While mathematical approaches can provide precise models, they often require a high level of accuracy in input state information. Consequently, it is challenging for these models to generalize knowledge to the entire global task space.</p>
<p>The paper is organized as follows: Section 2 summarizes the existing research related to the proposed method; Section 3 introduces the implementation detail and the quantitative evaluation of the proposed method; Section 4 provides a series of experimental results and analysis. In Section 5, conclusions are drawn and directions for future work are provided.</p>
<p>Even though neural networks are widely known to be used in machine learning categories (Bing et al., <xref ref-type="bibr" rid="B4">2023a</xref>,<xref ref-type="bibr" rid="B6">c</xref>), they are also beginning to be put to good use in imitation learning methods. Till now, network-based imitation learning methods involve sampling human-operated skills and then training neural networks using the acquired sample data to achieve skill learning. As of now, neural network-based imitation learning methods primarily include BC methods (Li et al., <xref ref-type="bibr" rid="B15">2022</xref>), where neural networks are directly trained, and Generative Adversarial Imitation Learning (GAIL) methods (Tsurumine and Matsubara, <xref ref-type="bibr" rid="B23">2022</xref>), which approximate strategies through generative adversarial techniques. Neural network-based imitation learning methods excel in strategy approximation. However, due to the typically sparse nature of human demonstration data, training neural networks to obtain global strategies with a limited sample size poses a significant challenge.</p>
<p>Peg and hole assembly, a representative assembly task, finds widespread applications in aerospace manufacturing, shipbuilding, precision instrument manufacturing, and other fields. However, the diverse ways pegs and holes interact result in a multitude of task states and corresponding skill strategies. Consequently, collecting strategy samples becomes exceptionally challenging, making it even more difficult to achieve global skill learning in the task space through neural network-based imitation learning methods.</p>
<p>To address this issue, we propose a method that leverages a compact task representation space to achieve data augmentation. With limited demonstration data, we employ the ProMPs method to establish a mathematical model for global task space strategy. We then use BC to train neural networks, enabling global skill learning in the task space. This paper&#x00027;s contributions are as follows:</p>
<list list-type="order">
<list-item><p>We extracted human demonstration data and mapped it to a feature space using the method described in Zang et al. (<xref ref-type="bibr" rid="B26">2023</xref>).</p></list-item>
<list-item><p>Using the trajectory data from the feature space, we established an abstract mathematical model for global strategy using the ProMPs method.</p></list-item>
<list-item><p>Neural networks were trained using the BC method to achieve assembly task strategy learning within the task space.</p></list-item>
</list>
<p>This paper is organized as follows: In Section 2, we provide an overview of related work. Section 3 outlines the methodology for establishing the velocity assembly skill model and the imitation learning approach guided by skill knowledge. Next, in Section 4, we conduct both simulation experiments and real robot experiments to validate the proposed method&#x00027;s performance and analyze the experimental results. Finally, Section 5 summarizes the entire paper.</p>
</sec>
<sec id="s2">
<title>2. Related work</title>
<p>The proposed method in this paper mainly involves modeling with probabilistic motion primitives and utilizing simplified geometric feature representation. This enables robotic assembly tasks based on behavior cloning methods to achieve improved learning outcomes. Relevant work in this area includes research on robot task representation and robot imitation learning. Recent research achievements in this field are as follows.</p>
<sec>
<title>2.1. Representation methods for peg-in-hole assembly tasks</title>
<p>In the field of robot task learning, methods for task representation play a crucial role. Due to the varying representation requirements posed by different task learning methods, several types of task representation methods have emerged to date. These methods mainly fall into three categories: feature-based representation, perception-based representation, and neural network-based abstract representation.</p>
<p>Feature-based representation categorizes task states into discrete types. For instance, in Tsuruoka et al. (<xref ref-type="bibr" rid="B24">1997</xref>), contact states are classified into discrete categories such as single-point contact, two-point contact, three-point contact, surface contact and so on. A simplified state representation containing only three contact state categories was proposed in Huang et al. (<xref ref-type="bibr" rid="B11">2020</xref>) for medium-gap dual-arm peg-in-hole assembly tasks. The classification in Park et al. (<xref ref-type="bibr" rid="B19">2017</xref>) can be seen as a simplified version of the one presented in Tsuruoka et al. (<xref ref-type="bibr" rid="B24">1997</xref>) for situations with smaller gaps. While discrete representation methods effectively incorporate geometric features into skill learning, the limited number of categories often hinders detailed task skill modeling.</p>
<p>Continuous perception-based representation methods, as demonstrated in Huang et al. (<xref ref-type="bibr" rid="B11">2020</xref>), establish mappings between pose samples and the corresponding contact forces manually. They then use force sensing information to estimate correct poses, which represent contact states. Work like Park et al. (<xref ref-type="bibr" rid="B19">2017</xref>) introduces five-dimensional contact force information within the three-dimensional representation, resulting in an eight-dimensional continuous perceptual variable to represent the state of reinforcement learning agents. Perception-based information can be directly obtained from sensors and comprehensively represents task information, including poses and contact forces. However, continuous perception information can appear redundant and cumbersome during abstract skill analysis due to the abstract nature of skills.</p>
<p>Abstract information representation, as suggested in Ding et al. (<xref ref-type="bibr" rid="B8">2019</xref>), introduces a reinforcement learning-based pose estimator. This estimator calculates probabilistic weights for the six-dimensional pose space using visual and force information, representing the state for high-precision peg-in-hole assembly tasks in reinforcement learning. A more advanced method for constructing continuous perceptual abstract information is proposed in Lee et al. (<xref ref-type="bibr" rid="B13">2020</xref>) using Variational Autoencoders (VAE). They encode multimodal sensory information, including visual images, depth information, robot force and position sensing information, and contact force information, into an abstract code using a neural network variational model encoder. This approach achieves continuous abstract representation for peg-in-hole assembly tasks. These studies utilize neural networks to represent assembly tasks as concise and abstract state information.</p>
<p>In our study (Zang et al., <xref ref-type="bibr" rid="B26">2023</xref>), we propose a continuous and streamlined representation method for peg-in-hole assembly states by analyzing the geometric features. This method reduces the dimensionality of reinforcement learning for peg-in-hole assembly, simplifying the learning process. In this paper, we will continue to use this abstract representation method to learn human assembly skills under geometric feature representation, aiming to acquire more global assembly skills from sparse human demonstrations.</p>
</sec>
<sec>
<title>2.2. Imitation learning methods for robot manipulation tasks</title>
<p>As mentioned earlier, imitation learning methods are primarily categorized into those based on mathematical models and those based on neural networks.</p>
<p>In mathematical analysis-based imitation learning methods, DMP are one of the earliest and most commonly used approaches. In Liu et al. (<xref ref-type="bibr" rid="B16">2020</xref>), the DMP imitation learning method was employed to extract the variations in trajectory characteristics as skill parameters, which were then used for reproducing demonstrated trajectories. Yang et al. (<xref ref-type="bibr" rid="B25">2019</xref>) utilized the DMP method for skill parameter extraction and operation replication based on human trajectories and stiffness information. However, DMP methods are limited to extracting data features and do not capture specific task features. Consequently, during the replication and generalization processes, they fail to retain the task characteristics inherent in demonstrated information, making it challenging to effectively generalize to similar tasks. Paraschos et al. (<xref ref-type="bibr" rid="B18">2018</xref>) introduced the ProMPs, a derivative of DMP, to extract probabilistic movement primitives of trajectories as skill parameters, facilitating the extraction of operation skill probability distribution parameters. Although ProMPs can retain probabilistic task-related features through probability calculations, the dimensionality redundancy in perception-based task representation compared to task features leads to the inefficient extraction of some task characteristics. Rozo et al. (<xref ref-type="bibr" rid="B21">2016</xref>) used the TP-GMM method to extract the Gaussian Mixture Model (GMM) of trajectories in the task coordinate system as skill parameters. Subsequently, they used Gaussian Mixture Regression (GMR) to replicate demonstrated operations from these skill parameters. While this method retains task-related features, its probability distribution model is still affected by the dimensionality redundancy in perception-based information representation.</p>
<p>With the rise of artificial intelligence technology, neural network-based imitation learning methods have gained widespread application in recent years. Li et al. (<xref ref-type="bibr" rid="B14">2023</xref>) employed the BC method to extract and replicate human driving skill parameters, enabling driving skill learning. Bhattacharyya et al. (<xref ref-type="bibr" rid="B1">2023</xref>) utilized GAIL to extract skill parameters from different driving styles and achieve replication of various driving styles. Additionally, Kim et al. (<xref ref-type="bibr" rid="B12">2020</xref>) introduced the Neural-Network-based Movement Primitive (NNMP) method, which models DMP using neural networks to retain task characteristics. However, the skill parameters obtained from neural network-based imitation learning methods are implicit (uninterpretable) neural network parameters. These parameters not only fail to retain task characteristics but also exhibit limited generalization performance, making them unsuitable for extracting explicit, generic assembly skills.</p>
<p>In this paper, we will continue to learn assembly skills from human demonstration information using imitation learning methods, building upon the geometric feature space of peg-in-hole assembly. This approach aims to improve the performance of traditional imitation learning methods.</p>
</sec>
</sec>
<sec id="s3">
<title>3. Method</title>
<p>In our previous work (Zang et al., <xref ref-type="bibr" rid="B26">2023</xref>), we discovered that modeling task skills within a simplified geometric feature representation space allows for more comprehensive skill learning within the task space. In this paper, we will use traditional geometric-based imitation learning methods, ProMPs, and Behavioral Cloning (BC), to learn fundamental peg-in-hole assembly skills.</p>
<p>Unlike the previous work, this paper takes a different approach. Instead of directly specifying simple assembly skills, we will acquire skills from human demonstration information and engage in direct imitation learning. While this method involves human intervention, it aligns with practical applications where the same assembly task may require different skills under varying task requirements. Therefore, this approach holds promising prospects for learning specific assembly skills in particular application scenarios.</p>
<sec>
<title>3.1. Peg-in-hole assembly ProMPs under geometric representation</title>
<p>In this paper, we will utilize a simplified geometric feature representation of the peg-in-hole assembly task as the task state. Firstly, for ease of calculation, regardless of whether the experiment fixes the peg or the hole component, we assume the peg component remains fixed, and the hole component is mobile when analyzing relative poses.</p>
<p>We denote the relative pose between the peg and the hole as Equation (1). And we have <italic>R</italic><sub><italic>ho</italic></sub> &#x0003D; [<italic>n</italic><sub><italic>x</italic></sub>, <italic>n</italic><sub><italic>y</italic></sub>, <italic>n</italic><sub><italic>z</italic></sub>], <inline-formula><mml:math id="M1"><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi><mml:mi>o</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>z</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>.</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M2"><mml:mrow><mml:msup><mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msup><mml:msub><mml:mi>T</mml:mi><mml:mrow><mml:mi>h</mml:mi><mml:mi>o</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>R</mml:mi><mml:mrow><mml:mi>h</mml:mi><mml:mi>o</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mi>h</mml:mi><mml:mi>o</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd><mml:mtd><mml:mn>1</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:math></disp-formula>
<p>During the calculation process, we map it to the geometric feature task space following the method described in Zang et al. (<xref ref-type="bibr" rid="B26">2023</xref>), and then get the representation information <italic>Y</italic> &#x0003D; {<italic>x, z</italic>, &#x003B1;, &#x003B2;, &#x003D5;, &#x003B8;} according to Equation (2).</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>x</mml:mi></mml:mtd><mml:mtd><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:msubsup><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:msubsup><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mrow></mml:msqrt></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E3"><label>(3)</label><mml:math id="M4"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>z</mml:mi></mml:mtd><mml:mtd><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>z</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E4"><label>(4)</label><mml:math id="M5"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>&#x003C6;</mml:mi></mml:mtd><mml:mtd><mml:mo>=</mml:mo><mml:mtext class="textrm" mathvariant="normal">arctan</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo>/</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E5"><label>(5)</label><mml:math id="M6"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>&#x003B8;</mml:mi></mml:mtd><mml:mtd><mml:mo>=</mml:mo><mml:mtext class="textrm" mathvariant="normal">arctan</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>3</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>/</mml:mo><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>3</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E6"><label>(6)</label><mml:math id="M7"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>&#x003B1;</mml:mi></mml:mtd><mml:mtd><mml:mo>=</mml:mo><mml:mo>&#x0003C;</mml:mo><mml:msubsup><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo>-</mml:mo><mml:mtext class="textrm" mathvariant="normal">sin</mml:mtext><mml:mi>&#x003C6;</mml:mi><mml:mo>,</mml:mo><mml:mtext class="textrm" mathvariant="normal">cos</mml:mtext><mml:mi>&#x003C6;</mml:mi><mml:mo>,</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0003E;</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E7"><label>(7)</label><mml:math id="M8"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>&#x003B2;</mml:mi></mml:mtd><mml:mtd><mml:mo>=</mml:mo><mml:mo>&#x0003C;</mml:mo><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>z</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0003E;</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <inline-formula><mml:math id="M9"><mml:msubsup><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> can be denoted as Equation (8).</p>
<disp-formula id="E8"><label>(8)</label><mml:math id="M10"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi><mml:mi>o</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">cos</mml:mtext><mml:mi>&#x003B8;</mml:mi></mml:mtd><mml:mtd><mml:mo>-</mml:mo><mml:mtext class="textrm" mathvariant="normal">sin</mml:mtext><mml:mi>&#x003B8;</mml:mi></mml:mtd><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Through the aforementioned method, we can transform the obtained homogeneous transformation matrices into geometric feature representation information.</p>
<p>However, since the sensory information from human demonstrations is not always perfect, before extracting motion primitives from the sequence <italic>Y</italic>(<italic>N</italic>) of relative pose geometric representations, we first apply DTW to the trajectories. After DTW processing, we interpolate the geometric representation sequence as <italic>Y</italic><sup><italic>DTW</italic></sup>(<italic>N</italic>&#x02032;). In this context, the DTW distance function is defined as the Euclidean distance between the first four elements of the geometric representation sequence, denoted as <inline-formula><mml:math id="M11"><mml:msubsup><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>:</mml:mo><mml:mn>4</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>z</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x003B1;</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>. The distance between any two sequence points <sup>1</sup><italic>P</italic><sup><italic>DTW</italic></sup> and <sup>2</sup><italic>P</italic><sup><italic>DTW</italic></sup> is expressed as Equation (9):</p>
<disp-formula id="E9"><label>(9)</label><mml:math id="M12"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>D</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mo>|</mml:mo><mml:msup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:msubsup><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>:</mml:mo><mml:mn>4</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup><mml:msup><mml:mrow><mml:mo>-</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:msubsup><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>:</mml:mo><mml:mn>4</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup><mml:mo>|</mml:mo><mml:mo>|</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>This DTW processing allows us to handle imperfections in the human demonstration&#x00027;s sensory data, enabling us to obtain a more refined geometric representation sequence for further analysis and motion primitive extraction.</p>
<p>Then, for every dimension k of <inline-formula><mml:math id="M13"><mml:msubsup><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>:</mml:mo><mml:mn>4</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>, we calculate the ProMP model, which are denoted as follows.</p>
<disp-formula id="E10"><label>(10)</label><mml:math id="M14"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd><mml:mtd><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mo>&#x003A6;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003F5;</mml:mi></mml:mrow><mml:mrow><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E11"><label>(11)</label><mml:math id="M15"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003C4;</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd><mml:mtd><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x0220F;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mo>&#x003A6;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mo>&#x003A3;</mml:mo></mml:mrow><mml:mrow><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Where <inline-formula><mml:math id="M16"><mml:msub><mml:mrow><mml:mo>&#x003A6;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> represents the basis functions for the four-dimensional geometric representation variable of ProMPs, with n denoting the number of basis functions, and <italic>w</italic><sub><italic>k</italic></sub> as the corresponding weight vector. <inline-formula><mml:math id="M17"><mml:msub><mml:mrow><mml:mi>&#x003F5;</mml:mi></mml:mrow><mml:mrow><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup></mml:mrow></mml:msub><mml:mo>&#x0007E;</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003A3;</mml:mi></mml:mrow><mml:mrow><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>.</p>
<p>Next, we parameterize the weight parameters with Gaussian models &#x003B8;<sub><italic>k</italic></sub> &#x0003D; {<italic>N</italic>(&#x003BC;<sub><italic>w</italic><sub><italic>k</italic></sub></sub>, &#x003A3;<sub><italic>w</italic><sub><italic>k</italic></sub></sub>)}. We then employ the Maximum Likelihood Estimation (MLE) method using the demonstrated trajectories <inline-formula><mml:math id="M18"><mml:msub><mml:mrow></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> to estimate the parameters of the Gaussian model for the weights. Here, <italic>i</italic> &#x02208; {1, 2, .., <italic>N</italic><sub><italic>dem</italic></sub>} represents the index of the demonstrated trajectory. Subsequently, following the method described in Zang et al. (<xref ref-type="bibr" rid="B26">2023</xref>), we estimate the weights of the demonstrated trajectories through linear ridge regression. We use these weights to calculate the Gaussian model for the weight parameters.</p>
</sec>
<sec>
<title>3.2. Behavioral Cloning imitation learning method</title>
<p>In this paper, we will use the ProMPs model based on the geometric feature representation mentioned in Section 3.1 as the foundational model for generating operational knowledge samples under sparse demonstration data. We will then employ the BC method, using the knowledge samples to train a neural network for learning actions in different states. This is done to achieve the goal of data augmentation from demonstrations.</p>
<p>To facilitate practical applications, the input to the BC neural network consists of a seven-dimensional array <italic>s</italic>, composed of the three-dimensional relative position and the four-dimensional relative pose between the current peg and hole components. This is represented as follows:</p>
<disp-formula id="E12"><label>(12)</label><mml:math id="M19"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>z</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>q</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>q</mml:mi></mml:mrow><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>q</mml:mi></mml:mrow><mml:mrow><mml:mi>z</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>q</mml:mi></mml:mrow><mml:mrow><mml:mi>w</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>The output of the neural network corresponds to the executed actions and is structured as a six-dimensional array <italic>a</italic> as Equation (13). This array represents the change in the three-dimensional position and the change in the three-dimensional orientation angles of the end-effector.</p>
<disp-formula id="E13"><label>(13)</label><mml:math id="M20"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>a</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mo>&#x00394;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x00394;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x00394;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>z</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x00394;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x00394;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x00394;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>z</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Here, &#x00394;<italic>p</italic> represents the changes in the end-effector&#x00027;s three-dimensional position, and &#x00394;&#x003B8; represents the changes in its three-dimensional orientation angles. These output values provide the necessary information for executing actions in the context of the assembly task.</p>
<p>Because ProMPs can facilitate the learning of trajectory shapes, especially when waypoints are specified, it outperforms other imitation learning methods in terms of trajectory shape learning. In the case of neural network imitation learning methods based on state and action modeling, when a specific state <italic>s</italic> is determined, it is equivalent to specifying a waypoint on the trajectory. Consequently, ProMPs can be used to determine the knowledge action accordingly. However, one challenge that remains is how to determine the time point at which these waypoints occur.</p>
<p>In this paper, we will divide the trajectories aligned by DTW into several time stages. Then, we will use Gaussian models of each time stage to estimate which time stage the current state approximately corresponds to. This allows us to roughly determine the time point associated with the current state.</p>
<p>We suppose the geometric representation information corresponding to the trajectory state in a certain time stage is <inline-formula><mml:math id="M21"><mml:msup><mml:mrow><mml:mi>Y</mml:mi></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>, where <italic>t</italic><sub><italic>start</italic></sub> &#x02264; <italic>t</italic><sub><italic>s</italic></sub> &#x02264; <italic>t</italic><sub><italic>end</italic></sub>. These constraints in Equation (14) are used to ensure that the time stages are appropriately divided and cover the entire trajectory duration.</p>
<disp-formula id="E14"><label>(14)</label><mml:math id="M22"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mtable columnalign='left'><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mrow><mml:mo>&#x02200;</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mrow><mml:mo>&#x02016;</mml:mo><mml:mrow><mml:msubsup><mml:mi>Y</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mo>:</mml:mo><mml:mn>4</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:mo>&#x02212;</mml:mo><mml:msubsup><mml:mi>Y</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mo>:</mml:mo><mml:mn>4</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy='false'>)</mml:mo></mml:mrow><mml:mo>&#x02016;</mml:mo></mml:mrow><mml:mo>&#x02264;</mml:mo><mml:msub><mml:mi>l</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mrow><mml:msub><mml:mi>t</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02264;</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mrow></mml:mrow></mml:math></disp-formula>
<p>Under the constraints mentioned above, we select the longest time stage, which divides the trajectory into different time stages. Here, we assume that there are <italic>N</italic><sub><italic>t</italic></sub> time stages.</p>
<p>We extract samples from different time stages <inline-formula><mml:math id="M23"><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>:</mml:mo><mml:mn>4</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> of the demonstrated trajectory and compute Gaussian models <inline-formula><mml:math id="M24"><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow><mml:mrow><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>:</mml:mo><mml:mn>4</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003A3;</mml:mi></mml:mrow><mml:mrow><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>:</mml:mo><mml:mn>4</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> for these samples in each time stage. 
After obtaining the representation information <inline-formula><mml:math id="M25"><mml:msubsup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>:</mml:mo><mml:mn>4</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi><mml:mi>T</mml:mi><mml:mi>W</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> corresponding to the current state, we calculate the probability that the current sample point belongs to each time stage. The time stage with the highest probability is considered the current time stage. The specific time point can be chosen from within the current time stage, and in this paper, we directly select the midpoint of the time stage.</p>
<p>Once the time point is determined, we set waypoints in ProMPs and generate a target trajectory. However, we don&#x00027;t need to know all the trajectory information; we only need to use the trajectory point at a specific time within the next time stage as the target point. Then, we calculate the action <italic>a</italic> required to reach the target point, which serves as the knowledge information sample.</p>
<p>After obtaining the knowledge action sample, we use the BC method to imitate and learn the assembly skills from the demonstrated knowledge, thereby acquiring the peg-in-hole assembly skill.</p>
</sec>
</sec>
<sec id="s4">
<title>4. Experiments</title>
<p>To validate the effectiveness of our proposed assembly skill imitation learning method, we conducted both simulation experiments and real robot experiments. Below are the details of the experimental setup and results.</p>
<sec>
<title>4.1. Acquisition of demonstration information</title>
<p>To better record human demonstration information and minimize the impact of robot stalling and damping during the teaching process, we utilized motion capture equipment to capture the relative poses of the peg and hole during the human assembly process. The experimental setup is illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>, while detailed schematics of the markers and the peg-in-hole components are shown in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>The picture of demonstration data acquisition equipment and the assembly parts.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnbot-17-1320251-g0001.tif"/>
</fig>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>The picture of the assembly parts and the distribution of marker points.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnbot-17-1320251-g0002.tif"/>
</fig>
<p>During the data acquisition process for demonstration information, we manually held the peg component and executed the assembly strategy, which involved approaching the hole component along the axis of the hole and inserting it. We performed multiple assembly operations while capturing the position information of the markers. The trajectories of all the markers obtained from these operations are depicted in <xref ref-type="fig" rid="F3">Figure 3</xref>.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>The recorded trajectories of the marker points during the human assembly demonstration.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnbot-17-1320251-g0003.tif"/>
</fig>
<p>By using the positions of four square-distributed markers, we calculated the homogeneous transformation matrix representing the pose of the peg component during the assembly process. Finally, from the obtained sensor data, we extracted the raw trajectory information of the demonstrated operation. After processing, the trajectory plot is shown in <xref ref-type="fig" rid="F4">Figure 4</xref>.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>The processed trajectory data which denoted the pose of the peg part during the assembly demonstration.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnbot-17-1320251-g0004.tif"/>
</fig>
</sec>
<sec>
<title>4.2. Verify experiment of assembly skills imitation learning</title>
<p>After obtaining the raw trajectory information, we mapped the trajectories into the geometric representation space and applied DTW for alignment. The aligned trajectories in the geometric representation space are depicted in <xref ref-type="fig" rid="F5">Figure 5</xref>. In this figure, it can be observed that the demonstrated operation trajectories align effectively within the geometric feature representation space, forming a concentrated set of knowledge trajectories.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>The aligned trajectories (processed by DTW) in geometric representation space. <bold>(A&#x02013;D)</bold> Are the demonstration trajectories for different dimensions under geometric representation.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnbot-17-1320251-g0005.tif"/>
</fig>
<p>As a comparison, we also aligned the trajectories in cartesian representation space, as shown in <xref ref-type="fig" rid="F6">Figure 6</xref>.</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>The aligned trajectories (processed by DTW) in the cartesian representation space. <bold>(A&#x02013;F)</bold> Are the demonstration trajectories in different cartesian dimensions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnbot-17-1320251-g0006.tif"/>
</fig>
<p>As shown in the figure, under the geometric feature representation, the demonstrated trajectories are more easily aligned, resulting in a concentrated set of assembly skills.</p>
<p>We extracted ProMPs from the trajectory representation information processed with DTW and generated some random trajectories based on the model, as shown in <xref ref-type="fig" rid="F7">Figure 7</xref>.</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>The ProMPs model in geometric representation space extracted from the aligned trajectories and the trajectories generated from them. <bold>(A&#x02013;D)</bold> Are the ProMPs model for different dimensions under geometric representation.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnbot-17-1320251-g0007.tif"/>
</fig>
<p>In comparison, we directly utilized the trajectories obtained in cartesian space representation to extract ProMPs and generated some random trajectories based on this model, as shown in <xref ref-type="fig" rid="F8">Figure 8</xref>.</p>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>The ProMPs model in cartesian representation space extracted from the aligned trajectories and the trajectories generated from them. <bold>(A&#x02013;F)</bold> Are the ProMPs model for different cartesian dimensions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnbot-17-1320251-g0008.tif"/>
</fig>
<p>After obtaining the ProMPs model, we sampled several points in different task space representations. We used the current sampling point as a through point, generated task trajectories through this sampling point using the ProMPs model, and then took the target pose from the next time stage as the target, resulting in action sampling. We used this data to train a Behavioral Cloning neural network model. This way, we conducted two sets of imitation learning experiments in both the geometric representation task space and the Cartesian task space.</p>
<p>Additionally, we set up three sets of control experiments. The first one trained a neural network for BC using only action samples from demonstrated trajectories. The second one used ProMPs under geometric representation to generate trajectories. The last one used ProMPs under cartesian representation to generate trajectories. In the end, we had the following five groups of experiments:</p>
<list list-type="order">
<list-item><p>EXP_1: ProMPs model in task representation space &#x0002B; BC imitation learning.</p></list-item>
<list-item><p>EXP_2: ProMPs model in Cartesian representation space &#x0002B; BC imitation learning.</p></list-item>
<list-item><p>EXP_3: Naive BC imitation learning.</p></list-item>
<list-item><p>EXP_4: ProMPs model in geometric representation space.</p></list-item>
<list-item><p>EXP_5: ProMPs model in Cartesian representation space.</p></list-item>
</list>
<p>We trained the neural network in an environment based on Ubuntu and PyTorch. The neural network had six layers, with 100 neurons in each layer. The input to the neural network consisted of the current relative pose of the components, and the output was a six-dimensional action.</p>
<p>We conducted simulation experiments in the Pybullet environment, as shown in <xref ref-type="fig" rid="F9">Figure 9</xref>. The experimental subject was the Franka Panda robot. We calculated the target pose based on the current action&#x00027;s position and angle increment and then calculated the robot&#x00027;s joint angle changes based on the target pose for position control. We used the Trac Ik method to calculate the robot&#x00027;s inverse kinematics.</p>
<fig id="F9" position="float">
<label>Figure 9</label>
<caption><p>The picture of the simulation platform.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnbot-17-1320251-g0009.tif"/>
</fig>
<p>The assembly components used have a diameter of 20 mm and consist of pegs and holes with a matching length of 12 mm. The gap between the pegs and holes is &#x0003C;0.5 mm. We randomly selected 500 sets of random state data within the task space and conducted assembly experiments for each of the five configurations. We randomly chose 200 successfully generated trajectories from each experiment and show them in <xref ref-type="fig" rid="F10">Figure 10</xref>.</p>
<fig id="F10" position="float">
<label>Figure 10</label>
<caption><p>Two hundred of the successful trajectories of each simulation experiment. <bold>(A&#x02013;E)</bold> Are the verification experiment results for EXP_1&#x02013;EXP_5.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnbot-17-1320251-g0010.tif"/>
</fig>
<p>The success rate of each experiment as well as the variance of it are shown in <xref ref-type="fig" rid="F11">Figure 11</xref>.</p>
<fig id="F11" position="float">
<label>Figure 11</label>
<caption><p>The success rate of each experiment as well as the variance of it.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnbot-17-1320251-g0011.tif"/>
</fig>
<p>Additionally, we conducted generalization experiments for assembly components of different sizes as part of Experiment 1. This included assembly sizes with diameters of 10 mm (Gen_1) and 15 mm (Gen_2). Two hundred successful trajectories are also chosen randomly from the generalization experiment results for each group, which are shown in <xref ref-type="fig" rid="F12">Figure 12</xref>.</p>
<fig id="F12" position="float">
<label>Figure 12</label>
<caption><p>Two hundred of the successful trajectories of each generalization experiment. <bold>(A)</bold> Is the generalization experiment result of the peg-in-hole assembly task with 10 mm diameter. <bold>(B)</bold> Is the result of the peg-in-hole assembly task with 15 mm diameter.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnbot-17-1320251-g0012.tif"/>
</fig>
<p>Finally, we performed real-world robot generalization experiments using the assembly actions learned from Experiment 1. The experimental setup and results in the form of time-series graphs are illustrated in <xref ref-type="fig" rid="F13">Figure 13</xref>. Notably, in the real-world experiment, we use impedance control instead of position control because of the collision inherent in the peg-in-hole task. We demonstrated that our proposed imitation learning method for robot peg-in-hole assembly tasks, based on geometric representations and ProMPs, can effectively learn assembly strategies and achieve higher performance compared to traditional methods.</p>
<fig id="F13" position="float">
<label>Figure 13</label>
<caption><p>The picture of the real world experiment platform and the snapshots.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnbot-17-1320251-g0013.tif"/>
</fig>
</sec>
<sec>
<title>4.3. Discussion</title>
<p>For the simulation experiments, we found that the imitation learning method based solely on the ProMPs approach had certain limitations in global task learning, whether in the task representation or Cartesian representation. This suggests that the ProMPs model itself has limitations in trajectory generalization. In other words, when the selected state as a passing point significantly deviates from the original ProMPs model&#x00027;s trajectory, the generalized trajectories generated by the ProMPs model may struggle to meet the requirements of tasks with rich contacts. Additionally, the similar success rates obtained in Experiment 4 and Experiment 5 indicate that the ProMPs model&#x00027;s performance does not differ significantly in different task representations. We believe this is because both experiments are based on learning the same demonstrated trajectories, so the learned trajectory shapes are generally similar regardless of the representation.</p>
<p>However, this does not imply that the ProMPs model cannot be an effective method for obtaining generalized trajectories. On the contrary, the higher success rate achieved in Experiment 1 using the ProMPs model with geometric feature representation for BC imitation learning suggests that the ProMPs model can yield high success rates when used in combination with specific task representations. Nonetheless, we observe that even when both experiments use the ProMPs model for BC imitation learning, the success rate in the Cartesian space representation is not ideal. We attribute this to two factors. First, the ProMPs model in Cartesian space has higher variance, and the knowledge is not as concentrated. Additionally, the knowledge in the geometric feature representation is more concise, making it easier to obtain a uniform global skill, which is beneficial for neural network learning. Therefore, we conclude that geometric feature representation plays a crucial role in neural network-based task learning.</p>
<p>Finally, the surprising results obtained in Experiment 3 with the naive BC imitation learning agent, although slightly inferior to Experiment 1, can be attributed to the simplicity of the provided skill. Moreover, the demonstration data effectively covered the task space, allowing the neural network&#x00027;s generalization capabilities to be effectively utilized, leading to better results.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="s5">
<title>5. Conclusion</title>
<p>In this paper, we have introduced a robot assembly task imitation learning method based on ProMPs under a task-specific representation. This method involves the use of probabilistic movement primitives in the geometric feature representation and BC imitation learning based on the ProMPs model. We have conducted comparative experiments to demonstrate the effectiveness of our proposed approach and provided generalization experiments. Finally, we have analyzed the experimental results and offered our insights into the factors contributing to the outcomes in different experiments.</p>
<p>While the method presented in this paper has shown promising results in the current task setting, it still has some limitations in practical applications. Therefore, in future work, we aim to integrate the skill modeling capabilities of imitation learning methods with the generalization capabilities of neural networks. We plan to investigate assembly task learning methods for different task settings to address these limitations and enhance the applicability of the approach.</p>
</sec>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="ethics-statement" id="s7">
<title>Ethics statement</title>
<p>Written informed consent was obtained from the individual(s) for the publication of any identifiable images or data included in this article.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>YZ: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Software, Validation, Visualization, Writing&#x02014;original draft. PW: Conceptualization, Investigation, Project administration, Supervision, Writing&#x02014;review &#x00026; editing. FZ: Conceptualization, Data curation, Formal analysis, Funding acquisition, Investigation, Methodology, Project administration, Resources, Supervision, Writing&#x02014;review &#x00026; editing. WG: Conceptualization, Data curation, Investigation, Methodology, Project administration, Supervision, Writing&#x02014;review &#x00026; editing. CZ: Funding acquisition, Resources, Supervision, Writing&#x02014;review &#x00026; editing. LS: Conceptualization, Funding acquisition, Investigation, Methodology, Project administration, Resources, Supervision, Writing&#x02014;review &#x00026; editing.</p>
</sec>
</body>
<back>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work was supported by the Basic Research on Free Exploration of Shenzhen Virtual University Park (2021Szvup085), National Natural Science Foundation of China (U2013602, 52075115, 51521003, and 61911530250), National Key R&#x00026;D Program of China (2020YFB13134, 2020YFB1313401, 2020YFB1313403, and 2022YFB4601802), SelfPlanned Task (SKLRS202001B, SKLRS202110B, and SKLRS202301A12) of State Key Laboratory of Robotics and System (HIT), Shenzhen Science and Technology Research and Development Foundation (JCYJ20190813171009236), and Basic Scientific Research of Technology (JCKY2020603C009).</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bhattacharyya</surname> <given-names>R.</given-names></name> <name><surname>Wulfe</surname> <given-names>B.</given-names></name> <name><surname>Phillips</surname> <given-names>D. J.</given-names></name> <name><surname>Kuefler</surname> <given-names>A.</given-names></name> <name><surname>Morton</surname> <given-names>J.</given-names></name> <name><surname>Senanayake</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Modeling human driving behavior through generative adversarial imitation learning</article-title>. <source>IEEE Trans. Intell. Transp. Syst</source>. <volume>24</volume>, <fpage>2874</fpage>&#x02013;<lpage>2887</lpage>. <pub-id pub-id-type="doi">10.1109/TITS.2022.3227738</pub-id></citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bing</surname> <given-names>Z.</given-names></name> <name><surname>Brucker</surname> <given-names>M.</given-names></name> <name><surname>Morin</surname> <given-names>F. O.</given-names></name> <name><surname>Li</surname> <given-names>R.</given-names></name> <name><surname>Su</surname> <given-names>X.</given-names></name> <name><surname>Huang</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2022a</year>). <article-title>Complex robotic manipulation via graph-based hindsight goal generation</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <volume>33</volume>, <fpage>7863</fpage>&#x02013;<lpage>7876</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2021.3088947</pub-id><pub-id pub-id-type="pmid">34181552</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bing</surname> <given-names>Z.</given-names></name> <name><surname>Cheng</surname> <given-names>L.</given-names></name> <name><surname>Huang</surname> <given-names>K.</given-names></name> <name><surname>Knoll</surname> <given-names>A.</given-names></name></person-group> (<year>2022b</year>). <article-title>Simulation to real: learning energy-efficient slithering gaits for a snake-like robot</article-title>. <source>IEEE Robot. Autom. Mag</source>. <volume>29</volume>, <fpage>92</fpage>&#x02013;<lpage>103</lpage>. <pub-id pub-id-type="doi">10.1109/MRA.2022.3204237</pub-id></citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bing</surname> <given-names>Z.</given-names></name> <name><surname>Knak</surname> <given-names>L.</given-names></name> <name><surname>Cheng</surname> <given-names>L.</given-names></name> <name><surname>Morin</surname> <given-names>F. O.</given-names></name> <name><surname>Huang</surname> <given-names>K.</given-names></name> <name><surname>Knoll</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2023a</year>). <article-title>Meta-reinforcement learning in nonstationary and nonparametric environments</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <fpage>1</fpage>&#x02013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2023.3270298</pub-id><pub-id pub-id-type="pmid">37224358</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bing</surname> <given-names>Z.</given-names></name> <name><surname>Lerch</surname> <given-names>D.</given-names></name> <name><surname>Huang</surname> <given-names>K.</given-names></name> <name><surname>Knoll</surname> <given-names>A.</given-names></name></person-group> (<year>2023b</year>). <article-title>Meta-reinforcement learning in non-stationary and dynamic environments</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell</source>. <volume>45</volume>, <fpage>3476</fpage>&#x02013;<lpage>3491</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.2022.3185549</pub-id><pub-id pub-id-type="pmid">35737617</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bing</surname> <given-names>Z.</given-names></name> <name><surname>Zhou</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>R.</given-names></name> <name><surname>Su</surname> <given-names>X.</given-names></name> <name><surname>Morin</surname> <given-names>F. O.</given-names></name> <name><surname>Huang</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2023c</year>). <article-title>Solving robotic manipulation with sparse reward reinforcement learning via graph-based diversity and proximity</article-title>. <source>IEEE Trans. Ind. Electron</source>. <volume>70</volume>, <fpage>2759</fpage>&#x02013;<lpage>2769</lpage>. <pub-id pub-id-type="doi">10.1109/TIE.2022.3172754</pub-id></citation>
</ref>
<ref id="B7">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>C.</given-names></name> <name><surname>Yang</surname> <given-names>C.</given-names></name> <name><surname>Zeng</surname> <given-names>C.</given-names></name> <name><surname>Wang</surname> <given-names>N.</given-names></name> <name><surname>Li</surname> <given-names>Z.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Robot learning from multiple demonstrations with dynamic movement primitive&#x0201D;</article-title> in <source>2017 2nd International Conference on Advanced Robotics and Mechatronics (ICARM)</source> (<publisher-loc>Hefei and Tai&#x00027;an</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>523</fpage>&#x02013;<lpage>528</lpage>. <pub-id pub-id-type="doi">10.1109/ICARM.2017.8273217</pub-id></citation>
</ref>
<ref id="B8">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ding</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>C.</given-names></name> <name><surname>Lu</surname> <given-names>C.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Transferable trial-minimizing progressive peg-in-hole model,&#x0201D;</article-title> in <source>2019 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)</source> (<publisher-loc>Macau</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>5862</fpage>&#x02013;<lpage>5868</lpage>. <pub-id pub-id-type="doi">10.1109/IROS40897.2019.8968282</pub-id></citation>
</ref>
<ref id="B9">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ehlers</surname> <given-names>D.</given-names></name> <name><surname>Suomalainen</surname> <given-names>M.</given-names></name> <name><surname>Lundell</surname> <given-names>J.</given-names></name> <name><surname>Kyrki</surname> <given-names>V.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Imitating human search strategies for assembly,&#x0201D;</article-title> in <source>2019 International Conference on Robotics and Automation (ICRA)</source> (<publisher-loc>Montreal, QC</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>7821</fpage>&#x02013;<lpage>7827</lpage>. <pub-id pub-id-type="doi">10.1109/ICRA.2019.8793780</pub-id></citation>
</ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>Y.</given-names></name> <name><surname>Rozo</surname> <given-names>L.</given-names></name> <name><surname>Silv&#x000E9;rio</surname> <given-names>J.</given-names></name> <name><surname>Caldwell</surname> <given-names>D. G.</given-names></name></person-group> (<year>2019</year>). <article-title>Kernelized movement primitives</article-title>. <source>Int. J. Rob. Res</source>. <volume>38</volume>, <fpage>833</fpage>&#x02013;<lpage>852</lpage>. <pub-id pub-id-type="doi">10.1177/0278364919846363</pub-id></citation>
</ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>Y.</given-names></name> <name><surname>Zheng</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>N.</given-names></name> <name><surname>Ota</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name></person-group> (<year>2020</year>). <article-title>Peg-in-hole assembly based on master-slave coordination for a compliant dual-arm robot</article-title>. <source>Assem. Autom</source>. <volume>40</volume>, <fpage>189</fpage>&#x02013;<lpage>198</lpage>. <pub-id pub-id-type="doi">10.1108/AA-10-2018-0164</pub-id></citation>
</ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>Y.-L.</given-names></name> <name><surname>Ahn</surname> <given-names>K.-H.</given-names></name> <name><surname>Song</surname> <given-names>J.-B.</given-names></name></person-group> (<year>2020</year>). <article-title>Reinforcement learning based on movement primitives for contact tasks</article-title>. <source>Robot. Comput. Integr. Manuf</source>. <volume>62</volume>, <elocation-id>101863</elocation-id>. <pub-id pub-id-type="doi">10.1016/j.rcim.2019.101863</pub-id></citation>
</ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>M. A.</given-names></name> <name><surname>Zhu</surname> <given-names>Y.</given-names></name> <name><surname>Zachares</surname> <given-names>P.</given-names></name> <name><surname>Tan</surname> <given-names>M.</given-names></name> <name><surname>Srinivasan</surname> <given-names>K.</given-names></name> <name><surname>Savarese</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Making sense of vision and touch: learning multimodal representations for contact-rich tasks</article-title>. <source>IEEE Trans. Robot</source>. <volume>36</volume>, <fpage>582</fpage>&#x02013;<lpage>596</lpage>. <pub-id pub-id-type="doi">10.1109/TRO.2019.2959445</pub-id></citation>
</ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>G.</given-names></name> <name><surname>Ji</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>S.</given-names></name> <name><surname>Luo</surname> <given-names>X.</given-names></name> <name><surname>Qu</surname> <given-names>X.</given-names></name></person-group> (<year>2023</year>). <article-title>Driver behavioral cloning for route following in autonomous vehicles using task knowledge distillation</article-title>. <source>IEEE Trans. Intell. Veh</source>. <volume>8</volume>, <fpage>1025</fpage>&#x02013;<lpage>1033</lpage>. <pub-id pub-id-type="doi">10.1109/TIV.2022.3198678</pub-id></citation>
</ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Zheng</surname> <given-names>X.</given-names></name> <name><surname>Dai</surname> <given-names>Y.</given-names></name> <name><surname>Yu</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>J. J.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Supervised assisted deep reinforcement learning for emergency voltage control of power systems</article-title>. <source>Neurocomputing</source> <volume>475</volume>, <fpage>69</fpage>&#x02013;<lpage>79</lpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2021.12.043</pub-id></citation>
</ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>N.</given-names></name> <name><surname>Zhou</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name> <name><surname>Cui</surname> <given-names>L.</given-names></name></person-group> (<year>2020</year>). <article-title>Learning peg-in-hole assembly using cartesian dmps with feedback mechanism</article-title>. <source>Assem. Autom</source>. <volume>40</volume>, <fpage>895</fpage>&#x02013;<lpage>904</lpage>. <pub-id pub-id-type="doi">10.1108/AA-04-2020-0053</pub-id></citation>
</ref>
<ref id="B17">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>J.</given-names></name> <name><surname>Solowjow</surname> <given-names>E.</given-names></name> <name><surname>Wen</surname> <given-names>C.</given-names></name> <name><surname>Ojea</surname> <given-names>J. A.</given-names></name> <name><surname>Agogino</surname> <given-names>A. M.</given-names></name> <name><surname>Tamar</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>&#x0201C;Reinforcement learning on variable impedance controller for high-precision robotic assembly,&#x0201D;</article-title> in <source>2019 International Conference on Robotics and Automation (ICRA)</source> (<publisher-loc>Montreal, QC</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>3080</fpage>&#x02013;<lpage>3087</lpage>. <pub-id pub-id-type="doi">10.1109/ICRA.2019.8793506</pub-id></citation>
</ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Paraschos</surname> <given-names>A.</given-names></name> <name><surname>Daniel</surname> <given-names>C.</given-names></name> <name><surname>Peters</surname> <given-names>J.</given-names></name> <name><surname>Neumann</surname> <given-names>G.</given-names></name></person-group> (<year>2018</year>). <article-title>Using probabilistic movement primitives in robotics</article-title>. <source>Auton. Robots</source> <volume>42</volume>, <fpage>529</fpage>&#x02013;<lpage>551</lpage>. <pub-id pub-id-type="doi">10.1007/s10514-017-9648-7</pub-id></citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Park</surname> <given-names>H.</given-names></name> <name><surname>Park</surname> <given-names>J.</given-names></name> <name><surname>Lee</surname> <given-names>D.-H.</given-names></name> <name><surname>Park</surname> <given-names>J.-H.</given-names></name> <name><surname>Baeg</surname> <given-names>M.-H.</given-names></name> <name><surname>Bae</surname> <given-names>J.-H.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Compliance-based robotic peg-in-hole assembly strategy without force feedback</article-title>. <source>IEEE Trans. Ind. Electron</source>. <volume>64</volume>, <fpage>6299</fpage>&#x02013;<lpage>6309</lpage>. <pub-id pub-id-type="doi">10.1109/TIE.2017.2682002</pub-id></citation>
</ref>
<ref id="B20">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Rozo</surname> <given-names>L.</given-names></name> <name><surname>Bruno</surname> <given-names>D.</given-names></name> <name><surname>Calinon</surname> <given-names>S.</given-names></name> <name><surname>Caldwell</surname> <given-names>D. G.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;Learning optimal controllers in human-robot cooperative transportation tasks with position and force constraints,&#x0201D;</article-title> in <source>2015 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)</source> (<publisher-loc>Hamburg</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1024</fpage>&#x02013;<lpage>1030</lpage>. <pub-id pub-id-type="doi">10.1109/IROS.2015.7353496</pub-id></citation>
</ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rozo</surname> <given-names>L.</given-names></name> <name><surname>Calinon</surname> <given-names>S.</given-names></name> <name><surname>Caldwell</surname> <given-names>D. G.</given-names></name> <name><surname>Jim&#x000E9;nez</surname> <given-names>P.</given-names></name> <name><surname>Torras</surname> <given-names>C.</given-names></name></person-group> (<year>2016</year>). <article-title>Learning physical collaborative robot behaviors from human demonstrations</article-title>. <source>IEEE Trans. Robot</source>. <volume>32</volume>, <fpage>513</fpage>&#x02013;<lpage>527</lpage>. <pub-id pub-id-type="doi">10.1109/TRO.2016.2540623</pub-id></citation>
</ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Silv&#x000E9;rio</surname> <given-names>J.</given-names></name> <name><surname>Calinon</surname> <given-names>S.</given-names></name> <name><surname>Rozo</surname> <given-names>L.</given-names></name> <name><surname>Caldwell</surname> <given-names>D. G.</given-names></name></person-group> (<year>2019</year>). <article-title>Learning task priorities from demonstrations</article-title>. <source>IEEE Trans. Robot</source>. <volume>35</volume>, <fpage>78</fpage>&#x02013;<lpage>94</lpage>. <pub-id pub-id-type="doi">10.1109/TRO.2018.2878355</pub-id></citation>
</ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tsurumine</surname> <given-names>Y.</given-names></name> <name><surname>Matsubara</surname> <given-names>T.</given-names></name></person-group> (<year>2022</year>). <article-title>Goal-aware generative adversarial imitation learning from imperfect demonstration for robotic cloth manipulation</article-title>. <source>Rob. Auton. Syst</source>. <volume>158</volume>, <elocation-id>104264</elocation-id>. <pub-id pub-id-type="doi">10.1016/j.robot.2022.104264</pub-id></citation>
</ref>
<ref id="B24">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Tsuruoka</surname> <given-names>T.</given-names></name> <name><surname>Fujioka</surname> <given-names>H.</given-names></name> <name><surname>Moriyama</surname> <given-names>T.</given-names></name> <name><surname>Mayeda</surname> <given-names>H.</given-names></name></person-group> (<year>1997</year>). <article-title>&#x0201C;3d analysis of contact in peg-hole insertion,&#x0201D;</article-title> in <source>Proceedings of the 1997 IEEE International Symposium on Assembly and Task Planning (ISATP&#x00027;97) - Towards Flexible and Agile Assembly and Manufacturing</source> (<publisher-loc>Marina del Rey, CA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>84</fpage>&#x02013;<lpage>89</lpage>. <pub-id pub-id-type="doi">10.1109/ISATP.1997.615389</pub-id></citation>
</ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>C.</given-names></name> <name><surname>Zeng</surname> <given-names>C.</given-names></name> <name><surname>Cong</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>N.</given-names></name> <name><surname>Wang</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>A learning framework of adaptive manipulative skills from human to robot</article-title>. <source>IEEE Trans Ind. Inform</source>. <volume>15</volume>, <fpage>1153</fpage>&#x02013;<lpage>1161</lpage>. <pub-id pub-id-type="doi">10.1109/TII.2018.2826064</pub-id></citation>
</ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zang</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>P.</given-names></name> <name><surname>Zha</surname> <given-names>F.</given-names></name> <name><surname>Guo</surname> <given-names>W.</given-names></name> <name><surname>Ruan</surname> <given-names>S.</given-names></name> <name><surname>Sun</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Geometric-feature representation based pre-training method for reinforcement learning of peg-in-hole tasks</article-title>. <source>IEEE Robot. Autom. Lett</source>. <volume>8</volume>, <fpage>3478</fpage>&#x02013;<lpage>3485</lpage>. <pub-id pub-id-type="doi">10.1109/LRA.2023.3261759</pub-id></citation>
</ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Zhao</surname> <given-names>L.</given-names></name> <name><surname>Liu</surname> <given-names>H.</given-names></name></person-group> (<year>2023</year>). <article-title>A learning-based two-stage method for submillimeter insertion tasks with only visual inputs</article-title>. <source>IEEE Trans. Ind. Electr</source>. <fpage>1</fpage>&#x02013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1109/TIE.2023.3299051</pub-id></citation>
</ref>
</ref-list>
</back>
</article>