<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Robot. AI</journal-id>
<journal-title>Frontiers in Robotics and AI</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Robot. AI</abbrev-journal-title>
<issn pub-type="epub">2296-9144</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">832208</article-id>
<article-id pub-id-type="doi">10.3389/frobt.2022.832208</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Robotics and AI</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Robotic Endoscope Control Via Autonomous Instrument Tracking</article-title>
<alt-title alt-title-type="left-running-head">Gruijthuijsen et al.</alt-title>
<alt-title alt-title-type="right-running-head">Autonomous Instrument Tracking</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Gruijthuijsen</surname>
<given-names>Caspar</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1632393/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Garcia-Peraza-Herrera</surname>
<given-names>Luis C.</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1545337/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Borghesan</surname>
<given-names>Gianni</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1685341/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Reynaerts</surname>
<given-names>Dominiek</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Deprest</surname>
<given-names>Jan</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1598131/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ourselin</surname>
<given-names>Sebastien</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/222555/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Vercauteren</surname>
<given-names>Tom</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/639928/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Vander Poorten</surname>
<given-names>Emmanuel</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/142797/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Mechanical Engineering</institution>, <institution>KU Leuven</institution>, <addr-line>Leuven</addr-line>, <country>Belgium</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Medical Physics and Biomedical Engineering</institution>, <institution>University College London</institution>, <addr-line>London</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Surgical and Interventional Engineering</institution>, <institution>King&#x2019;s College London</institution>, <addr-line>London</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Core Lab ROB, Flanders Make</institution>, <addr-line>Lommel</addr-line>, <country>Belgium</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Department of Development and Regeneration</institution>, <institution>Division Woman and Child</institution>, <institution>KU Leuven</institution>, <addr-line>Leuven</addr-line>, <country>Belgium</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/525016/overview">Daniele Cafolla</ext-link>, Mediterranean Neurological Institute Neuromed (IRCCS), Italy</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1382972/overview">Juan Sandoval</ext-link>, University of Poitiers, France</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1021085/overview">Luigi Pavone</ext-link>, Mediterranean Neurological Institute Neuromed (IRCCS), Italy</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Luis C. Garcia-Peraza-Herrera, <email>luis_c.garcia_peraza_herrera@kcl.ac.uk</email>
</corresp>
<fn fn-type="equal" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work and share first authorship</p>
</fn>
<fn fn-type="other">
<p>This article was submitted to Biomedical Robotics, a section of the journal Frontiers in Robotics and AI</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>11</day>
<month>04</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>9</volume>
<elocation-id>832208</elocation-id>
<history>
<date date-type="received">
<day>09</day>
<month>12</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>17</day>
<month>02</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Gruijthuijsen, Garcia-Peraza-Herrera, Borghesan, Reynaerts, Deprest, Ourselin, Vercauteren and Vander Poorten.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Gruijthuijsen, Garcia-Peraza-Herrera, Borghesan, Reynaerts, Deprest, Ourselin, Vercauteren and Vander Poorten</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Many keyhole interventions rely on bi-manual handling of surgical instruments, forcing the main surgeon to rely on a second surgeon to act as a camera assistant. In addition to the burden of excessively involving surgical staff, this may lead to reduced image stability, increased task completion time and sometimes errors due to the monotony of the task. Robotic endoscope holders, controlled by a set of basic instructions, have been proposed as an alternative, but their unnatural handling may increase the cognitive load of the (solo) surgeon, which hinders their clinical acceptance. More seamless integration in the surgical workflow would be achieved if robotic endoscope holders collaborated with the operating surgeon <italic>via</italic> semantically rich instructions that closely resemble instructions that would otherwise be issued to a human camera assistant, such as &#x201c;focus on my right-hand instrument.&#x201d; As a proof of concept, this paper presents a novel system that paves the way towards a synergistic interaction between surgeons and robotic endoscope holders. The proposed platform allows the surgeon to perform a bimanual coordination and navigation task, while a robotic arm autonomously performs the endoscope positioning tasks. Within our system, we propose a novel tooltip localization method based on surgical tool segmentation and a novel visual servoing approach that ensures smooth and appropriate motion of the endoscope camera. We validate our vision pipeline and run a user study of this system. The clinical relevance of the study is ensured through the use of a laparoscopic exercise validated by the European Academy of Gynaecological Surgery which involves bi-manual coordination and navigation. Successful application of our proposed system provides a promising starting point towards broader clinical adoption of robotic endoscope holders.</p>
</abstract>
<kwd-group>
<kwd>minimally invasive surgery</kwd>
<kwd>endoscope holders</kwd>
<kwd>endoscope robots</kwd>
<kwd>endoscope control</kwd>
<kwd>visual servoing</kwd>
<kwd>instrument tracking</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>In recent years, many surgical procedures shifted from open surgery to minimally invasive surgery (MIS). Although MIS offers excellent advantages for the patient, including reduced scarring and faster recovery, it comes with challenges for the surgical team. Most notable is the loss of direct view onto the surgical site. In keyhole surgery, the surgeon manipulates long and slender instruments introduced into the patient through small incisions or keyholes. The surgeon relies on endoscopes, also long and slender instruments equipped with a camera and light source, to obtain visual feedback on the scene and the relative pose of the other instruments. The limited field of view (FoV) and depth of field of the endoscope urge an efficient endoscope manipulation method that allows covering all the important features and hereto optimizes the view at all times.</p>
<p>In typical MIS, surgeons cannot manipulate the endoscope themselves as their hands are occupied with other instruments. Therefore, a camera assistant, typically another surgeon, takes charge of handling the endoscope. Human camera assistants have a number of shortcomings. An important drawback relates to the cost of the human camera assistant (<xref ref-type="bibr" rid="B67">Stott et al., 2017</xref>). Arguably, highly trained clinicians could better be assigned to other surgical duties that require the full extent of their skill set (as opposed to mainly manipulating the endoscope). If made widely feasible, solo MIS surgery would improve cost-effectiveness and staffing efficiency. An additional source of weakness related to human camera assistants is the ergonomic burden associated with assisting in MIS (<xref ref-type="bibr" rid="B77">Wauben et al., 2006</xref>; <xref ref-type="bibr" rid="B43">Lee et al., 2009</xref>). This may lead to reduced image stability, fatigue, distractions, increased task completion times, and erroneous involuntary movements (<xref ref-type="bibr" rid="B31">Goodell et al., 2006</xref>; <xref ref-type="bibr" rid="B53">Platte et al., 2019</xref>; <xref ref-type="bibr" rid="B58">Rodrigues Armijo et al., 2020</xref>). This problem is aggravated for long interventions or when the assistant has to adopt particularly uncomfortable postures. Besides the ergonomic challenges, miscommunication between the surgeon and the assistant may lead to sub-optimal views (<xref ref-type="bibr" rid="B6">Amin et al., 2020</xref>).</p>
<p>In order to help or bypass the human camera assistant and to optimize image stability, numerous endoscope holders have been designed in the past (<xref ref-type="bibr" rid="B36">Jaspers et al., 2004</xref>; <xref ref-type="bibr" rid="B9">Bihlmaier, 2016</xref>; <xref ref-type="bibr" rid="B69">Takahashi, 2020</xref>). One can distinguish passive endoscope holders and active or robotic endoscope holders. Passive endoscope holders are mechanical devices that lock the endoscope in a given position until manually unlocked and adjusted. A problem common to passive endoscope holders is that they result in an intermittent operation that interferes with the manipulation task (<xref ref-type="bibr" rid="B36">Jaspers et al., 2004</xref>). When surgeons want to adjust the endoscopic view themselves, they will have to free one or both hands to reposition the endoscope. To counter this problem, robotic endoscope holders have been developed. These motorized devices offer the surgeon a dedicated interface to control the endoscope pose. Well-designed robotic endoscope holders do not cause additional fatigue, improve image stability, and increase ergonomics (<xref ref-type="bibr" rid="B25">Fujii et al., 2018</xref>). Also, hand-eye coordination issues may be avoided. Overall such robotic endoscope holders may lower the cognitive load of the surgeon and reduce operating room (OR) staff time and intervention cost (<xref ref-type="bibr" rid="B4">Ali et al., 2018</xref>). However, despite these advantages and the number of systems available, robotic endoscope holders have not found widespread clinical acceptance (<xref ref-type="bibr" rid="B9">Bihlmaier, 2016</xref>). This has been linked to the suboptimal nature of the human interface and consequently the discomfort caused to the surgeon by the increased cognitive load needed to control the camera. 
Popular robotic endoscope holders use foot pedals, joysticks, voice control, gaze control, and head movements (<xref ref-type="bibr" rid="B40">Kommu et al., 2007</xref>; <xref ref-type="bibr" rid="B34">Holl&#xe4;nder et al., 2014</xref>; <xref ref-type="bibr" rid="B25">Fujii et al., 2018</xref>). The context switching between surgical manipulation and these camera control mechanisms seems to hinder the ability of the surgeon to concentrate on the main surgical task (<xref ref-type="bibr" rid="B9">Bihlmaier, 2016</xref>).</p>
<sec id="s1-1">
<title>1.1 Contributions</title>
<p>In this work, we introduce the framework of <italic>semantically rich endoscope control</italic>, which is our proposal on how robotic endoscope control could be implemented to mitigate interruptions and maximize the clinical acceptance of robotic endoscope holders. We claim that <italic>semantically rich instructions</italic> that relate to the instruments such as &#x201c;focus on the right/left instrument&#x201d; and &#x201c;focus on a point between the instruments&#x201d; are a priority, as they are shared among a large number of surgical procedures. Therefore, we present a novel system that paves the way towards a synergistic interaction between surgeons and robotic endoscope holders. To the best of our knowledge, we are the first to report how to construct an autonomous instrument tracking system that allows for solo-surgery using only the endoscope as a sensor to track the surgical tools. The proposed platform allows the surgeon to perform a bi-manual coordination and navigation task while the robotic arm autonomously performs the endoscope positioning.</p>
<p>Within our proposed platform, we introduce a novel tooltip localization method based on a hybrid mixture of deep learning and classical computer vision. In contrast to other tool localization methods in the literature, the proposed approach does not require manual annotations of the tooltips, but relies on tool segmentation, which is advantageous as the manual annotation effort could be trivially waived employing methods such as that recently proposed in <xref ref-type="bibr" rid="B26">Garcia-Peraza-Herrera et al. (2021)</xref>. This vision pipeline was individually validated and the proposed tooltip localization method was able to detect tips in 84.46% of the frames. This performance proved sufficient to allow for a successful autonomous guidance of the endoscope (per user study of the whole robotic system).</p>
<p>We propose a novel visual servoing method for a generalized endoscope model with support for both remote center of motion and endoscope bending. We show that a hybrid of position-based visual servoing (PBVS) and 3D image-based visual-servoing (IBVS) is preferred for robotic endoscope control.</p>
<p>We run a user study of the whole robotic system on a standardized bi-manual coordination and navigation laparoscopic task accredited for surgical training (<xref ref-type="bibr" rid="B22">European Academy of Gynaecological Surgery, 2020</xref>). In this study we show that the combination of novel tool localization and visual servoing proposed is robust enough to allow for the successful autonomous control of the endoscope. During the user study experiments (eight people, five trials), participants were able to complete the bi-manual coordination surgical task without the aid of a camera assistant and in a reasonable time (172 s on average).</p>
</sec>
<sec id="s1-2">
<title>1.2 Towards Semantically Rich Robotic Endoscope Control</title>
<p>While solo surgery has been demonstrated with simple robotic endoscope control approaches (<xref ref-type="bibr" rid="B69">Takahashi, 2020</xref>), we argue that to overcome the usability issues that impede broad clinical adoption of robotic endoscope holders and move towards solo surgery, robotic endoscope control should be performed at the task autonomy level. To efficiently operate in this setting, a robotic endoscope holder should accept a set of <italic>semantically rich instructions</italic>. These instructions correspond to the commands that a surgeon would normally issue to a human camera assistant. This contrasts with earlier approaches, where the very limited instruction sets (up, down, left, right, zoom in, zoom out) lead to a semantic gap between the robotic endoscopic holder and the surgeon (<xref ref-type="bibr" rid="B41">Kunze et al., 2011</xref>). With semantically rich instructions, it would be possible to bridge this gap and restore the familiar relationship between the surgeon and the (now tireless and precise) camera assistant.</p>
<p>A semantically rich instruction set should contain commands that induce context-aware actions. Examples of such are &#x201c;zoom in on the last suture,&#x201d; &#x201c;hold the camera stationary above the liver,&#x201d; and &#x201c;focus the camera on my right instrument.&#x201d; When these instructions are autonomously executed by a robotic endoscope holder, we refer to the control as <italic>semantically rich robotic endoscope control</italic>. We believe that semantically rich robotic endoscope control can effectively overcome the problem of intermittent operation with endoscopic holders, does not disrupt the established surgical workflow, ensures minimal overhead for the surgeon, and overall maximizes the usability and efficiency of the intervention.</p>
<p>Although instructions that have the camera track an anatomical feature are relevant, autonomous instrument tracking instructions (e.g. &#x201c;focus the camera between the instruments&#x201d;) play a prominent role, as they are common to a large number of laparoscopic procedures and form a fundamental step towards solo surgery. Therefore, in this work we focus on semantically rich instructions related to the autonomous instrument tracking (AIT) of a maximum of two endoscopic instruments (one per hand of the operating surgeon, <italic>see</italic> <xref ref-type="fig" rid="F1">Figure 1</xref>). Particularly, the proposed method implements the instructions &#x201c;focus on the right/left instrument&#x201d; and &#x201c;focus on a point between the instruments.&#x201d; User interface methods to translate requests expressed by the surgeon (e.g. voice control) to these AIT instructions fall outside the scope of this work.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Proposed experimental setup for autonomous endoscope control in a laparoscopic setting. The LASTT model (<xref ref-type="bibr" rid="B22">European Academy of Gynaecological Surgery, 2020</xref>) showcased within the box trainer is designed for the simulation of simple surgical tasks with a focus on gynaecological procedures. It is common ground for the practice and evaluation of hand-eye and bi-manual coordination skills.</p>
</caption>
<graphic xlink:href="frobt-09-832208-g001.tif"/>
</fig>
<p>The remainder of the paper is organized as follows. After describing the related work, the AIT problem is stated in <xref ref-type="sec" rid="s3">Section 3</xref>. The quality of the AIT depends on robust methods to localize one or more surgical instruments in the endoscopic view. <xref ref-type="sec" rid="s4">Section 4</xref> describes a novel image-processing pipeline that was developed to tackle this problem. Visual servoing methods are described in <xref ref-type="sec" rid="s5">Section 5</xref>. These methods provide the robotic endoscope control with the ability to track the detected instruments autonomously. An experimental user study campaign is set up and described in <xref ref-type="sec" rid="s6">Section 6</xref> to demonstrate the value of AIT in a validated surgical training task. <xref ref-type="sec" rid="s7">Section 7</xref> discusses the obtained results and <xref ref-type="sec" rid="s8">Section 8</xref> draws conclusions regarding the implementation of the AIT instructions proposed in this work.</p>
</sec>
</sec>
<sec id="s2">
<title>2 Related Work</title>
<p>Robotic endoscope control (REC) allows the surgeon to control the endoscope without having to free their hands. A wide variety of dedicated control interfaces have been developed and commercialized for this purpose, including joystick control, voice control, gaze control and head gesture control (<xref ref-type="bibr" rid="B70">Taniguchi et al., 2010</xref>). Despite the apparent differences in these interfaces, established approaches offer the surgeon a basic instruction set to control the endoscope. This set typically consists of six instructions: zoom in/out, move up/down, and move left/right. The basic nature of these instructions makes detailed positioning cumbersome, usually resorting to a lengthy list of instructions. This shifts the surgeon&#x2019;s focus from handling the surgical instruments towards the positioning of the endoscope, as concurrent execution of those actions is often strenuous and confusing (<xref ref-type="bibr" rid="B36">Jaspers et al., 2004</xref>). Moreover, the increased mental workload stemming from simultaneous control of the instruments and the endoscope might adversely affect intervention outcomes (<xref ref-type="bibr" rid="B9">Bihlmaier, 2016</xref>). Similar to passive endoscope holders, robotic endoscope holders that offer a basic set of instructions prevent fluid operation and lead to intermittent action.</p>
<p>A number of REC approaches that pursue fully autonomous operation have been proposed as well. A starting point for an autonomous REC strategy is to reposition the endoscope so as to keep the surgical instrument tip centered in the view. Such an approach could work already when only the 2D position of the instrument in the endoscopic image is available (<xref ref-type="bibr" rid="B72">Uecker et al., 1995</xref>; <xref ref-type="bibr" rid="B50">Osa et al., 2010</xref>; <xref ref-type="bibr" rid="B3">Agustinos et al., 2014</xref>; <xref ref-type="bibr" rid="B86">Zinchenko and Song, 2021</xref>). In this case, the endoscope zoom level (depth) is left uncontrolled. Some of these methods also require a 3D geometrical instrument model (<xref ref-type="bibr" rid="B3">Agustinos et al., 2014</xref>), limiting the flexibility of the system. Approaches such as proposed by <xref ref-type="bibr" rid="B86">Zinchenko and Song (2021)</xref> have also suggested to replace the endoscope screen with a head-mounted virtual reality device that facilitates the estimation of the surgeon&#x2019;s attention focus from the headset&#x2019;s gyroscope. In this scenario, the autonomous REC strategy aims to reposition the endoscope with the aim of maintaining the weighted center of mass between the instruments&#x2019; contour centers and the point of focus in the center of the view. However, it has been shown in works such as (<xref ref-type="bibr" rid="B33">Hanna et al., 1997</xref>; <xref ref-type="bibr" rid="B49">Nishikawa et al., 2008</xref>) that the zoom level is important for effective endoscope positioning. Other authors tried to circumvent the lack of depth information in 2D endoscopic images by relating the inter-instrument distance to the zoom level (<xref ref-type="bibr" rid="B64">Song et al., 2012</xref>; <xref ref-type="bibr" rid="B39">King et al., 2013</xref>). This approach is obviously limited to situations where at least two instruments are visible.</p>
<p>When the 3D instrument tip position is available, smarter autonomous REC strategies are possible. In the context of fully robotic surgery, kinematic-based tooltip position information has been used to provide autonomously guided ultrasound imaging with corresponding augmented reality display for the surgeon (<xref ref-type="bibr" rid="B61">Samei et al., 2020</xref>). Kinematics have also been employed by Mariani and Da Col <italic>et al.</italic> (<xref ref-type="bibr" rid="B46">Mariani et al., 2020</xref>; <xref ref-type="bibr" rid="B18">Da Col et al., 2021</xref>) for autonomous endoscope guidance in a user study on <italic>ex vivo</italic> bladder reconstruction with the da Vinci Surgical System. In their experimental setup, the system could track either a single instrument or the midpoint between two tools. Similarly, <xref ref-type="bibr" rid="B7">Avellino et al. (2020)</xref> have also employed kinematics for autonomous endoscope guidance in a co-manipulation scenario<xref ref-type="fn" rid="fn2">
<sup>1</sup>
</xref>. In (<xref ref-type="bibr" rid="B14">Casals et al., 1996</xref>; <xref ref-type="bibr" rid="B48">Mudunuri, 2010</xref>), rule-based strategies switch the control mode between single-instrument tracking or tracking points that aggregate locations of all visible instruments. Pandya <italic>et al.</italic> argued that such schemes are reactive and that better results can be obtained with predictive schemes, which incorporate knowledge of the surgery and the surgical phase (<xref ref-type="bibr" rid="B52">Pandya et al., 2014</xref>). Examples of such knowledge-based methods are (<xref ref-type="bibr" rid="B76">Wang et al., 1998</xref>; <xref ref-type="bibr" rid="B42">Kwon et al., 2008</xref>; <xref ref-type="bibr" rid="B78">Weede et al., 2011</xref>; <xref ref-type="bibr" rid="B57">Rivas-Blanco et al., 2014</xref>; <xref ref-type="bibr" rid="B9">Bihlmaier, 2016</xref>; <xref ref-type="bibr" rid="B75">Wagner et al., 2021</xref>). While promising in theory, in practice, the effort to create complete and reliable models for an entire surgery is excessive for current surgical data science systems. In addition, accurate and highly robust surgical phase recognition algorithms are required, increasing the complexity of this solution considerably.</p>
<p>With regards to the levels of autonomy in robotic surgery, <xref ref-type="bibr" rid="B82">Yang et al. (2017)</xref> have recently highlighted that the above strategies aim for very high autonomy levels but take no advantage of the surgeon&#x2019;s presence. In essence, the surgeon is left with an empty instruction set to direct the endoscope holder. Besides being hard to implement given the current state of the art, such high autonomy levels may be impractical and hard to transfer to clinical practice. Effectively, an ideal camera assistant only functions at the task autonomy level. This is also in line with the recent study by <xref ref-type="bibr" rid="B17">Col et al. (2020)</xref>, who concluded that it is important for endoscope control tasks to find the right trade-off between user control and autonomy.</p>
<p>To facilitate the autonomous endoscope guidance for laparoscopic applications when the 3D instrument tip position is not available, some authors have proposed to attach different types of markers to the instruments (e.g. optical, electromagnetic). This modification often comes with extra sensing equipment that needs to be added to the operating room.</p>
<p>In <xref ref-type="bibr" rid="B65">Song and Chen (2012)</xref>, authors proposed to use a monocular webcam mounted on a robotic pan-tilt platform to track two laparoscopic instruments with two colored rings attached to each instrument. They employed the estimated 2D image coordinates of the fiducial markers to control all the degrees of freedom of the robotic platform. However, this image-based visual servoing is not able to attain a desired constant depth to the target tissue (as also shown in our simulation of image-based visual servoing in <xref ref-type="sec" rid="s5-3">Section 5.3</xref>). In addition, the choice of fiducial markers is also an issue. Over the years, going back at least as far as to (<xref ref-type="bibr" rid="B73">Uenohara and Kanade, 1995</xref>), many types of markers have been proposed by the community for tool tracking purposes. For example, straight lines (<xref ref-type="bibr" rid="B14">Casals et al., 1996</xref>), black stripes (<xref ref-type="bibr" rid="B84">Zhang and Payandeh, 2002</xref>), cyan rings (<xref ref-type="bibr" rid="B71">Tonet et al., 2007</xref>), green stripes (<xref ref-type="bibr" rid="B56">Reiter et al., 2011</xref>), multiple colour rings for multiple instruments (blue-orange, blue-yellow) (<xref ref-type="bibr" rid="B63">Seong-Young et al., 2005</xref>), and multiple colour (red, yellow, cyan and green) bio-compatible markers (<xref ref-type="bibr" rid="B10">Bouarfa et al., 2012</xref>). However, although fiducial markers such as colored rings ease the tracking of surgical instruments, attaching or coating surgical instruments with fiducial markers presents serious sterilization, legal and installation challenges (<xref ref-type="bibr" rid="B68">Stoyanov, 2012</xref>; <xref ref-type="bibr" rid="B11">Bouget et al., 2017</xref>). First, the vision system requires specific tools to work or a modification of the current ones, which introduces a challenge for clinical translation. 
At the same time, computational methods designed to work with fiducials cannot easily be trained with standard previously recorded interventions. Additionally, to be used in human experiments, the markers need to be robust to the sterilisation process (e.g. autoclave). This poses a manufacturing challenge and increases the cost of the instruments. The positioning of the markers is also challenging. If they are too close to the tip, they might be occluded by the tissue being manipulated. If they are placed back in the shaft, they might be hidden to the camera, as surgeons tend to place the endoscope close to the operating point. Even if they are optimally positioned, fiducials may be easily covered by blood, smoke, or pieces of tissue. In addition to occlusions, illumination (reflections, shadows) and viewpoint changes still remain a challenge for the detection of the fiducial markers.</p>
<p>In contrast to using colored markers, <xref ref-type="bibr" rid="B62">Sandoval et al. (2021)</xref> used a motion capture system (MoCap) in the operating room to help the autonomous instrument tracking. The MoCap consisted of an exteroceptive sensor composed of eight high resolution infrared cameras. This system was able to provide the position of the reflective markers placed at the instruments (four markers per instrument) in real time. However, the MoCap increases considerably the cost of the proposed system and complicates the surgical workflow. Instruments need to be modified to add the markers, and the MoCap needs to be installed in the operating room. As with any other optical tracking system, it also poses the risk of occlusions in the line of sight, making it impossible for the system to track the instruments when such occlusions occur. As opposed to all these different markers, the endoscope is necessary to perform the surgery, and the surgeon needs to be able to see the instruments to carry out the intervention, so using the endoscope and its video frames without any instrument modifications to help track the tools is a solution that stems naturally from the existing surgical workflow. This has also been the path followed in the devising of <sc>AutoLap</sc>
<sup>&#x2122;</sup> (Medical Surgery Technologies, Yokneam, Israel) (<xref ref-type="bibr" rid="B79">Wijsman et al., 2018</xref>, <xref ref-type="bibr" rid="B80">2022</xref>), which is, to the best of our knowledge, the only robotic laparoscopic camera holder that claims to have incorporated image-based laparoscopic camera steering within its features. However, no technical details are provided in these publications on how it is achieved.</p>
</sec>
<sec id="s3">
<title>3 Autonomous Instrument Tracking</title>
<p>In a typical surgical scenario, a surgeon manipulates two instruments: one in the dominant and one in the non-dominant hand. In such a case, the surgeon might want to focus the camera on one specific instrument, or center the view on a point in between the instruments, depending on their relative importance. AIT strives to automate these tasks, as explained next.</p>
<sec id="s3-1">
<title>3.1 Centering Instrument Tips in FoV</title>
<p>With one instrument present, the proposed AIT objective is to center the instrument tip position <bold>
<italic>s</italic>
</bold> in the FoV, as is illustrated in <xref ref-type="fig" rid="F2">Figure 2</xref> (top). With two visible instruments, a relative dominance factor <italic>w</italic>
<sub>
<italic>d</italic>
</sub> &#x2208; [0, 1] can be assigned to the instruments (adjustable <italic>via</italic> a semantically rich instruction &#x201c;change dominance factor X% to the right/left&#x201d;). The AIT controller can then track the virtual average tip position according to<disp-formula id="e1">
<mml:math id="m1">
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:math>
<label>(1)</label>
</disp-formula>where <bold>
<italic>s</italic>
</bold>
<sub>
<italic>l</italic>
</sub> and <bold>
<italic>s</italic>
</bold>
<sub>
<italic>r</italic>
</sub> are the respective tip positions of the left and right instrument as visualized in <xref ref-type="fig" rid="F2">Figure 2</xref>, bottom.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Endoscopic view with one instrument tip at position <bold>
<italic>s</italic>
</bold> (top), and two instrument tips at positions <bold>
<italic>s</italic>
</bold>
<sub>
<italic>l</italic>
</sub> and <bold>
<italic>s</italic>
</bold>
<sub>
<italic>r</italic>
</sub>, combined to a virtual tip position <bold>
<italic>s</italic>
</bold> (bottom). The AIT controller aims to make <bold>
<italic>s</italic>
</bold> coincide with a desired position <bold>
<italic>s</italic>
</bold>&#x2a;. A, B and C are respectively the target, transition and violation zones of a programmed position hysteresis approach.</p>
</caption>
<graphic xlink:href="frobt-09-832208-g002.tif"/>
</fig>
<p>If the AIT were implemented to continuously track the virtual tip position <bold>
<italic>s</italic>
</bold>, the view would never come to a standstill, which would be disturbing for the surgeon. As a solution, also suggested in (<xref ref-type="bibr" rid="B9">Bihlmaier, 2016</xref>) and <xref ref-type="bibr" rid="B21">Eslamian et al. (2020)</xref>, a position hysteresis behaviour can be implemented. In this work, a combination of instructions with position hysteresis is implemented based on three zones in the endoscope FoV. As illustrated in <xref ref-type="fig" rid="F2">Figure 2</xref>, target zone A captures the ideal location of the tooltip, and transition zone B represents a tolerated region. Entering a violation zone C triggers re-positioning of the endoscope. Whenever <bold>
<italic>s</italic>
</bold> moves from zone B to zone C, the AIT will be activated. It will then stay active until <bold>
<italic>s</italic>
</bold> reaches zone A. Afterwards, the FoV will be kept stable, until <bold>
<italic>s</italic>
</bold> again crosses the border between zone B and zone C.</p>
<p>This implementation of AIT offers the surgeon instructions to track either instrument, to change the dominance factor, or to stop the tracking by disabling the AIT. Note that this implementation of AIT only specifies two degrees of freedom (DoFs) out of the four available DoFs in typical laparoscopy. The depth DoF is controlled by an additional instruction for the zoom level, i.e., the distance between the camera and the instrument tip. The DoF that rolls the endoscope around its viewing axis is controlled to always enforce an intuitive horizontal orientation of the camera horizon. If desired, a semantically rich instruction could be added to alter this behaviour.</p>
</sec>
<sec id="s3-2">
<title>3.2 Comanipulation Fallback</title>
<p>As neither the set of AIT instructions nor any other set of instructions can realistically cover all instructions relevant for semantically rich REC, situations can arise in surgical practice where the capabilities of the robotic endoscope holder are insufficient. In such a case, it is necessary to switch to a comanipulation mode. This kind of switching is already the clinical reality for commercial robotic endoscope holders (<xref ref-type="bibr" rid="B29">Gillen et al., 2014</xref>; <xref ref-type="bibr" rid="B34">Holl&#xe4;nder et al., 2014</xref>) and is particularly relevant when the system is used to support rather than replace the surgical assistant.</p>
<p>This work proposes to embed an easy switching functionality as a system feature. A natural transition from REC to comanipulation mode can be made possible through the use of a mechanically backdrivable robotic endoscope holder. This way, no extra hardware components are needed for switching, nor is it necessary to release the endoscope from the robotic endoscope holder. Instead, the surgeon can simply release one instrument, grab the endoscope and comanipulate it jointly with the robotic endoscope holder. During comanipulation, the human provides the intelligence behind the endoscope motions, while still experiencing support in the form of tremor-eliminating damping and fatigue-reducing gravity compensation. Such an approach broadens the scope of interventions where REC can be realistically applied.</p>
</sec>
</sec>
<sec id="s4">
<title>4 Markerless Instrument Localization</title>
<p>REC based on semantically rich instructions requires the robotic endoscope holder to autonomously execute context-aware tasks. This implies a need to autonomously collect contextual information. The AIT instruction relies on knowledge of the tip position <bold>
<italic>s</italic>
</bold> of the surgical instruments in the endoscopic view. To obtain this information, without the need to alter the employed instruments or surgical workflow, a markerless instrument localization pipeline is developed in this section. Note that the term <italic>localization</italic> is employed here, instead of the commonly used term <italic>tracking</italic>, as for the sake of clarity this work reserves <italic>tracking</italic> for the robotic servoing approaches needed for AIT.</p>
<sec id="s4-1">
<title>4.1 Instrument Localization Approaches</title>
<p>If, in addition to the endoscope, the instruments are also mounted on a robotic system (<xref ref-type="bibr" rid="B78">Weede et al., 2011</xref>; <xref ref-type="bibr" rid="B20">Eslamian et al., 2016</xref>) or if they are monitored by an external measurement system (<xref ref-type="bibr" rid="B49">Nishikawa et al., 2008</xref>; <xref ref-type="bibr" rid="B54">Polski et al., 2009</xref>), the position of the instruments can be directly obtained, provided that all involved systems are correctly registered and calibrated. However, in this work, manual handling of off-the-shelf laparoscopic instruments precludes access to such external localization information.</p>
<p>An alternative, which we use in this work, is to exploit the endoscope itself as the sensor. A review on this topic has been published relatively recently by <xref ref-type="bibr" rid="B11">Bouget et al. (2017)</xref>. In their work Bouget et al. present a comprehensive survey of the last years of research in tool detection and tracking with a particular focus on methods proposed prior to the advent of the deep learning approaches. Recent instrument localization techniques based on Convolutional Neural Networks (CNN) (<xref ref-type="bibr" rid="B30">Gonz&#xe1;lez et al., 2020</xref>; <xref ref-type="bibr" rid="B51">Pakhomov et al., 2020</xref>) are currently recognized as the state-of-the-art approaches (<xref ref-type="bibr" rid="B5">Allan et al., 2019</xref>; <xref ref-type="bibr" rid="B60">Ro&#xdf; et al., 2021</xref>) for such problems. In this work, we leverage our previous experience with CNN-based real-time tool segmentation networks (<xref ref-type="bibr" rid="B28">Garc&#xed;a-Peraza-Herrera et al., 2016</xref>; <xref ref-type="bibr" rid="B27">Garcia-Peraza-Herrera et al., 2017</xref>) and embed the segmentation in a stereo pipeline to estimate the location of the tooltips in 3D.</p>
</sec>
<sec id="s4-2">
<title>4.2 Instrument Localization Pipeline</title>
<p>A multi-step image processing pipeline was developed for markerless image-based instrument localization (<italic>see</italic> <xref ref-type="fig" rid="F3">Figure 3</xref>). As input, the pipeline takes the raw images from a stereo endoscope. As output, it provides the 3D tip positions of the visible instruments. The maximum number of instruments and tips per instrument are required as inputs. In the task performed in our user study, presented in <xref ref-type="sec" rid="s6">Section 6</xref>, a maximum of two instruments with two tips may be present.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Instrument localization pipeline: <bold>(A)</bold> stereo-rectified right camera input image; <bold>(B)</bold> predicted tool segmentation; <bold>(C)</bold> skeletonisation; <bold>(D)</bold> graph extraction, 2D detection of entrypoints and tips, right and left instrument labelled in green and yellow; <bold>(E)</bold> left and <bold>(F)</bold> right stereo-matched tooltips in 2D (bottom row). The pink dots in <bold>(D)</bold> are graph nodes extracted from the skeleton in <bold>(C)</bold> but represent neither entrypoints nor tooltips.</p>
</caption>
<graphic xlink:href="frobt-09-832208-g003.tif"/>
</fig>
<p>The 2D tooltip localization in image coordinates is a key intermediate step in this pipeline. Training a supervised bounding box detector for the tips could be a possible approach to perform the detection. However, to implement the semantically rich AIT presented in <xref ref-type="sec" rid="s3">Section 3</xref> and <xref ref-type="fig" rid="F2">Figure 2</xref> we would still need to know whether the detected tips belong to the same or different instruments, and more precisely whether they belong to the instrument handled by the dominant or non-dominant hand. Therefore, we opted for estimating the more informative tool-background semantic segmentation instead. <italic>Via</italic> processing the segmentation prediction, we estimate how many instruments are in the image, localize the tips, and associate each tip with either the left or right-hand instrument. A downside of using semantic segmentation in comparison to a detector is the increased annotation time required to build a suitable training set. However, recent advances to reduce the number of contour annotations needed to achieve the segmentation such as (<xref ref-type="bibr" rid="B74">Vardazaryan et al., 2018</xref>; <xref ref-type="bibr" rid="B24">Fuentes-Hurtado et al., 2019</xref>; <xref ref-type="bibr" rid="B26">Garcia-Peraza-Herrera et al., 2021</xref>) greatly mitigate this drawback.</p>
<p>In the remainder of this section we first discuss the assumptions made, imaging hardware, and preprocessing steps. Then, we proceed to describe the localization pipeline. The localization method consists of the following steps: binary tool-background segmentation (<xref ref-type="sec" rid="s4-2-3">Section 4.2.3</xref>), skeletonization of the segmentation mask (<xref ref-type="sec" rid="s4-2-4">Section 4.2.4</xref>), graph extraction from the pixel-wide skeleton (<xref ref-type="sec" rid="s4-2-4">Section 4.2.4</xref>), entrynode detection on the graph (<xref ref-type="sec" rid="s4-2-5">Section 4.2.5</xref>), leaf node detection on the graph (<xref ref-type="sec" rid="s4-2-6">Section 4.2.6</xref>), leaf node to entry node matching (<xref ref-type="sec" rid="s4-2-6">Section 4.2.6</xref>), and left/right instrument identification (<xref ref-type="sec" rid="s4-2-8">Section 4.2.8</xref>). After matching leaf nodes to entry nodes we have a subgraph for each instrument, and we distinguish between the left/right instrument using the estimated location of each instrument&#x2019;s entry node (<xref ref-type="sec" rid="s4-2-8">Section 4.2.8</xref>).</p>
<p>The implementation of the whole localization pipeline was done in Python, reading the video feed from the framegrabber V4L2 device with OpenCV, and performing the deep learning inference with Caffe (<xref ref-type="bibr" rid="B37">Jia et al., 2014</xref>) on an NVIDIA GeForce GTX Titan X GPU.</p>
<sec id="s4-2-1">
<title>4.2.1 Assumptions of Proposed Instrument Localization Pipeline</title>
<p>In our instrument localization pipeline, we assume that the instruments are not completely occluded. Partial occlusions are supported, as long as there is a visible path from the <italic>entrypoint</italic> to the tip of the instrument. Note that with <italic>entrypoint</italic> we refer to the point located at the edge of the endoscopic content area where the instrument enters the image. This point is not to be confused with the <italic>incision point</italic> which is the point on the body wall where the incision is made through which the instrument enters the patient&#x2019;s body. Now, if the tip is occluded, the tooltip will be estimated on the furthermost point of the shaft. When the entrypoint is completely covered, the instrument will not be detected in the current approach. Methods that exploit knowledge of the incision point could help in such a case (and could be explored in future work as they do not form the core of this work). The current limitations are illustrated in <xref ref-type="fig" rid="F4">Figure 4</xref>. The assumption that instruments have to enter from the edge serves two purposes, <italic>1</italic>) as a noise reduction technique for the segmentation, because false positive <italic>islands</italic> of pixels can be easily discarded, and <italic>2</italic>) to detect whether the instrument is held by the right/left hand of the surgeon (as explained in <xref ref-type="sec" rid="s4-2-8">Section 4.2.8</xref>). In most cases, the entrypoint of at least one of the instruments will be visible. Therefore, the benefits of the assumption that instruments will not be completely occluded largely outweigh its limitations. The proposal of surgical tool segmentation models that are robust to entrypoint occlusions (<xref ref-type="fig" rid="F4">Figure 4</xref>, right) or complete occlusions is out of the scope of this work.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Behaviour and limitations of the instrument localization pipeline in the presence of occlusions. The detected entrypoint and tooltip are indicated by the green and yellow arrow, respectively. In the partially occluded instrument (left), there is a visible path from entrypoint to tip, therefore the instrument is correctly detected. However, when the tip is occluded (center), the tooltip is detected to be on the shaft. If the entrypoint is occluded (right), the instrument is not detected in this stage of the research as tools are expected to enter the scene from the boundary of the content area.</p>
</caption>
<graphic xlink:href="frobt-09-832208-g004.tif"/>
</fig>
</sec>
<sec id="s4-2-2">
<title>4.2.2 Imaging Hardware and Preprocessing Procedure</title>
<p>The stereo camera and endoscopy module of choice for this work were the <sc>Tipcam1 S 3D ORL</sc> (30&#xb0; view on a 4&#xa0;mm outer diameter shaft) and <sc>Image1 S D3-LINK</sc> respectively (both from <sc>Karl Storz</sc>, Germany). The DVI output of the endoscopy module is plugged into a <sc>DVI2PCIe Duo</sc> framegrabber (<sc>Epiphan</sc>, Canada). The endoscopy module produces images at 60&#xa0;fps and at a resolution of 1920 &#xd7; 1080 pixels, which turns into 1920 &#xd7; 540 as each grabbed image contains a stereo pair with the left frame on even rows and the right frame on odd ones. These images are upsampled in the <italic>y</italic>-axis so that two images of 1920 &#xd7; 1080 pixels are obtained. Upscaling is chosen (as opposed to downsampling to 960 &#xd7; 540 pixels) to avoid degrading the depth resolution based on <italic>x</italic>-axis disparity. The left-right cameras are calibrated using a chessboard pattern of 1.1&#xa0;mm-wide squares (Cognex Glass Calibration Plate Set 320-0015R, <sc>Applied Image Inc.</sc>, NY, United States). Both frames, left and right, are rectified in real-time. Then, the black background of the images is cropped out, keeping just a square crop of the endoscopic circle content area (as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>), which results in an image of 495 &#xd7; 495 pixels. Finally, the image where the 2D tooltip localization is going to be performed (either the left or right frame can be chosen without loss of generality) is downsampled to 256 &#xd7; 256 pixels to speed up the subsequent processing steps (i.e. segmentation, graph extraction and 2D tooltip localization). Once the 2D tooltips have been estimated, they are extrapolated to the original image size and the disparity estimation and 3D tooltip reconstruction in <xref ref-type="sec" rid="s4-2-9">Section 4.2.9</xref> is performed on the original upsampled images of 1920 &#xd7; 1080 pixels.</p>
</sec>
<sec id="s4-2-3">
<title>4.2.3 Instrument Segmentation</title>
<p>In this work, we trained a CNN to segment instruments in our experimental setup (<italic>see</italic> <xref ref-type="fig" rid="F1">Figure 1</xref>). While having the necessary characteristics for a bimanual laparoscopic task, the visual appearance of the surgical training model we use is not representative of a real clinical environment. Therefore, we do not propose a new image segmentation approach but rather focus on the downstream computational questions. In order to translate our pipeline to the clinic, a newly annotated dataset containing real clinical images would need to be curated, and the images would need to contain artifacts typical of endoscopic procedures such as blood, partial occlusions, smoke, and blurring. Alternatively, an existing surgical dataset could be used. We have compiled a list of public datasets for tool segmentation<xref ref-type="fn" rid="fn3">
<sup>2</sup>
</xref> where the data available includes surgical scenes such as retinal microsurgery, laparoscopic adrenalectomy, pancreatic resection, neurosurgery, colorectal surgery, nephrectomy, proctocolectomy, and cholecystectomy amongst others. The compiled list also includes datasets for similar endoscopic tasks such as tool presence, instrument classification, tool-tissue action detection, skill assessment and workflow recognition, and laparoscopic image-to-image translation. The unlabelled data in these other datasets could also be potentially helpful for tool segmentation.</p>
<p>Next, we provide the details on how we built the segmentation model for our particular proof-of-concept of the robotic endoscope control. The dataset we curated consists of 1110 image-annotation pairs used for training, and 70 image-annotation pairs employed for validation (hyperparameter tuning). These 1110 &#x2b; 70 image-annotation pairs were manually selected by the first co-authors so that the chosen images represent well the variety of scenes in the task. They have been extracted from the recording of a surgical exercise in the lab, prior to the user study, and in a different location. There is no testing set at this point because the segmentation is an intermediary step. In <xref ref-type="sec" rid="s7-1">Section 7.1</xref>, we give more details about our testing set, which is used to evaluate the whole tooltip localization pipeline (as opposed to just the segmentation). The images in each stereo pair do not look the same: there is an observable difference in colour tones between them. Therefore, the data set has an even number of left and right frames such that either of them could be used as input for the surgical tool segmentation. In the training set, 470 images (42%) do not contain any tool. In them, the endoscope just observes the task setting under different viewpoints and lighting conditions (diverse intensities of the light source). The remaining 640 images of the training set, and all images of the validation set, have been manually labelled with delineations of the laparoscopic tools. The U-Net (<xref ref-type="bibr" rid="B59">Ronneberger et al., 2015</xref>) architecture showed superior performance in the tool segmentation EndoVis MICCAI challenge (<xref ref-type="bibr" rid="B5">Allan et al., 2019</xref>). Therefore, this was the architecture of choice employed for segmentation (32 neurons in the first layer and convolutional blocks composed of Conv &#x2b; ReLU &#x2b; BN). A minibatch of four images is used.
Default conventional values and common practice were followed for setting the hyperparameters, as detailed hereafter. The batch normalization (<xref ref-type="bibr" rid="B35">Ioffe and Szegedy, 2015</xref>) momentum was set to 0.1 (default value in PyTorch). Following the U-Net implementation in (<xref ref-type="bibr" rid="B59">Ronneberger et al., 2015</xref>), Dropout (<xref ref-type="bibr" rid="B66">Srivastava et al., 2014</xref>) was used. In our implementation, Dropout was employed in layers with <inline-formula id="inf1">
<mml:math id="m2">
<mml:mo>&#x2265;</mml:mo>
<mml:mn>512</mml:mn>
</mml:math>
</inline-formula> neurons (<italic>p</italic> &#x3d; 0.5), as in (<xref ref-type="bibr" rid="B27">Garcia-Peraza-Herrera et al., 2017</xref>). Following <xref ref-type="bibr" rid="B8">Bengio (2012)</xref>, the initial learning rate (LR) of choice was set to 1<italic>e</italic> &#x2212; 2. The network was trained for a maximum of 100 epochs. As is common practice, LR decay was employed during training, multiplying the LR by 0.5 every 10 epochs. Data augmentation was limited to on-the-fly left-right flips. As we evaluate our segmentation using the intersection over union (IoU), our loss function <inline-formula id="inf2">
<mml:math id="m3">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>IoU</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> is a continuous approximation to the intersection over union (<xref ref-type="bibr" rid="B55">Rahman and Wang, 2016</xref>) averaged over classes:<disp-formula id="e2">
<mml:math id="m4">
<mml:mtable class="aligned">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mi>I</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x3d;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mi>U</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x3d;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>IoU</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mfrac>
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>U</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(2)</label>
</disp-formula>where <italic>P</italic> is the number of pixels, <italic>K</italic> &#x3d; 2 is the number of classes (instrument and background), <inline-formula id="inf3">
<mml:math id="m5">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> represents the estimated probability maps, <bold>y</bold> represents the ground truth probability maps, <inline-formula id="inf4">
<mml:math id="m6">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> is the estimated probability of the pixel <italic>i</italic> belonging to the class <italic>k</italic>, and <italic>y</italic>
<sub>
<italic>i</italic>,<italic>k</italic>
</sub> is the ground truth probability of the pixel <italic>i</italic> belonging to class <italic>k</italic>. A machine epsilon <italic>&#x3f5;</italic> is added to prevent divisions by zero (e.g., in case that both prediction and ground truth are all background).</p>
<p>Once we have obtained a segmentation prediction from the trained convolutional model, we proceed to convert the segmentation into a graph, which is a stepping stone towards the tooltip detection.</p>
</sec>
<sec id="s4-2-4">
<title>4.2.4 Instrument Graph Construction</title>
<p>The instrument segmentation prediction is skeletonized <italic>via</italic> medial surface axis thinning (<xref ref-type="bibr" rid="B44">Lee et al., 1994</xref>). The resulting skeleton is converted <italic>via</italic> the Image-Py skeleton network framework (<xref ref-type="bibr" rid="B81">Xiaolong, 2019</xref>) into a pixel skeleton graph <italic>G</italic> &#x3d; (<italic>V</italic>, <italic>E</italic>) (<italic>see</italic> <xref ref-type="fig" rid="F5">Figure 5E</xref>), where <italic>V</italic> is a set of vertices and <inline-formula id="inf5">
<mml:math id="m7">
<mml:mi>E</mml:mi>
<mml:mo>&#x2286;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mfenced open="" close="]">
</mml:mfenced>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
<mml:mo>&#x2227;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is a set of edges. The nodes <italic>v</italic>
<sub>
<italic>i</italic>
</sub> &#x2208; <italic>V</italic> are defined as a tuple <italic>v</italic>
<sub>
<italic>i</italic>
</sub> &#x3d; (<italic>i</italic>, <bold>
<italic>p</italic>
</bold>
<sub>
<bold>
<italic>i</italic>
</bold>
</sub>) where <italic>i</italic> and <bold>
<italic>p</italic>
</bold>
<sub>
<bold>
<italic>i</italic>
</bold>
</sub> &#x3d; {<italic>x</italic>
<sub>
<italic>i</italic>
</sub>, <italic>y</italic>
<sub>
<italic>i</italic>
</sub>} represent the node index and 2<italic>D</italic> point image coordinates, respectively.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Surgical instrument graph construction and entry node extraction: <bold>(A)</bold> segmentation mask; <bold>(B)</bold> circle mask used to detect entrypoints; <bold>(C)</bold> intersection of segmentation mask and circle mask; <bold>(D)</bold> segmentation mask skeleton obtained according to (<xref ref-type="bibr" rid="B44">Lee et al., 1994</xref>); <bold>(E)</bold> graph obtained from skeleton by means of (<xref ref-type="bibr" rid="B81">Xiaolong, 2019</xref>); <bold>(F)</bold> entrypoint detection. If several graph nodes lie inside the entrypoint mask (in red), they are merged into a new single entry node (in green) whose position attribute is set to the centroid of all the graph nodes inside the dotted area.</p>
</caption>
<graphic xlink:href="frobt-09-832208-g005.tif"/>
</fig>
</sec>
<sec id="s4-2-5">
<title>4.2.5 Instrument Entry Node Extraction</title>
<p>As the size of the image is known, a circular segmentation mask (<italic>see</italic> <xref ref-type="fig" rid="F5">Figure 5B</xref>) is used to detect the graph nodes that could potentially correspond to instrument entrypoints. That is, given <italic>G</italic>, we populate a set <italic>R</italic> containing those graph nodes that represent tool entrypoints into the endoscopic image. Those graph nodes contained within the intersection of the circle mask and the tool segmentation mask are collapsed into a single new <italic>entry</italic> node <italic>v</italic>
<sub>
<italic>c</italic>
</sub> &#x3d; (<italic>n</italic>, <bold>
<italic>p</italic>
</bold>
<sub>
<bold>
<italic>c</italic>
</bold>
</sub>) per instrument, where <bold>
<italic>p</italic>
</bold>
<sub>
<bold>
<italic>c</italic>
</bold>
</sub> &#x3d; {<italic>x</italic>
<sub>
<italic>c</italic>
</sub>, <italic>y</italic>
<sub>
<italic>c</italic>
</sub>} is set to the centroid of all nodes captured within the aforementioned intersection. <italic>See</italic> <xref ref-type="fig" rid="F5">Figures 5B&#x2013;F</xref> for an example of entry node extraction.</p>
<p>A depth-first search is launched from each entry node to determine all the graph nodes that can be reached from entry nodes. Those that cannot be reached are pruned from the graph.</p>
</sec>
<sec id="s4-2-6">
<title>4.2.6 Instrument Leaf Node to Entry Node Matching</title>
<p>Let <italic>L</italic> &#x3d; {<italic>v</italic> &#x2208; <italic>V</italic> : <italic>d</italic>
<sub>
<italic>G</italic>
</sub>(<italic>v</italic>) &#x3d; 1 &#x2227; <italic>v</italic>&#x2209;<italic>R</italic>} be the set containing all <italic>leaf</italic> nodes, where <italic>d</italic>
<sub>
<italic>G</italic>
</sub>(<italic>v</italic>) &#x3d; &#x7c;{<italic>u</italic> &#x2208; <italic>V</italic>: {<italic>u</italic>, <italic>v</italic>} &#x2208; <italic>E</italic>}&#x7c;. In this part of the instrument localization pipeline, each <italic>leaf</italic> node in <italic>L</italic> is paired to an entrypoint node in <italic>R</italic>. This is solved by recursively traversing <italic>G</italic>, starting from each <italic>leaf</italic>. The criterion used to decide which node to traverse next is coined in this work as <italic>dot product recursive traversal</italic>. It is based on the assumption that the correct path from a tip to a corresponding entrypoint is the one with minimal direction changes. The stopping condition is reaching an entry node.</p>
<p>Dot product recursive traversal operates as follows. Let <italic>v</italic>
<sub>
<italic>i</italic>
</sub>, <italic>v</italic>
<sub>
<italic>j</italic>
</sub> &#x2208; <italic>V</italic> be two arbitrary nodes, and {<italic>v</italic>
<sub>
<italic>i</italic>
</sub>, <italic>v</italic>
<sub>
<italic>j</italic>
</sub>} the undirected edge connecting them. Assuming <italic>v</italic>
<sub>
<italic>i</italic>
</sub> is previously visited and <italic>v</italic>
<sub>
<italic>j</italic>
</sub> being traversed, the next node <italic>v</italic>&#x2a; to visit is chosen following:<disp-formula id="e3">
<mml:math id="m8">
<mml:msup>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mo>argmax</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x22c5;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(3)</label>
</disp-formula>where <italic>N</italic>(<italic>v</italic>
<sub>
<italic>i</italic>
</sub>) &#x3d; {<italic>w</italic> &#x2208; <italic>V</italic>: {<italic>v</italic>
<sub>
<italic>i</italic>
</sub>, <italic>w</italic>} &#x2208; <italic>E</italic>}, and <italic>A</italic> &#x3d; {<italic>v</italic>
<sub>
<italic>i</italic>
</sub>} is the set of nodes previously traversed. The idea behind <xref ref-type="disp-formula" rid="e3">(3)</xref> is to perform a greedy minimization of direction changes along the path from tooltip to entrypoint (<xref ref-type="fig" rid="F6">Figure 6A</xref>).</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Leaf traversal and hidden leaves: <bold>(A)</bold> graph traversal step. <italic>v</italic>
<sub>
<italic>i</italic>
</sub> (green) represents the previous node. <italic>v</italic>
<sub>
<italic>j</italic>
</sub> (blue) is the current node. <italic>v</italic>
<sub>
<italic>k</italic>
</sub> are possible next nodes. Following (3), the next node will be the one that maximizes the dot product; <bold>(B)</bold> possible <italic>hidden leaf</italic> (dashed red box) connected to two nodes; <bold>(C)</bold> node with two neighbours that does not represent a <italic>hidden leaf</italic> as both connecting edges are labelled after dot product traversal from <italic>l</italic>
<sub>1</sub>; <bold>(D)</bold> possible <italic>hidden leaf</italic> (dashed red box) with three neighbours.</p>
</caption>
<graphic xlink:href="frobt-09-832208-g006.tif"/>
</fig>
<p>In the case of two tools present in the image, it is possible to have <italic>hidden leaves</italic> (<xref ref-type="fig" rid="F6">Figures 6B,D</xref>), defined as graph nodes that represent an overlap between the tip of an instrument and another instrument. This situation can easily occur (<xref ref-type="fig" rid="F6">Figure 6</xref>) in surgical tasks, including the task presented in the experiments from <xref ref-type="sec" rid="s6">Section 6</xref>. There are two possible graph arrangements that can lead to <italic>hidden leaves</italic>. A node with exactly two (<xref ref-type="fig" rid="F6">Figure 6B</xref>) or three neighbours (<xref ref-type="fig" rid="F6">Figure 6D</xref>). Nonetheless, the number of neighbours alone does not facilitate the discovery of such <italic>hidden leaves</italic> (and subsequent disentanglement of tools), as it is also possible for a node with exactly two (could be a chain instead) or three (could be a bifurcation instead) neighbours to not be a <italic>hidden leaf</italic> (<italic>see</italic> <xref ref-type="fig" rid="F6">Figure 6C</xref>). Hence, extra information is needed. Aiming in this direction, after each successful traversal from a normal <italic>leaf</italic> to an entry node, all the edges along the path are labelled with the index of the entry node. In addition, all the edges directly connected to an entry node are also labelled.</p>
<p>A node with exactly two or three neighbours whose edges are all labelled with different entry nodes is a <italic>hidden leaf</italic>. Labelling helps to solve some of the <italic>hidden leaf</italic> cases. Such leaves can be duplicated, effectively splitting the graph into two, and disentangling the overlapped instruments. After disentanglement, they become normal leaves which can be assigned to an entry node by dot product traversal <xref ref-type="disp-formula" rid="e3">(3)</xref>. Although not a <italic>hidden leaf</italic>, a node with exactly four neighbours whose edges are labelled represents an overlap which can be trivially disentangled. <italic>Hidden leaves</italic> such as the ones presented in <xref ref-type="fig" rid="F6">Figures 6B,D</xref> cannot be classified with certainty as such just with the graph/skeleton information. As shown in <xref ref-type="fig" rid="F6">Figure 6</xref>, different tool configurations/occlusions could lead to the same graph configuration. As not all the tips can be unambiguously detected, entry nodes that are unmatched after dot product traversal (i.e., they were not reached after launching a traversal from each leaf node to a possible entry node) are paired to the furthest opposing node connected to them.</p>
<p>Although the traversal from tips to entrypoints has been illustrated in this section with one or two instruments (as is the case in our setup, <italic>see</italic> <xref ref-type="fig" rid="F1">Figure 1</xref>), the dot product traversal generalizes to setups with more instruments as the assumption that the path from tip to entrypoint is the one with fewer direction changes still holds.</p>
</sec>
<sec id="s4-2-7">
<title>4.2.7 Instrument Graph Pruning</title>
<p>Noisy skeletons can lead to inaccurate graphs containing more than two leaves matched to the same entry node, or more than two entry nodes connected to leaves. In our framework, unmatched entry nodes are deleted. In addition, due to the nature of our experimental setup, a maximum of two tools with two tips each can be found. Therefore, when more than two leaves are matched to the same entry node, only the two furthest are kept. Analogously, when more than two entry nodes are found and matched to leaves, the two kept are those with the longest chain (from entry node to leaves). That is, a maximum of two disentangled instrument graphs remain after pruning.</p>
</sec>
<sec id="s4-2-8">
<title>4.2.8 Left/Right Instrument Identification</title>
<p>In the presence of a single instrument, left/right vertical semi-circles determine whether the instrument is left/right (<italic>see</italic> <xref ref-type="fig" rid="F7">Figure 7</xref>, right), i.e., if the entrypoint of the tool is located in the right half of the image, it is assumed that the subgraph connected to this entrypoint is the right instrument, and vice versa. Note that this simple method is also generalizable to scenarios with three to five instruments, which are different from the two-instrument solo surgery setting examined in this work (<italic>see</italic> <xref ref-type="fig" rid="F8">Figure 8</xref>), but still worth mentioning as there are some endoscopic procedures that may involve such a number of tools (<xref ref-type="bibr" rid="B1">Abdi et al., 2016</xref>).</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Left/right instrument identification. Two-instrument case (left). Single-instrument case (right). The location of the entrypoints is used to identify whether the instruments are left/right. When two instruments are visible, an imaginary vertical line (parallel to the vertical axis of the image) that crosses over the central point of the segment connecting both entrypoints is used to determine if the instrument is left/right. When there is only one instrument, the location of the entrypoint with regards to the vertical axis of the image determines which tool is visible. If the entrypoint resides in the right half, as in the figure above, this is considered to be the right instrument.</p>
</caption>
<graphic xlink:href="frobt-09-832208-g007.tif"/>
</fig>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Definition of reference frames in a generalized endoscope model. The incision frame {<italic>i</italic>} is located at the incision point in the body wall, the distal tip frame {<italic>t</italic>} at the end of the straight endoscope shaft and the camera frame {<italic>c</italic>} at the end of the endoscope, in the optical center of the camera. The image produced by the endoscope is also shown (upper right insert), along with the projections of the detected feature of interest <bold>
<italic>s</italic>
</bold> and its desired position <bold>
<italic>s</italic>
</bold>&#x2a;, and with the image coordinate vectors (<bold>
<italic>u</italic>
</bold>, <bold>
<italic>v</italic>
</bold>).</p>
</caption>
<graphic xlink:href="frobt-09-832208-g008.tif"/>
</fig>
<p>When two instruments are detected (i.e. two entrypoints with their corresponding subgraphs), a line segment connecting the entrypoints of both instruments is assumed to be the viewing horizon. A vertical line that is parallel to the vertical axis of the image and cuts through the central point of the viewing horizon defines whether the entrypoints (and subsequently the instruments) are left/right (<italic>see</italic> <xref ref-type="fig" rid="F7">Figure 7</xref>, left).</p>
</sec>
<sec id="s4-2-9">
<title>4.2.9 Tooltip Stereo Matching</title>
<p>Once the tips of the instruments have been detected and classified as right/left instrument, the disparity for each tooltip in the left and right stereo images is estimated using classical intensity-based template matching. As endoscope images are stereo-rectified, template matching with a sliding window of 64 &#xd7; 64 pixels running over the same line (and only in one direction) suffices for the stereo matching. Normalized cross-correlation is the cost function of choice. Given the disparity measure for each tooltip, its depth can be reconstructed using the pinhole camera model and conventional epipolar geometry. The 3D reconstruction was performed with an extended Kalman filter (EKF). The EKF is valuable here, because of its capacity to bridge potential measurement gaps and to reduce the noise on the 3D position estimate, which is very sensitive to small disparity variations, as the lens separation of the <sc>Tipcam</sc> is only 1.6&#xa0;mm. The details of the EKF are specified in <xref ref-type="sec" rid="s5-2-2">Section 5.2.2</xref>.</p>
<p>Although in our proposed experimental setup we use stereovision because we have a stereo-endoscope, many centers still use monoscopic endoscopes. In this case, a method such as that presented by Liu et al. (<xref ref-type="bibr" rid="B45">Liu et al., 2020</xref>) could be used to estimate the 3D tip location directly from 2D endoscopic images.</p>
</sec>
</sec>
</sec>
<sec id="s5">
<title>5 Visual Servoing for Robotic Endoscope Control</title>
<p>A visual servoing controller determines the relative motion between a camera and an observed target in order to produce the desired camera view upon the target. In the case of AIT, the target is the (virtual) instrument tip position <bold>
<italic>s</italic>
</bold>, defined by <xref ref-type="disp-formula" rid="e1">(1)</xref>, and the camera is the endoscope held by the robotic endoscope holder. When working with endoscopes, the visual servoing controller needs to take into account a number of aspects specific to endoscopy, including the presence of the incision point, which imposes a geometric constraint, and the endoscope geometry. For the online estimation of the incision point, automated routines exist, such as (<xref ref-type="bibr" rid="B19">Dong and Morel, 2016</xref>; <xref ref-type="bibr" rid="B32">Gruijthuijsen et al., 2018</xref>). This section formalizes visual servoing approaches for REC in MIS.</p>
<sec id="s5-1">
<title>5.1 Visual Servoing Approaches</title>
<p>Two classical approaches exist for visual servoing problems: image-based visual servoing (IBVS) and position-based visual servoing (PBVS) (<xref ref-type="bibr" rid="B15">Chaumette and Hutchinson, 2008</xref>). For REC, an extension to these methods is necessary as the camera motion is constrained by the presence of the incision point. In IBVS, this can be done by modifying the interaction matrix, such that it incorporates the kinematic constraint of the incision point (<xref ref-type="bibr" rid="B50">Osa et al., 2010</xref>) or such that it describes the mapping between the image space and the joint space of the robotic endoscope holder (<xref ref-type="bibr" rid="B72">Uecker et al., 1995</xref>; <xref ref-type="bibr" rid="B85">Zhao, 2014</xref>). As these IBVS approaches only act in the image plane, the zoom level can be controlled by a decoupled depth controller (<xref ref-type="bibr" rid="B16">Chen et al., 2018</xref>). PBVS approaches can incorporate the incision constraint in an inverse kinematics algorithm that computes the desired robot pose, given the desired endoscopic view (<xref ref-type="bibr" rid="B83">Yu et al., 2013</xref>; <xref ref-type="bibr" rid="B20">Eslamian et al., 2016</xref>).</p>
<p>Implementations of the above approaches that the authors are aware of lack generality: they are formulated for a specific robotic endoscope holder and do not cover oblique-viewing endoscopes, while such endoscopes are commonly used in MIS procedures. Yet, oblique-viewing endoscopes are the most challenging to handle for clinicians (<xref ref-type="bibr" rid="B52">Pandya et al., 2014</xref>), and could thus reap most benefits of REC. Generic constraint-based control frameworks, such as eTaSL (<xref ref-type="bibr" rid="B2">Aertbeli&#xeb;n and De Schutter, 2014</xref>), could be applied with a generalized endoscope model, as presented below, although they are slower than explicit visual servoing methods.</p>
</sec>
<sec id="s5-2">
<title>5.2 Visual Servoing With Generalized Endoscope Model</title>
<p>This section introduces a novel generalized endoscope model for visual servoing that incorporates the incision constraint, as well as the endoscope geometry. Such a model is presented here, along with the ensuing modifications to the classical IBVS and PBVS approaches.</p>
<sec id="s5-2-1">
<title>5.2.1 Generalized Endoscope Model</title>
<p>Endoscopes come in different forms and sizes. Rigid endoscopes are typically straight, but can also be pre-bent. The camera can be oriented collinear with the longitudinal axis of the scope or can be positioned at an oblique angle. Some scopes are flexible over their entire length, others contain a proximal rigid straight portion with a distal bendable portion.</p>
<p>
<xref ref-type="fig" rid="F8">Figure 8</xref> visualizes a general endoscope geometry that encompasses all the above configurations, along with the frames of interest. The incision frame {<italic>i</italic>} is defined at the incision point and is common for all robotic endoscope holders. The <italic>z</italic>-axis of {<italic>i</italic>} is the inward-pointing normal of the body wall. A frame {<italic>t</italic>} is connected to the distal tip of the straight portion of the endoscope shaft, with its <italic>z</italic>-axis pointing along the shaft axis. In the most general case, the camera frame {<italic>c</italic>} is located at an offset and rotated with respect to {<italic>t</italic>}. The offset can account for tip articulation or for stereo camera lens separation. The rotation can account for oblique-viewing endoscopes. As such, this endoscope model can describe complex endoscopes, such as the articulating 3D video endoscope <sc>EndoEye Flex 3D</sc> (<sc>Olympus</sc>, Japan) (<xref ref-type="fig" rid="F9">Figure 9</xref>).</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>
<sc>EndoEye Flex 3D</sc> &#xa9;<sc>Olympus Corporation</sc> (Tokyo, Japan).</p>
</caption>
<graphic xlink:href="frobt-09-832208-g009.tif"/>
</fig>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Comparison of the endoscope tip trajectories for different visual servoing approaches for REC. In this simulation, an oblique-viewing 30&#xb0; endoscope with a 120&#xb0; FOV was used. Its initial and final pose are drawn. In the final pose, the feature of interest <bold>
<italic>s</italic>
</bold> is in the desired position with respect to the camera. The trajectories are drawn for a case with a small initial depth error (top) and a large initial depth error (bottom).</p>
</caption>
<graphic xlink:href="frobt-09-832208-g010.tif"/>
</fig>
<p>Starting from this general endoscope model, different visual servoing approaches for REC will be detailed next. The visual servoing approaches strive to determine the endoscope motion that is needed to match the position <inline-formula id="inf6">
<mml:math id="m9">
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mi>x</mml:mi>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mi>y</mml:mi>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mi>z</mml:mi>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> of the feature of interest, expressed in the camera frame, with its desired position <inline-formula id="inf7">
<mml:math id="m10">
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>, while taking into account the presence of the incision point. The visual servoing approaches assume that the robot endoscope holder has three active DoFs that can produce any linear velocity of the endoscope tip. In order to obtain a fully determined endoscope motion, it is further assumed that the remaining rolling DoF about the endoscope axis is not controlled by the visual servoing controller, but by an external controller. Note that, as was pointed out in <xref ref-type="sec" rid="s3">Section 3</xref>, this DoF could be employed to control the camera horizon.</p>
<p>The following notation will be used in the subsequent sections: a rotation of angle <italic>&#x3be;</italic> about the axis <italic>i</italic> will be denoted by <bold>
<italic>R</italic>
</bold>
<sub>
<italic>i</italic>
</sub>(<italic>&#x3be;</italic>). For a transformation from a frame {<italic>j</italic>} to a frame {<italic>i</italic>}, the notation <inline-formula id="inf8">
<mml:math id="m11">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> will be used, consisting of a rotation <inline-formula id="inf9">
<mml:math id="m12">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> and a translation <inline-formula id="inf10">
<mml:math id="m13">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>. Further, the twist vector <inline-formula id="inf11">
<mml:math id="m14">
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is defined as the concatenation of a linear velocity <bold>
<italic>v</italic>
</bold> and an angular velocity <bold>
<italic>&#x3c9;</italic>
</bold>. For all kinematic variables, the reference frames will be indicated with a trailing superscript. For the features <bold>
<italic>s</italic>
</bold> and the error <bold>
<italic>e</italic>
</bold> in the camera frame, the trailing superscript <sup>
<italic>c</italic>
</sup> is mostly omitted for brevity.</p>
</sec>
<sec id="s5-2-2">
<title>5.2.2 EKF for Tooltip 3D Position Reconstruction</title>
<p>The instrument localization pipeline from <xref ref-type="sec" rid="s4-2">Section 4.2</xref> yields the tooltip image coordinates <italic>u</italic>
<sub>
<italic>l</italic>
</sub>, <italic>v</italic>
<sub>
<italic>l</italic>
</sub> and the disparity <italic>d</italic>
<sub>
<italic>x</italic>
</sub>. The 3D tooltip position, required for the visual servoing methods, is estimated from these measurement data, through an EKF. The state transition model describes a linear tooltip motion of exponentially decreasing velocity, partially expressed in frames {<italic>i</italic>} and {<italic>c</italic>} to limit the non-linearity, and the observation model implements the pinhole camera model:<disp-formula id="e4">
<mml:math id="m15">
<mml:mtable class="align" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x3d;</mml:mo>
<mml:mi>g</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3f5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="normal">&#x394;</mml:mi>
<mml:mi>T</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3f5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(4)</label>
</disp-formula>
<disp-formula id="e5">
<mml:math id="m16">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>h</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3b4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:math>
<label>(5)</label>
</disp-formula>where <inline-formula id="inf12">
<mml:math id="m17">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the state vector, <inline-formula id="inf13">
<mml:math id="m18">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> is the tooltip position in the camera frame {<italic>c</italic>}, <inline-formula id="inf14">
<mml:math id="m19">
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> is the tooltip velocity in the incision frame {<italic>i</italic>}, <inline-formula id="inf15">
<mml:math id="m20">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> is the camera twist, <bold>
<italic>L</italic>
</bold>
<sub>3<italic>D</italic>
</sub> is the 3D interaction matrix from <xref ref-type="disp-formula" rid="e20">Eq. 20</xref>, <italic>&#x3bb;</italic>
<sub>
<italic>s</italic>
</sub> is a reduction factor <inline-formula id="inf16">
<mml:math id="m21">
<mml:mo>&#x3c;</mml:mo>
<mml:mn>1</mml:mn>
</mml:math>
</inline-formula> (governing the exponential decrease of <inline-formula id="inf17">
<mml:math id="m22">
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>), <inline-formula id="inf18">
<mml:math id="m23">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the observation vector, <italic>f</italic>
<sub>
<italic>x</italic>
</sub>, <italic>f</italic>
<sub>
<italic>y</italic>
</sub>, <italic>c</italic>
<sub>
<italic>x</italic>
</sub>, <italic>c</italic>
<sub>
<italic>y</italic>
</sub> are the intrinsic camera parameters, <italic>b</italic>
<sub>
<italic>c</italic>
</sub> the distance between the optical centres of the (left and right) cameras, and <bold>
<italic>&#x3f5;</italic>
</bold>
<sub>
<italic>k</italic>
</sub> and <bold>
<italic>&#x3b4;</italic>
</bold>
<sub>
<italic>k</italic>
</sub> are the usual process and observation noises. The velocity reduction factor <italic>&#x3bb;</italic>
<sub>
<italic>s</italic>
</sub> is introduced to scale down the contribution of dead reckoning during measurement gaps.</p>
</sec>
<sec id="s5-2-3">
<title>5.2.3 Image-Based Visual Servoing (IBVS)</title>
<p>IBVS aims to determine the camera motion to move the 2D projection of the 3D feature point <bold>
<italic>s</italic>
</bold> to its desired position in the image plane. Assuming a pinhole camera model, the 2D projection <bold>
<italic>s</italic>
</bold>
<sub>
<italic>n</italic>
</sub> is obtained by expressing <bold>
<italic>s</italic>
</bold> in normalized camera coordinates:<disp-formula id="e6">
<mml:math id="m24">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mi>x</mml:mi>
<mml:mo>/</mml:mo>
<mml:mi>z</mml:mi>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mi>y</mml:mi>
<mml:mo>/</mml:mo>
<mml:mi>z</mml:mi>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>.</mml:mo>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>Classically, the relation between the camera twist <inline-formula id="inf19">
<mml:math id="m25">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> and the 2D feature point velocity <inline-formula id="inf20">
<mml:math id="m26">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> is expressed by the interaction matrix <bold>
<italic>L</italic>
</bold>
<sub>2<italic>D</italic>
</sub> (<xref ref-type="bibr" rid="B15">Chaumette and Hutchinson, 2008</xref>):<disp-formula id="e7">
<mml:math id="m27">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:math>
<label>(7)</label>
</disp-formula>where<disp-formula id="e8">
<mml:math id="m28">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mi>z</mml:mi>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:mi>z</mml:mi>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2212;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mi>z</mml:mi>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:mi>z</mml:mi>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(8)</label>
</disp-formula>
</p>
<p>In this equation, it is assumed that the camera has six DoFs, while only three are available for the endoscope control. To incorporate these constraints, the camera twist <inline-formula id="inf21">
<mml:math id="m29">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> needs to be mapped first to the twist of the tip of the endoscope&#x2019;s straight portion <inline-formula id="inf22">
<mml:math id="m30">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>:<disp-formula id="e9">
<mml:math id="m31">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:math>
<label>(9)</label>
</disp-formula>where <inline-formula id="inf23">
<mml:math id="m32">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> is the well-known expression for a twist transformation:<disp-formula id="e10">
<mml:math id="m33">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mn mathvariant="bold-italic">0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
<p>The operator [ ]<sub>&#xd7;</sub> denotes the skew-symmetric (cross-product) matrix operator. The incision constraint introduces a coupling between the linear tip velocity <inline-formula id="inf24">
<mml:math id="m34">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> and angular tip velocity <inline-formula id="inf25">
<mml:math id="m35">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> and can be expressed as:<disp-formula id="e11">
<mml:math id="m36">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:math>
<label>(11)</label>
</disp-formula>with the incision transformation<disp-formula id="e12">
<mml:math id="m37">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mn>1</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>1</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mi>l</mml:mi>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mi>l</mml:mi>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(12)</label>
</disp-formula>and the inserted endoscope length <inline-formula id="inf26">
<mml:math id="m38">
<mml:mi>l</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:math>
</inline-formula>. Combining (<xref ref-type="disp-formula" rid="e7">7</xref>&#x2013;<xref ref-type="disp-formula" rid="e12">12</xref>) yields the modified interaction matrix <inline-formula id="inf27">
<mml:math id="m39">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>:<disp-formula id="e13">
<mml:math id="m40">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
<label>(13)</label>
</disp-formula>which maps <inline-formula id="inf28">
<mml:math id="m41">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> to <inline-formula id="inf29">
<mml:math id="m42">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula>. This matrix is a generalized form for the modified interaction matrix presented in (<xref ref-type="bibr" rid="B50">Osa et al., 2010</xref>).</p>
<p>As is customary in visual servoing, the error in the normalized image space is expressed as<disp-formula id="e14">
<mml:math id="m43">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
<label>(14)</label>
</disp-formula>and the control law enforces an exponential decay of the error:<disp-formula id="e15">
<mml:math id="m44">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:math>
<label>(15)</label>
</disp-formula>characterized by the time constant <italic>&#x3c4;</italic> &#x3d; 1/<italic>&#x3bb;</italic>. For a constant <inline-formula id="inf30">
<mml:math id="m45">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, this yields the desired endoscope tip velocity:<disp-formula id="e16">
<mml:math id="m46">
<mml:mtable class="align" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:mo>&#x21d2;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
<mml:mo>&#x2b;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(16)</label>
</disp-formula>
</p>
</sec>
<sec id="s5-2-4">
<title>5.2.4 Image-Based Visual Servoing With Decoupled Depth Control (IBVS&#x2b;DC)</title>
<p>IBVS only seeks to optimize the 2D projected position <bold>
<italic>s</italic>
</bold>
<sub>
<italic>n</italic>
</sub> of the target point <bold>
<italic>s</italic>
</bold> in the image plane. As such IBVS alone is insufficient to control the 3D position of the endoscope. A decoupled depth controller can be added to control the third DoF. This was proposed in (<xref ref-type="bibr" rid="B16">Chen et al., 2018</xref>) and will be generalized here.</p>
<p>The depth controller acts along the <italic>z</italic>-axis of the camera frame {<italic>c</italic>} and uses the kinematic relation between the camera twist <inline-formula id="inf31">
<mml:math id="m47">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> and the change in the depth <italic>z</italic> of <bold>
<italic>s</italic>
</bold>:<disp-formula id="e17">
<mml:math id="m48">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:math>
<label>(17)</label>
</disp-formula>where<disp-formula id="e18">
<mml:math id="m49">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2212;</mml:mo>
<mml:mi>y</mml:mi>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mi>x</mml:mi>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(18)</label>
</disp-formula>
</p>
<p>To reduce the depth error <italic>e</italic>
<sub>
<italic>z</italic>
</sub> &#x3d; <italic>z</italic> &#x2212; <italic>z</italic>&#x2a;, concurrently with the image-space error <bold>
<italic>e</italic>
</bold>
<sub>
<italic>n</italic>
</sub>, a similar reasoning as with IBVS can be followed, yielding:<disp-formula id="e19">
<mml:math id="m50">
<mml:mtable class="align" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:mo>&#x21d2;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2b;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(19)</label>
</disp-formula>
</p>
<p>To differentiate between directions, it is possible to define <italic>&#x3bb;</italic> as a diagonal matrix, rather than as a scalar.</p>
</sec>
<sec id="s5-2-5">
<title>5.2.5 3D Image-Based Visual Servoing (3D IBVS)</title>
<p>Instead of decoupling the control in the image plane and the depth control, the 3D feature <bold>
<italic>s</italic>
</bold> can also be used directly to define the 3D motion of the endoscope. This requires a 3D interaction matrix <bold>
<italic>L</italic>
</bold>
<sub>3<italic>D</italic>
</sub>, which can be derived from the kinematic equations of motion for the stationary 3D point <bold>
<italic>s</italic>
</bold> in the moving camera frame {<italic>c</italic>}:<disp-formula id="e20">
<mml:math id="m51">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:math>
<label>(20)</label>
</disp-formula>with<disp-formula id="e21">
<mml:math id="m52">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold-italic">I</mml:mi>
</mml:mtd>
<mml:mtd columnalign="center"/>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(21)</label>
</disp-formula>
</p>
<p>As before, the modified interaction matrix <inline-formula id="inf32">
<mml:math id="m53">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> can be obtained by including the offset of the tip frame with respect to the camera frame and the incision constraint. The desired endoscope velocity that ensures an exponential decay of the error <bold>
<italic>e</italic>
</bold> &#x3d; <bold>
<italic>s</italic>
</bold> &#x2212; <bold>
<italic>s</italic>
</bold>&#x2a; follows then from:<disp-formula id="e22">
<mml:math id="m54">
<mml:mtable class="align" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:mo>&#x21d2;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
<mml:mo>&#x2b;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(22)</label>
</disp-formula>
</p>
</sec>
<sec id="s5-2-6">
<title>5.2.6 Position-Based Visual Servoing (PBVS)</title>
<p>PBVS identifies the camera pose, with respect to an external reference frame, that produces the desired view upon the 3D feature <bold>
<italic>s</italic>
</bold> and moves the camera towards this pose. As mentioned before, the camera pose is constrained to three DoFs due to the presence of the incision point and the separate horizon stabilization. Finding the desired camera pose, while taking into account its kinematic constraints, involves solving the inverse kinematics for the endoscope as defined in <xref ref-type="fig" rid="F9">Figure 9</xref>.</p>
<p>The forward kinematics of the endoscope can be described as a function of three joint variables (<italic>&#x3b8;</italic>
<sub>1</sub>, <italic>&#x3b8;</italic>
<sub>2</sub>, <italic>l</italic>). Based on these variables, any endoscope pose can be reached by applying successive operations in a forward kinematics chain. When these joint variables are set to zero, the endoscope system is in a configuration where the incision frame {<italic>i</italic>} coincides with the distal tip frame {<italic>t</italic>}, while the camera frame is offset by <inline-formula id="inf33">
<mml:math id="m55">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> and rotated by <inline-formula id="inf34">
<mml:math id="m56">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, with <italic>&#x3b1;</italic> the oblique viewing angle of the endoscope. Starting from this configuration, <italic>&#x3b8;</italic>
<sub>1</sub> rotates {<italic>t</italic>} about its <italic>y</italic>-axis, then <italic>&#x3b8;</italic>
<sub>2</sub> rotates it about its <italic>x</italic>-axis and finally <italic>l</italic> translates it along its <italic>z</italic>-axis. This leads to the following forward kinematic equations, expressed in the reference frame {<italic>i</italic>}:<disp-formula id="e23">
<mml:math id="m57">
<mml:mtable class="align" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(23)</label>
</disp-formula>
<disp-formula id="equ1">
<mml:math id="m58">
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn mathvariant="bold-italic">0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mn mathvariant="bold-italic">0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mi mathvariant="bold-italic">I</mml:mi>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mn mathvariant="bold-italic">0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mn mathvariant="bold-italic">0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:math>
</disp-formula>with <inline-formula id="inf35">
<mml:math id="m59">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> the unit vector along the <italic>z</italic>-direction. The trailing &#x2a; designates a desired value, different from the current value. The <inline-formula id="inf36">
<mml:math id="m60">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mspace width="0.3333em"/>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> signifies the homogeneous representation of a 3D vector. <xref ref-type="disp-formula" rid="e23">Eq. 23</xref> constitutes a system of three equations in the unknowns <inline-formula id="inf37">
<mml:math id="m61">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. Appendix 1 elaborates the analytic solution to this inverse kinematics problem.</p>
<p>The solution of the inverse kinematics can be inserted in the forward kinematics equations to obtain the desired position of the distal endoscope tip <inline-formula id="inf38">
<mml:math id="m62">
<mml:msup>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>:<disp-formula id="e24">
<mml:math id="m63">
<mml:msup>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>sin</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mi>cos</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>sin</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>cos</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mi>cos</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(24)</label>
</disp-formula>which straightforwardly leads to the position error of the distal tip, expressed with respect to the incision frame {<italic>i</italic>}:<disp-formula id="e25">
<mml:math id="m64">
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>.</mml:mo>
</mml:math>
<label>(25)</label>
</disp-formula>
</p>
<p>When an exponential decaying error is required, the desired endoscope velocity becomes:<disp-formula id="e26">
<mml:math id="m65">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
<label>(26)</label>
</disp-formula>and can be expressed in the frame {<italic>t</italic>} as:<disp-formula id="e27">
<mml:math id="m66">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>.</mml:mo>
</mml:math>
<label>(27)</label>
</disp-formula>
</p>
</sec>
</sec>
<sec id="s5-3">
<title>5.3 Simulation of Visual Servoing Methods</title>
<p>A simulation was implemented to validate all four visual servoing methods for REC: IBVS, IBVS&#x2b;DC, 3D IBVS and PBVS. <xref ref-type="fig" rid="F11">Figure 11</xref> presents a visual comparison between them, for a 30&#xb0; oblique-viewing endoscope with a 120&#xb0; FoV. In all simulations, <bold>
<italic>s</italic>
</bold> enters the FoV from a side. The visual servoing controller moves the endoscope to center <bold>
<italic>s</italic>
</bold> within its FoV at a given depth <italic>z</italic>&#x2a;, or <inline-formula id="inf39">
<mml:math id="m67">
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>. The trajectories described by the endoscope tip are shown in the graphs, as well as the initial (marked in black) and final (marked in grey) endoscope poses.</p>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption>
<p>LASTT (<xref ref-type="bibr" rid="B22">European Academy of Gynaecological Surgery, 2020</xref>) laparoscopic training model. Initial position of the pins before starting the bi-manual coordination task (left). Procedure to pass a pin from the non-dominant to the dominant hand (right).</p>
</caption>
<graphic xlink:href="frobt-09-832208-g011.tif"/>
</fig>
<sec id="s5-3-1">
<title>5.3.1 Comparison of Visual Servoing Methods</title>
<p>From the graphs, it is clear that IBVS differs from the other approaches in that, by construction, it does not attain the desired depth <italic>z</italic>&#x2a; in the final endoscope pose. Moreover, IBVS also doesn&#x2019;t guarantee a constant depth <italic>z</italic>. Consequently, <italic>z</italic> will drift towards undesired depths over time. In some configurations, this can be counteracted by separately controlling <italic>l</italic> to stay constant, but this does not hold for the general case. IBVS alone is thus unsuitable for REC and 3D information about <bold>
<italic>s</italic>
</bold> is a requirement.</p>
<p>Both IBVS&#x2b;DC and 3D IBVS linearize the visual servoing problem in the camera frame. This enables a desired exponential decay of the targeted errors, but does not produce a well-controlled endoscope motion in Cartesian space. It can be seen from <xref ref-type="fig" rid="F11">Figure 11</xref> that the trajectories for these methods deviate from the straight trajectory that is accomplished by PBVS, and more so for large initial errors. As space is limited in REC and the environment delicate, the straight trajectory of PBVS appears favourable compared to its alternatives.</p>
<p>IBVS typically yields more accurate visual servoing results than PBVS, because the feedback loop in IBVS-based methods can mitigate camera calibration errors (excluding stereo calibration errors). However, the objective in REC often is to keep <bold>
<italic>s</italic>
</bold> inside a specific region of the endoscopic image (cf. position hysteresis), rather than at an exact image coordinate. The importance of the higher accuracy of IBVS is thus tempered by this region-based control objective: small calibration inaccuracies are acceptable. Therefore, and in contrast to the claims in (<xref ref-type="bibr" rid="B50">Osa et al., 2010</xref>), it can be argued that the predictability of a straight visual servoing trajectory outweighs the importance of the visual servoing accuracy. This argumentation points out why PBVS is the preferred approach for REC, especially when large initial errors exist.</p>
</sec>
<sec id="s5-3-2">
<title>5.3.2 Hybrid PBVS and 3D IBVS</title>
<p>If the accuracy of PBVS would need to be enhanced, e.g., when significant calibration errors exist, it is possible to apply a hybrid visual servoing method. PBVS can be used until the initial error drops below a certain threshold and from there, the visual servoing controller gradually switches to an IBVS-based approach for refinement, by applying a weighted combination of the desired tip velocities <inline-formula id="inf40">
<mml:math id="m68">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> computed by each visual servoing method. The curved shape of IBVS trajectories can thus be suppressed. In experiments that are not further documented here, it was observed that 3D IBVS, which ascertains an exponential Cartesian error decay, provided a more predictable and thus more desirable endoscope behaviour than IBVS&#x2b;DC. To ensure robustness against potential calibration errors, the hybrid combination of PBVS and 3D IBVS was thus selected for the experiments in <xref ref-type="sec" rid="s6">Section 6</xref>. <xref ref-type="fig" rid="F11">Figure 11</xref> shows the simulated performance of the hybrid visual servoing approach, which gradually transitions from PBVS to 3D IBVS when the error &#x2016;<bold>
<italic>e</italic>
</bold>
<sub>
<italic>n</italic>
</sub>&#x2016; in the normalized image space goes from 0.6 to 0.3.</p>
</sec>
</sec>
</sec>
<sec id="s6">
<title>6 Experiments</title>
<p>To determine the feasibility of the proposed autonomous endoscopy framework, an experimental setup was built (<italic>see</italic> <xref ref-type="fig" rid="F1">Figure 1</xref>). The mockup surgical setting consisted of a laparoscopic skills testing and training model (LASTT) placed within a laparoscopic box trainer (<italic>see</italic> Figure 8). A <italic>bi-manual coordination</italic> exercise was chosen as the target surgical task for the experiments. In this task, a set of pushpins need to be passed between hands and placed in the right pockets. The choice of both laparoscopic trainer and surgical task was clinically motivated. The present study is largely inspired by the surgical scenario occurring during spina bifida <xref ref-type="bibr" rid="B12">Bruner (1999)</xref>; <xref ref-type="bibr" rid="B47">Meuli and Moehrlen (2014)</xref>; <xref ref-type="bibr" rid="B38">Kabagambe et al. (2018)</xref> surgical procedures (<italic>see</italic> <xref ref-type="fig" rid="F12">Figure 12</xref>). In this fetal treatment, a surgeon operates while another one guides the endoscope. The LASTT model along with the bi-manual coordination task have been developed by The European Academy for Gynaecological Surgery<xref ref-type="fn" rid="fn4">
<sup>3</sup>
</xref> as an initiative to improve quality control, training and education in gynaecological surgery (<xref ref-type="bibr" rid="B13">Campo et al., 2012</xref>). Therefore, they are ideal candidates for the feasibility study of the proposed autonomous endoscopy framework.</p>
<fig id="F12" position="float">
<label>FIGURE 12</label>
<caption>
<p>Spina bifida intervention performed on an animal model. The surgeon dressed in blue scrubs controls the instruments and manipulates the tissue. The colleague dressed in green guides and holds the endoscope camera during the intervention. The yellow arrows point to the hand of the assistant guiding the camera. As becomes evident in the pictures above, this operating arrangement is not ergonomic, leading to discomfort that increases with the duration of the intervention, and severely limiting the tasks that the surgeon controlling the camera can perform. Picture courtesy of Prof. Jan Deprest.</p>
</caption>
<graphic xlink:href="frobt-09-832208-g012.tif"/>
</fig>
<sec id="s6-1">
<title>6.1 Bi-manual Coordination Task</title>
<p>This task starts by placing a set of coloured pushpins at the base of the LASTT model (<italic>see</italic> <xref ref-type="fig" rid="F8">Figure 8</xref>, left). There are two pins of each colour. The operator has to pick a pin with the non-dominant hand, pass it to the dominant hand (<xref ref-type="fig" rid="F8">Figure 8</xref>, right), and place it inside the pocket of the same colour. The LASTT task is successfully completed when a pushpin of each of the six colours has been placed in a corresponding pocket, within less than 5&#xa0;min. If the pin is dropped during the procedure, a second pin of the same colour has to be picked up from the base. If the second pin is also dropped, the test is considered a failure.</p>
<p>As shown in the demonstration video of the bi-manual coordination task with the LASTT model of the European Academy of Gynaecological Surgery<xref ref-type="fn" rid="fn5">
<sup>4</sup>
</xref>, this exercise cannot be performed with a fixed immobile endoscope due to the reduced field of view of the endoscope, the limited space available for maneuvers within the operating cavity, and the small size of the pins (which resembles small tissue structures). All of these characteristics of the LASTT model mimic the real operating conditions, particularly for gynaecological interventions. Without a robotic endoscope holder, the bi-manual coordination task is performed with one trainee handling the laparoscopic graspers and another trainee acting as the (human) camera assistant. The assistant should hold the endoscope and keep the view centered on what the laparoscopic operator is doing. In our experiments, this human camera assistant is replaced by the <sc>Virtuose6D</sc>
<xref ref-type="fn" rid="fn6">
<sup>5</sup>
</xref> (<sc>Haption SA</sc>, Laval, France) robotic arm. As shown in (<xref ref-type="bibr" rid="B7">Avellino et al., 2020</xref>), the dimensions, workspace, and supported payload of this robotic arm are well suited for robotic endoscope control<xref ref-type="fn" rid="fn7">
<sup>6</sup>
</xref>. The operational workspace is defined as a cube of side 450&#xa0;mm and is located in the center of the workspace envelope. The extremities of the workspace envelope are bounded by a volume of 1330 &#xd7; 575 &#xd7; 1020&#xa0;mm<sup>3</sup>. The payload supported by the <sc>Virtuose6D</sc> is 35&#xa0;N (peak)/10&#xa0;N (continuous). Additionally, the <sc>Virtuose6D</sc> features passive gravity compensation, which can be mechanically adjusted to carry up to 8&#xa0;N. Therefore, although in our setup we are using a stereo-endoscope, this system is also able to hold laparoscopy cameras (e.g. those used in abdominal surgery). In our setup, the robotic arm was holding the <sc>Karl Storz</sc> <sc>Tipcam1</sc>, as shown in <xref ref-type="fig" rid="F1">Figure 1</xref>. The <sc>Virtuose6D</sc> was programmed to respond to semantically rich AIT instructions (<xref ref-type="sec" rid="s3">Section 3</xref>). Additionally, it featured a comanipulation fallback mode (<xref ref-type="sec" rid="s3-2">Section 3.2</xref>), which it naturally supports owing to its mechanical backdrivability.</p>
</sec>
<sec id="s6-2">
<title>6.2 Study Participants</title>
<p>A total of eight subjects participated in the study. Two <italic>surgeons</italic>, two <italic>plateau novices</italic>, and four <italic>novices</italic>. The <italic>plateau novices</italic> were authors of the study, who started out as novices, but familiarized themselves with the system and the task until they reached a plateau in the learning curve. Each participant performed the bi-manual coordination task five times. Before these trials, each participant practised 5&#x2013;10&#xa0;min to perform the task while assisted by the robotic endoscope holder.</p>
</sec>
<sec id="s6-3">
<title>6.3 Configuration of the Autonomous Endoscope for the Study</title>
<p>The autonomous endoscope controller implemented the <italic>Hybrid PBVS and 3D IBVS</italic> method (<xref ref-type="sec" rid="s5-3-2">Section 5.3.2</xref>), switching from PBVS to 3D IBVS when the error &#x2016;<bold>
<italic>e</italic>
</bold>
<sub>
<italic>n</italic>
</sub>&#x2016; in the normalised image space decreased from 0.6 to 0.3. The target position of the endoscope tip was set to <inline-formula id="inf41">
<mml:math id="m69">
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>, where <italic>z</italic>&#x2a; &#x3d; 8&#xa0;cm. The endoscope tip was controlled to track a trajectory towards its desired position with the tip velocity <inline-formula id="inf42">
<mml:math id="m70">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> limited to 2&#xa0;cm/s. This trajectory was implemented as a soft virtual fixture, with stiffness of 0.3&#xa0;N/mm. The aforementioned low speed and stiffness were found to provide smooth and predictable motions. They proved also helpful in avoiding sudden motions when one of the instruments is occluded and the remaining one is located in the violation zone. Low speed and stiffness were also necessary because of the 340&#xa0;ms delay on the measurements updates of <bold>
<italic>s</italic>
</bold>. The framegrabber was responsible for 230&#xa0;ms of this delay. A framegrabber that supports NVIDIA GPUDirect, not available to us at the time of writing, could be used to mitigate this latency. The other 110&#xa0;ms came from the 3D tooltip localisation pipeline (<xref ref-type="sec" rid="s4-2">Section 4.2</xref>). A delay that could be potentially reduced in future work using TensorRT. Measurements were available at 9&#xa0;Hz.</p>
<p>The position hysteresis approach, which was illustrated in <xref ref-type="fig" rid="F2">Figure 2</xref>, was applied separately in the image plane and along the viewing direction. In the image plane, the target zone A occupied the first 40% of the endoscopic image radius, the transition zone B the next 20%, and the violation zone C the remaining 40%. Along the viewing axis, the target zone was set to 3&#xa0;cm in both directions of <italic>z</italic>&#x2a;. The violation zone started at a distance of 5&#xa0;cm with respect to <italic>z</italic>&#x2a;. The EKF for stereo reconstruction (<xref ref-type="sec" rid="s4-2-9">Section 4.2.9</xref>) was used to fill missing data up to 1&#xa0;s after the last received sample. When the instruments were lost from the view for more than 10&#xa0;s, the REC switched from the AIT mode to the comanipulation fallback mode, waiting to be manually reset to a safe home position.</p>
<p>When designing the experiments, two preliminary observations were made: <italic>1</italic>) the AIT instruction that fixes the tracking target on the tip of the instrument held by the dominant hand was most convenient, and <italic>2</italic>) instructions to change the zoom level were not used. The latter observation is easily explained by the nature of the LASTT task, which requires overview rather than close-up inspections. The former observation points out that it is confusing to track a virtual instrument tip in between the real tooltips. While concentrated on the task, participants tend to forget their non-dominant hand and move it out of the view (or in and out of the view without a particular reason). This affected the position of the virtual instrument tip in unexpected ways. In those situations where tracking the non-dominant hand is relevant (e.g., when passing the pin from one hand to another), participants quickly learned to keep the tips together. Hence, tracking the dominant-hand tool was sufficient to provide a comfortable view, and this became the only operation mode that participants used. In fact, as an operator, it was convenient to know that the system is tracking your dominant hand: this is easy to understand and remember. Thus, during all the experiments, the only instruction that was issued was to make the camera track the dominant hand. As all participants were right-handed, <bold>
<italic>s</italic>
</bold> was assigned to the tip of the right-hand tool.</p>
</sec>
</sec>
<sec sec-type="results|discussion" id="s7">
<title>7 Results and Discussion</title>
<p>In this section we provide quantitative results on the tooltip tracking accuracy, the responsiveness and usability of the visual servoing endoscopic guidance, and the learning curve of the user study participants.</p>
<sec id="s7-1">
<title>7.1 Validation of Instrument Localization Pipeline</title>
<p>Given an endoscopic video frame as input, the tooltip localization pipeline produces an estimate of the 2D location of the surgical instrument tips (<italic>see</italic> <xref ref-type="fig" rid="F3">Figure 3</xref>). The tooltip location in image coordinates is used for the later 3D position reconstruction of the tooltips. Therefore, we first validate the localization pipeline performance independently of the overall task. This includes the instrument segmentation (<xref ref-type="sec" rid="s4-2-3">Section 4.2.3</xref>) together with the subsequent tooltip detection steps (<xref ref-type="sec" rid="s4-2-4">Sections 4.2.4</xref>&#x2013;<xref ref-type="sec" rid="s4-2-8">4.2.8</xref>).</p>
<p>For the selected bi-manual coordination task of <xref ref-type="sec" rid="s6">Section 6</xref>, two laparoscopic instruments are used. Hence, a maximum of four tips may be encountered in any given endoscopic video frame. Two for the left and two for the right instrument. We define a bounding box around each detected tooltip. The chosen size for the bounding box is 200 &#xd7; 200 pixels (cf. 1080p raw video frames). This corresponds to the size of the instrument distal part at a practical operation depth (<xref ref-type="fig" rid="F13">Figure 13</xref>). A comparison between the bounding box and the image size is shown in <xref ref-type="fig" rid="F13">Figure 13</xref>.</p>
<fig id="F13" position="float">
<label>FIGURE 13</label>
<caption>
<p>Image crop of visible area of 1080p endoscope video frame. The green square is the 200 <bold>
<italic>&#xd7;</italic>
</bold>200 pixel bounding box used to evaluate detection performance. An intersection over union <bold>
<italic>&#x2265; 50%</italic>
</bold> between predicted and ground truth bounding boxes is considered a correct detection.</p>
</caption>
<graphic xlink:href="frobt-09-832208-g013.tif"/>
</fig>
<p>Following common practice in object detection (<xref ref-type="bibr" rid="B23">Everingham et al., 2015</xref>), a <inline-formula id="inf43">
<mml:math id="m71">
<mml:mo>&#x2265;</mml:mo>
<mml:mn>50</mml:mn>
<mml:mi>%</mml:mi>
</mml:math>
</inline-formula> intersection over union (IoU) between the prediction and ground truth bounding boxes is considered a true positive. A predicted bounding box that does not surpass this threshold represents a false positive. The Hungarian method is employed to match predictions to ground truth bounding boxes. The number of unmatched or missed bounding boxes from the ground truth represents the false negatives. In object detection, precision and recall at different confidence levels are commonly blended into a single performance metric, the average precision (AP) (<xref ref-type="bibr" rid="B23">Everingham et al., 2015</xref>). In the absence of a confidence level, we report precision and recall.</p>
<p>The testing set that we use to report results for the whole tooltip localization pipeline comprises 379 images. These images are evenly sampled video frames extracted at a constant frequency from the recording of the user study experiments, when participants operate the robot (i.e. they are not used during the training or validation of the segmentation model). Our tooltip tracking localization pipeline achieved a tooltip detection precision and recall of 72.45 and 61.89%, respectively. In 84.46% of the video frames, at least one of the present tips was correctly detected.</p>
</sec>
<sec id="s7-2">
<title>7.2 Responsiveness of the Endoscopic Guidance</title>
<p>The responsiveness of the proposed system was also evaluated. To navigate outside the view, participants have to place the tip of the instrument in the violation zone C (<italic>see</italic> <xref ref-type="fig" rid="F2">Figure 2</xref> for a description of the zones). When this occurs, the AIT functionality is triggered until the instrument appears in zone A. <xref ref-type="fig" rid="F14">Figure 14</xref> shows how long it took for the system to recover (entering zone A) after a violation (entering zone C) was detected. As shown in the figure, the control was responsive, taking an average of <inline-formula id="inf44">
<mml:math id="m72">
<mml:mo>&#x2248;</mml:mo>
<mml:mn>3</mml:mn>
</mml:math>
</inline-formula>s (<inline-formula id="inf45">
<mml:math id="m73">
<mml:mo>&#x2248;</mml:mo>
<mml:mn>2</mml:mn>
</mml:math>
</inline-formula>s in the viewing direction) to bring back the instrument tips to zone A. The slight difference between the correction time in the viewing direction and image plane is due to the difference in size of the zone A, which was relatively large along the viewing direction and therefore harder to violate.</p>
<fig id="F14" position="float">
<label>FIGURE 14</label>
<caption>
<p>Time taken to correct the position of the endoscope after the dominant-hand tooltip entered the violation zone.</p>
</caption>
<graphic xlink:href="frobt-09-832208-g014.tif"/>
</fig>
<p>In <xref ref-type="fig" rid="F14">Figure 14</xref>, a number of outliers are present. This occurred when the participants moved their hands too fast for the REC to follow, causing the instruments to entirely disappear from the FoV. On most of these occasions, the participants were able to put the instruments back inside FoV after some time, resuming normal navigation. However, in three instances (of the outliers <inline-formula id="inf46">
<mml:math id="m74">
<mml:mo>&#x3e;</mml:mo>
<mml:mn>10</mml:mn>
</mml:math>
</inline-formula>s), the endoscope had to be manually brought back to a safe, centered home position, using its comanipulation fallback mode.</p>
</sec>
<sec id="s7-3">
<title>7.3 Usability of the Endoscopic Guidance</title>
<p>When a human trainee is operating the endoscope, it is important for the coordination and the overview of the surgeon that the view remains centred around the instrument. This is also the objective of the REC. To quantify this aspect, <xref ref-type="fig" rid="F15">Figure 15</xref> shows the distribution of tip positions for the dominant-hand instrument across all the experiments. The REC indeed manages to keep the tooltip within the boundaries of the target zone A for most of the time. In the 2D image plane, the tip of the instrument was 46<italic>%</italic>, 23<italic>%</italic>, and 31<italic>%</italic> in target, transition, and violation zones, respectively. Similar behaviour was observed along the viewing direction, with a cumulative zone presence of 66<italic>%</italic>, 22<italic>%</italic>, and 12<italic>%</italic>, respectively.</p>
<fig id="F15" position="float">
<label>FIGURE 15</label>
<caption>
<p>Distribution of dominant-hand tooltip presence across all the experiments in the 2D image and viewing direction.</p>
</caption>
<graphic xlink:href="frobt-09-832208-g015.tif"/>
</fig>
</sec>
<sec id="s7-4">
<title>7.4 Surgical Skills Assessment and Learning Curve on the Bi-manual Coordination Task</title>
<p>The proposed system allowed the user study participants to perform the benchmark surgical task<xref ref-type="fn" rid="fn8">
<sup>7</sup>
</xref> with autonomous endoscope guidance within the allocated time. The completion time is shown in <xref ref-type="fig" rid="F16">Figure 16</xref>. The average completion time for the 40 trials was 172&#xa0;s (only one outlier exceeding 300&#xa0;s). As shown in the figure, the completion time for the <italic>plateau novices</italic> was relatively constant. This was not the case for <italic>novices</italic> and <italic>surgeons</italic>, where a learning curve can be appreciated despite the initial 5&#x2013;10&#xa0;min of practice. The average completion time across participants decreased from 209&#xa0;s in the first attempt to 144&#xa0;s in the last exercise. These results indicate that the system provided repeatable behaviour that participants were able to learn.</p>
<fig id="F16" position="float">
<label>FIGURE 16</label>
<caption>
<p>Completion time for all the participants in the bi-manual coordination task. <bold>(A)</bold> Completion time across attempts, with <italic>novices</italic> (top), <italic>plateau novices</italic> (centre), and <italic>surgeons</italic> (bottom). <bold>(B)</bold> Completion time per group across all trials.</p>
</caption>
<graphic xlink:href="frobt-09-832208-g016.tif"/>
</fig>
</sec>
</sec>
<sec id="s8">
<title>8 Conclusion</title>
<p>In this work we proposed the use of <italic>semantically rich instructions</italic> to govern the interaction between a robotic autonomous endoscope holder and the operating surgeon. These are instructions such as &#x201c;focus on the right tool&#x201d; or &#x201c;focus the camera between the instruments.&#x201d; This opposes previous endoscope holders handled <italic>via</italic> commands such as &#x201c;move up&#x201d; or &#x201c;zoom in.&#x201d; <italic>Semantically rich instructions</italic> are similar to the instructions surgeons would issue to a human camera operator, and can therefore be naturally adopted in clinical practice. Thus, we believe that they may be a powerful tool to increase clinical acceptance.</p>
<p>As a first step towards implementing these instructions within a robotic endoscope holder, we concentrated our efforts on <italic>semantically rich instructions</italic> related to surgical instruments, which we called <italic>autonomous instrument tracking (AIT)</italic> instructions. To implement these instructions we built a robotic system capable of executing them without the need for additional sensors besides the endoscope. To the best of our knowledge, we are the first to report how to construct an autonomous instrument tracking system that allows for solo-surgery using only the endoscope as a sensor to track the instruments. Within the proposed system we included a novel tooltip detection method and a new visual servoing approach for a generalized endoscope model with support for remote center of motion and endoscope bending.</p>
<p>We found that our proposed localization method was able to detect tips in 84.46% of the frames, which in combination with our visual servoing approach allowed for a robust autonomous guidance of the endoscope. With regards to the visual servoing method, we found that a hybrid of position-based visual servoing (PBVS) and 3D image-based visual-servoing (IBVS) is preferred for robotic endoscope control.</p>
<p>During our experimental campaign we found that the REC-enabled AIT instructions yielded a predictable behaviour of the robotic endoscope holder that could be quickly understood and learned by the participants. The participants were able to execute a proven bi-manual coordination task within the prescribed completion time while assisted by the robotic endoscope holder. In three of the exercise runs, it was observed that the comanipulation fallback mode was required to solve for situations in which the instruments moved out of the view and the operator was unable to recover them in the view. This comanipulation mode thus ensures that failures in which the robotic endoscope holder has to be abandoned can be dealt with swiftly. An additional instruction to move back the robotic endoscope holder to a safe overview position could be considered as well. Such a safe location could for instance be close to the remote centre of motion (at the incision point). Although for the general case, when flexible instruments are used, care should be paid that such retraction does not cause the bending segment to hinge behind anatomic structures.</p>
<p>Besides the framework evaluation already performed, an in-depth comparison between human and robotic endoscope control remains as future work. Aspects such as time of completion, smoothness of motions, the stability of the image, number of corrections to the target zone, and average position of the instruments in the view remain to be compared. This contrast would quantify the difference in navigation quality between the proposed framework and a human-held endoscope.</p>
<p>While AIT instructions are necessary in most laparoscopic procedures, they are not the only instructions required for a semantic control of the endoscope holder, and it is a limitation of this study that it only focused on them. Therefore, we are positive that this work will pave the way for further developments to enlarge the set of <italic>semantically rich instructions</italic>.</p>
</sec>
<sec id="s9">
<title>1 Inverse Kinematics Solution to PBVS</title>
<p>The inverse kinematics problem <xref ref-type="disp-formula" rid="e23">(23)</xref> can be solved analytically to obtain <inline-formula id="inf47">
<mml:math id="m75">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. This problem has four possible solutions. To select the appropriate solution, it is important that the <italic>z</italic>-axis of {<italic>i</italic>} is defined as the inward-pointing normal of the body wall. As a first step, <xref ref-type="disp-formula" rid="e23">(23)</xref> should be rewritten as:<disp-formula id="e28">
<mml:math id="m76">
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold-italic">0</mml:mn>
<mml:mo>.</mml:mo>
</mml:math>
<label>(28)</label>
</disp-formula>
</p>
<p>Next, <italic>l</italic>&#x002A; needs to be extracted from each expression in (<italic>f</italic>
<sub>
<italic>x</italic>
</sub>, <italic>f</italic>
<sub>
<italic>y</italic>
</sub>, <italic>f</italic>
<sub>
<italic>z</italic>
</sub>), yielding respective expressions <inline-formula id="inf48">
<mml:math id="m77">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. Equating <inline-formula id="inf49">
<mml:math id="m78">
<mml:msubsup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> and rewriting the result, eliminates <inline-formula id="inf50">
<mml:math id="m79">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> and an expression of the form<disp-formula id="e29">
<mml:math id="m80">
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>sin</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>cos</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:math>
<label>(29)</label>
</disp-formula>emerges, with <italic>a</italic>
<sub>1</sub>, <italic>b</italic>
<sub>1</sub>, <italic>c</italic>
<sub>1</sub> constants. Solving this for <inline-formula id="inf51">
<mml:math id="m81">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> yields two supplementary angles, of which the solution with the smallest absolute value should be retained. If the expression <inline-formula id="inf52">
<mml:math id="m82">
<mml:msubsup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> is substituted in <italic>f</italic>
<sub>
<italic>x</italic>
</sub> and <italic>f</italic>
<sub>
<italic>z</italic>
</sub>, and both are squared and added according to:<disp-formula id="e30">
<mml:math id="m83">
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
</mml:math>
<label>(30)</label>
</disp-formula>the dependence on <inline-formula id="inf53">
<mml:math id="m84">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> cancels out. Simplifying this equation leads to:<disp-formula id="e31">
<mml:math id="m85">
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>cos</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>cos</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x002A;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
</mml:math>
<label>(31)</label>
</disp-formula>with <italic>a</italic>
<sub>2</sub>, <italic>b</italic>
<sub>2</sub>, <italic>c</italic>
<sub>2</sub> constants. This is a quadratic equation in <inline-formula id="inf54">
<mml:math id="m86">
<mml:mi>cos</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. The solution with the smallest <inline-formula id="inf55">
<mml:math id="m87">
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:math>
</inline-formula> is to be retained, but the sign of <inline-formula id="inf56">
<mml:math id="m88">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> still needs to be confirmed. It is now possible to determine <italic>l</italic>&#x2a;, by plugging the known <inline-formula id="inf57">
<mml:math id="m89">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> and <inline-formula id="inf58">
<mml:math id="m90">
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:math>
</inline-formula> into one of the expressions (<italic>f</italic>
<sub>
<italic>x</italic>
</sub>, <italic>f</italic>
<sub>
<italic>y</italic>
</sub>, <italic>f</italic>
<sub>
<italic>z</italic>
</sub>). For numerical stability, <italic>f</italic>
<sub>
<italic>y</italic>
</sub> should be used if <inline-formula id="inf59">
<mml:math id="m91">
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>sin</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo>&#x3e;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:math>
</inline-formula>, <italic>f</italic>
<sub>
<italic>x</italic>
</sub> if <inline-formula id="inf60">
<mml:math id="m92">
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>sin</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo>&#x3e;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:math>
</inline-formula>, and <italic>f</italic>
<sub>
<italic>z</italic>
</sub> otherwise. As the final step, the two unused expressions within (<italic>f</italic>
<sub>
<italic>x</italic>
</sub>, <italic>f</italic>
<sub>
<italic>y</italic>
</sub>, <italic>f</italic>
<sub>
<italic>z</italic>
</sub>) need to be evaluated to determine the sign of <inline-formula id="inf61">
<mml:math id="m93">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>. If they do not evaluate to 0, <inline-formula id="inf62">
<mml:math id="m94">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> has to be negative and <italic>l</italic>&#x2a; needs to be recomputed.</p>
</sec>
</body>
<back>
<sec id="s10">
<title>Data Availability Statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s15">Supplementary Material</xref>, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s11">
<title>Author Contributions</title>
<p>LG-P-H and CG have contributed equally to this work and share first authorship.</p>
</sec>
<sec id="s12">
<title>Funding</title>
<p>This work was supported by core and project funding from the Wellcome/EPSRC (WT203148/Z/16/Z; NS/A000049/1; WT101957; NS/A000027/1). This project has received funding from the European Union&#x2019;s Horizon 2020 research and innovation programme under grant agreement No 101016985 (FAROS project). TV is supported by a Medtronic/Royal Academy of Engineering Research Chair (RCSRF1819&#x5c;7&#x5c;34).</p>
</sec>
<sec sec-type="COI-statement" id="s13">
<title>Conflict of Interest</title>
<p>SO and TV are co-founders and shareholders of Hypervision Surgical Ltd.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s14">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s15">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/frobt.2022.832208/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/frobt.2022.832208/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Video1.MP4" id="SM1" mimetype="video/mp4" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<fn-group>
<fn id="fn2">
<label>1</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://www.youtube.com/watch?v=R1qwKAWFOIk">https://www.youtube.com/watch?v&#x3d;R1qwKAWFOIk</ext-link>
</p>
</fn>
<fn id="fn3">
<label>2</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/luiscarlosgph/list-of-surgical-tool-datasets">https://github.com/luiscarlosgph/list-of-surgical-tool-datasets</ext-link>
</p>
</fn>
<fn id="fn4">
<label>3</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://esge.org/centre/the-european-academy-of-gynaecological-surgery">https://esge.org/centre/the-european-academy-of-gynaecological-surgery</ext-link>
</p>
</fn>
<fn id="fn5">
<label>4</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://europeanacademy.org/training-tools/lastt/">https://europeanacademy.org/training-tools/lastt/</ext-link>
</p>
</fn>
<fn id="fn6">
<label>5</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://www.haption.com/en/products-en/virtuose-6d-en.html">https://www.haption.com/en/products-en/virtuose-6d-en.html</ext-link>
</p>
</fn>
<fn id="fn7">
<label>6</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://www.youtube.com/watch?v=R1qwKAWFOIk">https://www.youtube.com/watch?v&#x3d;R1qwKAWFOIk</ext-link>
</p>
</fn>
<fn id="fn8">
<label>7</label>
<p>An exemplary video is located in section &#x201c;Exercise 3: Bi-manual Coordination&#x201d; at <ext-link ext-link-type="uri" xlink:href="https://europeanacademy.org/training-tools/lastt/">https://europeanacademy.org/training-tools/lastt/</ext-link>
</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abdi</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Burdet</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Bouri</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Himidan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bleuler</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>In a Demanding Task, Three-Handed Manipulation Is Preferred to Two-Handed Manipulation</article-title>. <source>Sci. Rep.</source> <volume>6</volume>, <fpage>21758</fpage>. <pub-id pub-id-type="doi">10.1038/srep21758</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Aertbeli&#xeb;n</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>De Schutter</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Etasl/Etc: A Constraint-Based Task Specification Language and Robot Controller Using Expression Graphs</article-title>,&#x201d; in <conf-name>2014 IEEE/RSJ International Conference on Intelligent Robots and Systems</conf-name>, <fpage>1540</fpage>&#x2013;<lpage>1546</lpage>. <pub-id pub-id-type="doi">10.1109/iros.2014.6942760</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Agustinos</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wolf</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Long</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Cinquin</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Voros</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Visual Servoing of a Robotic Endoscope Holder Based on Surgical Instrument Tracking</article-title>,&#x201d; in <conf-name>5th IEEE RAS/EMBS International Conference on Biomedical Robotics and Biomechatronics</conf-name> (<publisher-loc>Sao Paulo, Brazil</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>13</fpage>&#x2013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.1109/BIOROB.2014.6913744</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ali</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Lam</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Coonar</surname>
<given-names>A. S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Robotic Camera Assistance: The Future of Laparoscopic and Thoracoscopic Surgery?</article-title> <source>Surg. Innov.</source> <volume>25</volume>, <fpage>485</fpage>&#x2013;<lpage>491</lpage>. <pub-id pub-id-type="doi">10.1177/1553350618784224</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Allan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Shvets</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kurmann</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Duggal</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>Y. H.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>2017 Robotic Instrument Segmentation Challenge</article-title>. <source>Arxiv</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/1902.06426">http://arxiv.org/abs/1902.06426</ext-link>
</comment>. </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Amin</surname>
<given-names>M. S. A.</given-names>
</name>
<name>
<surname>Aydin</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Abbud</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Van Cleynenbreugel</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Veneziano</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Somani</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Evaluation of a Remote-Controlled Laparoscopic Camera Holder for Basic Laparoscopic Skills Acquisition: a Randomized Controlled Trial</article-title>. <source>Surg. Endosc.</source> <volume>35</volume>, <fpage>4183</fpage>&#x2013;<lpage>4191</lpage>. <pub-id pub-id-type="doi">10.1007/s00464-020-07899-5</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Avellino</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Bailly</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Arico</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Morel</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Canlorbe</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Multimodal and Mixed Control of Robotic Endoscopes</article-title>,&#x201d; in <conf-name>Proceedings of the 2020 CHI Conference on Human Factors in Computing Systems</conf-name> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1145/3313831.3376795</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Bengio</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2012</year>). &#x201c;<article-title>Practical Recommendations for Gradient-Based Training of Deep Architectures</article-title>,&#x201d; in <conf-name>Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics) 7700 LECTU</conf-name>, <fpage>437</fpage>&#x2013;<lpage>478</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-642-35289-8_26</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Bihlmaier</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Endoscope Robots and Automated Camera Guidance</article-title>,&#x201d; in <source>Learning Dynamic Spatial Relations</source> (<publisher-loc>Wiesbaden</publisher-loc>: <publisher-name>Springer Fachmedien Wiesbaden</publisher-name>), <fpage>23</fpage>&#x2013;<lpage>102</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-658-14914-7_2</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bouarfa</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Akman</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Schneider</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Jonker</surname>
<given-names>P. P.</given-names>
</name>
<name>
<surname>Dankelman</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>In-vivo real-time Tracking of Surgical Instruments in Endoscopic Video</article-title>. <source>Minimally Invasive Ther. Allied Tech.</source> <volume>21</volume>, <fpage>129</fpage>&#x2013;<lpage>134</lpage>. <pub-id pub-id-type="doi">10.3109/13645706.2011.580764</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bouget</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Allan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Stoyanov</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Jannin</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Vision-based and Marker-Less Surgical Tool Detection and Tracking: a Review of the Literature</article-title>. <source>Med. Image Anal.</source> <volume>35</volume>, <fpage>633</fpage>&#x2013;<lpage>654</lpage>. <pub-id pub-id-type="doi">10.1016/j.media.2016.09.003</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bruner</surname>
<given-names>J. P.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Fetal Surgery for Myelomeningocele and the Incidence of Shunt-dependent Hydrocephalus</article-title>. <source>JAMA</source> <volume>282</volume>, <fpage>1819</fpage>. <pub-id pub-id-type="doi">10.1001/jama.282.19.1819</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Campo</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Molinas</surname>
<given-names>C. R.</given-names>
</name>
<name>
<surname>De Wilde</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Brolmann</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Brucker</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mencaglia</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Are You Good Enough for Your Patients? the European Certification Model in Laparoscopic Surgery</article-title>. <source>Facts Views Vis. Obgyn</source> <volume>4</volume>, <fpage>95</fpage>&#x2013;<lpage>101</lpage>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/pubmed/24753896http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC3987500">http://www.ncbi.nlm.nih.gov/pubmed/24753896http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid&#x3d;PMC3987500</ext-link>
</comment>. </citation>
</ref>
<ref id="B14">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Casals</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Amat</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Laporte</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>1996</year>). &#x201c;<article-title>Automatic Guidance of an Assistant Robot in Laparoscopic Surgery</article-title>,&#x201d; in <conf-name>Proceedings of IEEE International Conference on Robotics and Automation</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>895</fpage>&#x2013;<lpage>900</lpage>. <pub-id pub-id-type="doi">10.1109/ROBOT.1996.503886</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chaumette</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Hutchinson</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Visual Servoing and Visual Tracking</article-title>. <source>Handbook of Robotics</source> <volume>2008</volume>, <fpage>563</fpage>&#x2013;<lpage>583</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-540-30301-5_25</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>P.-J.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>M.-C.</given-names>
</name>
<name>
<surname>Lai</surname>
<given-names>M.-J.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>J.-C.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>H. H.-S.</given-names>
</name>
<name>
<surname>Tseng</surname>
<given-names>V. S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Accurate Classification of Diminutive Colorectal Polyps Using Computer-Aided Analysis</article-title>. <source>Gastroenterology</source> <volume>154</volume>, <fpage>568</fpage>&#x2013;<lpage>575</lpage>. <pub-id pub-id-type="doi">10.1053/j.gastro.2017.10.010</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Col</surname>
<given-names>T. D.</given-names>
</name>
<name>
<surname>Mariani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Deguet</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Menciassi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kazanzides</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>De Momi</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Scan: System for Camera Autonomous Navigation in Robotic-Assisted Surgery</article-title>,&#x201d; in <conf-name>2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)</conf-name>, <fpage>2996</fpage>&#x2013;<lpage>3002</lpage>. <pub-id pub-id-type="doi">10.1109/iros45743.2020.9341548</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Da Col</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Caccianiga</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Catellani</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mariani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ferro</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Cordima</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Automating Endoscope Motion in Robotic Surgery: A Usability Study on da Vinci-Assisted Ex Vivo Neobladder Reconstruction</article-title>. <source>Front. Robot. AI</source> <volume>8</volume>. <pub-id pub-id-type="doi">10.3389/frobt.2021.707704</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Dong</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Morel</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Robust Trocar Detection and Localization during Robot-Assisted Endoscopic Surgery</article-title>,&#x201d; in <conf-name>2016 IEEE International Conference on Robotics and Automation (ICRA)</conf-name>, <fpage>4109</fpage>&#x2013;<lpage>4114</lpage>. <pub-id pub-id-type="doi">10.1109/icra.2016.7487602</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eslamian</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Reisner</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>King</surname>
<given-names>B. W.</given-names>
</name>
<name>
<surname>Pandya</surname>
<given-names>A. K.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Towards the Implementation of an Autonomous Camera Algorithm on the da Vinci Platform</article-title>. <source>Stud. Health Technol. Inform.</source> <volume>220</volume>, <fpage>118</fpage>&#x2013;<lpage>123</lpage>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/pubmed/27046563">http://www.ncbi.nlm.nih.gov/pubmed/27046563</ext-link>
</comment>. </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eslamian</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Reisner</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Pandya</surname>
<given-names>A. K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Development and evaluation of an autonomous camera control algorithm on the da vinci surgical system</article-title>. <source>Int. J. Med. Robot</source> <volume>16</volume>, <fpage>e2036</fpage>. <pub-id pub-id-type="doi">10.1002/rcs.2036</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="web">
<collab>European Academy of Gynaecological Surgery</collab> (<year>2020</year>). <article-title>LASTT</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://europeanacademy.org/training-tools/lastt/">https://europeanacademy.org/training-tools/lastt/</ext-link>
</comment>. </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Everingham</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Eslami</surname>
<given-names>S. M. A.</given-names>
</name>
<name>
<surname>Van Gool</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>C. K. I.</given-names>
</name>
<name>
<surname>Winn</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zisserman</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>The Pascal Visual Object Classes Challenge: A Retrospective</article-title>. <source>Int. J. Comput. Vis.</source> <volume>111</volume>, <fpage>98</fpage>&#x2013;<lpage>136</lpage>. <pub-id pub-id-type="doi">10.1007/s11263-014-0733-5</pub-id> </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fuentes-Hurtado</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Kadkhodamohammadi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Flouty</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Barbarisi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Luengo</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Stoyanov</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>EasyLabels: Weak Labels for Scene Segmentation in Laparoscopic Videos</article-title>. <source>Int. J. CARS</source> <volume>14</volume>, <fpage>1247</fpage>&#x2013;<lpage>1257</lpage>. <pub-id pub-id-type="doi">10.1007/s11548-019-02003-2</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fujii</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Gras</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Salerno</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>G.-Z.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Gaze Gesture Based Human Robot Interaction for Laparoscopic Surgery</article-title>. <source>Med. Image Anal.</source> <volume>44</volume>, <fpage>196</fpage>&#x2013;<lpage>214</lpage>. <pub-id pub-id-type="doi">10.1016/j.media.2017.11.011</pub-id> </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Garcia-Peraza-Herrera</surname>
<given-names>L. C.</given-names>
</name>
<name>
<surname>Fidon</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>D&#x27;Ettorre</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Stoyanov</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Vercauteren</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ourselin</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Image Compositing for Segmentation of Surgical Tools without Manual Annotations</article-title>. <source>IEEE Trans. Med. Imaging</source> <volume>40</volume>, <fpage>1450</fpage>&#x2013;<lpage>1460</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2021.3057884</pub-id> </citation>
</ref>
<ref id="B27">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Garcia-Peraza-Herrera</surname>
<given-names>L. C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Fidon</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gruijthuijsen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Devreker</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Attilakos</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). &#x201c;<article-title>ToolNet: Holistically-Nested Real-Time Segmentation of Robotic Surgical Tools</article-title>,&#x201d; in <conf-name>2017 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>5717</fpage>&#x2013;<lpage>5722</lpage>. <pub-id pub-id-type="doi">10.1109/IROS.2017.8206462</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Garc&#xed;a-Peraza-Herrera</surname>
<given-names>L. C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Gruijthuijsen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Devreker</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Attilakos</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Deprest</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). &#x201c;<article-title>Real-Time Segmentation of Non-rigid Surgical Tools Based on Deep Learning and Tracking</article-title>,&#x201d; in <conf-name>CARE workshop, held in conjunction with MICCAI 2016</conf-name>, <fpage>84</fpage>&#x2013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-54057-3_8</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gillen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pletzer</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Heiligensetzer</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wolf</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kleeff</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Feussner</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Solo-surgical Laparoscopic Cholecystectomy with a Joystick-Guided Camera Device: a Case-Control Study</article-title>. <source>Surg. Endosc.</source> <volume>28</volume>, <fpage>164</fpage>&#x2013;<lpage>170</lpage>. <pub-id pub-id-type="doi">10.1007/s00464-013-3142-x</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gonz&#xe1;lez</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Bravo-S&#xe1;nchez</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Arbelaez</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>ISINet: An Instance-Based Approach for Surgical Instrument Segmentation</article-title>. <source>MICCAI</source> <volume>2020</volume>, <fpage>595</fpage>&#x2013;<lpage>605</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-59716-0_57</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goodell</surname>
<given-names>K. H.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>C. G. L.</given-names>
</name>
<name>
<surname>Schwaitzberg</surname>
<given-names>S. D.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Effects of Cognitive Distraction on Performance of Laparoscopic Surgical Tasks</article-title>. <source>J. Laparoendoscopic Adv. Surg. Tech.</source> <volume>16</volume>, <fpage>94</fpage>&#x2013;<lpage>98</lpage>. <pub-id pub-id-type="doi">10.1089/lap.2006.16.94</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gruijthuijsen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Morel</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Poorten</surname>
<given-names>E. V.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Leveraging the Fulcrum point in Robotic Minimally Invasive Surgery</article-title>. <source>IEEE Robot. Autom. Lett.</source> <volume>3</volume>, <fpage>2071</fpage>&#x2013;<lpage>2078</lpage>. <pub-id pub-id-type="doi">10.1109/lra.2018.2809495</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hanna</surname>
<given-names>G. B.</given-names>
</name>
<name>
<surname>Shimi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cuschieri</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Influence of Direction of View, Target-To-Endoscope Distance and Manipulation Angle on Endoscopic Knot Tying</article-title>. <source>Br. J. Surg.</source> <volume>84</volume>, <fpage>1460</fpage>&#x2013;<lpage>1464</lpage>. <pub-id pub-id-type="doi">10.1111/j.1365-2168.1997.02835.x</pub-id> </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Holl&#xe4;nder</surname>
<given-names>S. W.</given-names>
</name>
<name>
<surname>Klingen</surname>
<given-names>H. J.</given-names>
</name>
<name>
<surname>Fritz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Djalali</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Birk</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Robotic Camera Assistance and its Benefit in 1033 Traditional Laparoscopic Procedures: Prospective Clinical Trial Using a Joystick-Guided Camera Holder</article-title>. <source>Surg. Technol. Int.</source> <volume>25</volume>, <fpage>19</fpage>&#x2013;<lpage>23</lpage>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/pubmed/25419950">http://www.ncbi.nlm.nih.gov/pubmed/25419950</ext-link>
</comment>. </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ioffe</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Szegedy</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift</article-title>. <source>arXiv preprint arXiv:1502.03167</source>. </citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jaspers</surname>
<given-names>J. E. N.</given-names>
</name>
<name>
<surname>Breedveld</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Herder</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Grimbergen</surname>
<given-names>C. A.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Camera and Instrument Holders and Their Clinical Value in Minimally Invasive Surgery</article-title>. <source>Surg. Laparosc. Endosc. Percutaneous Tech.</source> <volume>14</volume>, <fpage>145</fpage>&#x2013;<lpage>152</lpage>. <pub-id pub-id-type="doi">10.1097/01.sle.0000129395.42501.5d</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Shelhamer</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Donahue</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Karayev</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Long</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Girshick</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Caffe: Convolutional Architecture for Fast Feature Embedding</article-title>. <source>arXiv preprint arXiv:1408.5093</source>. </citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kabagambe</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Jensen</surname>
<given-names>G. W.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y. J.</given-names>
</name>
<name>
<surname>Vanover</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Farmer</surname>
<given-names>D. L.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Fetal Surgery for Myelomeningocele: A Systematic Review and Meta-Analysis of Outcomes in Fetoscopic versus Open Repair</article-title>. <source>Fetal Diagn. Ther.</source> <volume>43</volume>, <fpage>161</fpage>&#x2013;<lpage>174</lpage>. <pub-id pub-id-type="doi">10.1159/000479505</pub-id> </citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>King</surname>
<given-names>B. W.</given-names>
</name>
<name>
<surname>Reisner</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Pandya</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Composto</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Ellis</surname>
<given-names>R. D.</given-names>
</name>
<name>
<surname>Klein</surname>
<given-names>M. D.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Towards an Autonomous Robot for Camera Control during Laparoscopic Surgery</article-title>. <source>J. Laparoendoscopic Adv. Surg. Tech.</source> <volume>23</volume>, <fpage>1027</fpage>&#x2013;<lpage>1030</lpage>. <pub-id pub-id-type="doi">10.1089/lap.2013.0304</pub-id> </citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kommu</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Rimington</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Anderson</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ran&#xe9;</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Initial Experience with the EndoAssist Camera-Holding Robot in Laparoscopic Urological Surgery</article-title>. <source>J. Robotic Surg.</source> <volume>1</volume>, <fpage>133</fpage>&#x2013;<lpage>137</lpage>. <pub-id pub-id-type="doi">10.1007/s11701-007-0010-5</pub-id> </citation>
</ref>
<ref id="B41">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Kunze</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Roehm</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Beetz</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2011</year>). &#x201c;<article-title>Towards Semantic Robot Description Languages</article-title>,&#x201d; in <conf-name>2011 IEEE International Conference on Robotics and Automation</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>5589</fpage>&#x2013;<lpage>5595</lpage>. <pub-id pub-id-type="doi">10.1109/ICRA.2011.5980170</pub-id> </citation>
</ref>
<ref id="B42">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Kwon</surname>
<given-names>D.-S.</given-names>
</name>
<name>
<surname>Ko</surname>
<given-names>S.-Y.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2008</year>). &#x201c;<article-title>Intelligent Laparoscopic Assistant Robot through Surgery Task Model: How to Give Intelligence to Medical Robots</article-title>,&#x201d; in <conf-name>Medical Robotics. I-Tech Education and Publishing</conf-name>, <fpage>197</fpage>&#x2013;<lpage>218</lpage>. <pub-id pub-id-type="doi">10.5772/5249</pub-id> </citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Dexter</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Godinez</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Meenaghan</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Catania</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>Ergonomic Risk Associated with Assisting in Minimally Invasive Surgery</article-title>. <source>Surg. Endosc.</source> <volume>23</volume>, <fpage>182</fpage>&#x2013;<lpage>188</lpage>. <pub-id pub-id-type="doi">10.1007/s00464-008-0141-4</pub-id> </citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>T. C.</given-names>
</name>
<name>
<surname>Kashyap</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Chu</surname>
<given-names>C. N.</given-names>
</name>
</person-group> (<year>1994</year>). <article-title>Building Skeleton Models via 3-D Medial Surface Axis Thinning Algorithms</article-title>. <source>CVGIP: Graphical Models Image Process.</source> <volume>56</volume>, <fpage>462</fpage>&#x2013;<lpage>478</lpage>. <pub-id pub-id-type="doi">10.1006/cgip.1994.1042</pub-id> </citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Sinha</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ishii</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hager</surname>
<given-names>G. D.</given-names>
</name>
<name>
<surname>Reiter</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Taylor</surname>
<given-names>R. H.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Dense Depth Estimation in Monocular Endoscopy with Self-Supervised Learning Methods</article-title>. <source>IEEE Trans. Med. Imaging</source> <volume>39</volume>, <fpage>1438</fpage>&#x2013;<lpage>1447</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2019.2950936</pub-id> </citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mariani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Colaci</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Da Col</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Sanna</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Vendrame</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Menciassi</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>An Experimental Comparison towards Autonomous Camera Navigation to Optimize Training in Robot Assisted Surgery</article-title>. <source>IEEE Robot. Autom. Lett.</source> <volume>5</volume>, <fpage>1461</fpage>&#x2013;<lpage>1467</lpage>. <pub-id pub-id-type="doi">10.1109/LRA.2020.2965067</pub-id> </citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meuli</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Moehrlen</surname>
<given-names>U.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Fetal Surgery for Myelomeningocele Is Effective: a Critical Look at the Whys</article-title>. <source>Pediatr. Surg. Int.</source> <volume>30</volume>, <fpage>689</fpage>&#x2013;<lpage>697</lpage>. <pub-id pub-id-type="doi">10.1007/s00383-014-3524-8</pub-id> </citation>
</ref>
<ref id="B48">
<citation citation-type="thesis">
<person-group person-group-type="author">
<name>
<surname>Mudunuri</surname>
<given-names>A. V.</given-names>
</name>
</person-group> (<year>2010</year>). <source>Autonomous Camera Control System for Surgical Robots</source>. <comment>Ph.D. thesis</comment> (<publisher-loc>Detroit, MI, USA</publisher-loc>: <publisher-name>Wayne State University</publisher-name>).</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nishikawa</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Nakagoe</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Taniguchi</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Yamada</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sekimoto</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Takiguchi</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2008</year>). <article-title>How Does the Camera Assistant Decide the Zooming Ratio of Laparoscopic Images? Analysis and Implementation</article-title>. <source>MICCAI</source> <volume>2008</volume>, <fpage>611</fpage>&#x2013;<lpage>618</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-540-85990-1_73</pub-id> </citation>
</ref>
<ref id="B50">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Osa</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Staub</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Knoll</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Framework of Automatic Robot Surgery System Using Visual Servoing</article-title>,&#x201d; in <conf-name>2010 IEEE/RSJ International Conference on Intelligent Robots and Systems</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1837</fpage>&#x2013;<lpage>1842</lpage>. <pub-id pub-id-type="doi">10.1109/IROS.2010.5650301</pub-id> </citation>
</ref>
<ref id="B51">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Pakhomov</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Navab</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Towards Unsupervised Learning for Instrument Segmentation in Robotic Surgery with Cycle-Consistent Adversarial Networks</article-title>,&#x201d; in <conf-name>2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>8499</fpage>&#x2013;<lpage>8504</lpage>. <pub-id pub-id-type="doi">10.1109/iros45743.2020.9340816</pub-id> </citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pandya</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Reisner</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>King</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Lucas</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Composto</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Klein</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>A Review of Camera Viewpoint Automation in Robotic and Laparoscopic Surgery</article-title>. <source>Robotics</source> <volume>3</volume>, <fpage>310</fpage>&#x2013;<lpage>329</lpage>. <pub-id pub-id-type="doi">10.3390/robotics3030310</pub-id> </citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Platte</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Alleblas</surname>
<given-names>C. C. J.</given-names>
</name>
<name>
<surname>Inthout</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Nieboer</surname>
<given-names>T. E.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Measuring Fatigue and Stress in Laparoscopic Surgery: Validity and Reliability of the star-track Test</article-title>. <source>Minimally Invasive Ther. Allied Tech.</source> <volume>28</volume>, <fpage>57</fpage>&#x2013;<lpage>64</lpage>. <pub-id pub-id-type="doi">10.1080/13645706.2018.1470984</pub-id> </citation>
</ref>
<ref id="B54">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Polski</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fiolka</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Can</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Schneider</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Feussner</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2009</year>). &#x201c;<article-title>A New Partially Autonomous Camera Control System</article-title>,&#x201d; in <conf-name>World Congress on Medical Physics and Biomedical Engineering</conf-name>, <fpage>276</fpage>&#x2013;<lpage>277</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-642-03906-5_75</pub-id> </citation>
</ref>
<ref id="B55">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Rahman</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Optimizing Intersection-Over-Union in Deep Neural Networks for Image Segmentation</article-title>,&#x201d; in <conf-name>Advances in Visual Computing</conf-name>, <fpage>234</fpage>&#x2013;<lpage>244</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-50835-1_22</pub-id> </citation>
</ref>
<ref id="B56">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Reiter</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Goldman</surname>
<given-names>R. E.</given-names>
</name>
<name>
<surname>Bajo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Iliopoulos</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Simaan</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Allen</surname>
<given-names>P. K.</given-names>
</name>
</person-group> (<year>2011</year>). &#x201c;<article-title>A Learning Algorithm for Visual Pose Estimation of Continuum Robots</article-title>,&#x201d; in <conf-name>IROS</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>2390</fpage>&#x2013;<lpage>2396</lpage>. <pub-id pub-id-type="doi">10.1109/iros.2011.6094947</pub-id> </citation>
</ref>
<ref id="B57">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Rivas-Blanco</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Estebanez</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Cuevas-Rodriguez</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bauzano</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Munoz</surname>
<given-names>V. F.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Towards a Cognitive Camera Robotic Assistant</article-title>,&#x201d; in <conf-name>5th IEEE RAS/EMBS International Conference on Biomedical Robotics and Biomechatronics</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>739</fpage>&#x2013;<lpage>744</lpage>. <pub-id pub-id-type="doi">10.1109/BIOROB.2014.6913866</pub-id> </citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rodrigues Armijo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>C.-K.</given-names>
</name>
<name>
<surname>Carlson</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Oleynikov</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Siu</surname>
<given-names>K.-C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Ergonomics Analysis for Subjective and Objective Fatigue between Laparoscopic and Robotic Surgical Skills Practice Among Surgeons</article-title>. <source>Surg. Innov.</source> <volume>27</volume>, <fpage>81</fpage>&#x2013;<lpage>87</lpage>. <pub-id pub-id-type="doi">10.1177/1553350619887861</pub-id> </citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ronneberger</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Fischer</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Brox</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>U-net: Convolutional Networks for Biomedical Image Segmentation</article-title>. <source>MICCAI</source> <volume>2015</volume>, <fpage>234</fpage>&#x2013;<lpage>241</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-24574-4_28</pub-id> </citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ro&#xdf;</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Reinke</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Full</surname>
<given-names>P. M.</given-names>
</name>
<name>
<surname>Wagner</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kenngott</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Apitz</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Comparative Validation of Multi-Instance Instrument Segmentation in Endoscopy: Results of the ROBUST-MIS 2019 challenge</article-title>. <source>Med. Image Anal.</source> <volume>70</volume>, <fpage>101920</fpage>. <pub-id pub-id-type="doi">10.1016/j.media.2020.101920</pub-id> </citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Samei</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Tsang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kesch</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Lobo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hor</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mohareri</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>A Partial Augmented Reality System with Live Ultrasound and Registered Preoperative MRI for Guiding Robot-Assisted Radical Prostatectomy</article-title>. <source>Med. Image Anal.</source> <volume>60</volume>, <fpage>101588</fpage>. <pub-id pub-id-type="doi">10.1016/j.media.2019.101588</pub-id> </citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sandoval</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Laribi</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Faure</surname>
<given-names>J.-P.</given-names>
</name>
<name>
<surname>Breque</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Richer</surname>
<given-names>J.-P.</given-names>
</name>
<name>
<surname>Zeghloul</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Towards an Autonomous Robot-Assistant for Laparoscopy Using Exteroceptive Sensors: Feasibility Study and Implementation</article-title>. <source>IEEE Robot. Autom. Lett.</source> <volume>6</volume>, <fpage>6473</fpage>&#x2013;<lpage>6480</lpage>. <pub-id pub-id-type="doi">10.1109/lra.2021.3094644</pub-id> </citation>
</ref>
<ref id="B63">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ko</surname>
<given-names>S.-Y.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kwon</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>W. J.</given-names>
</name>
</person-group> (<year>2005</year>). &#x201c;<article-title>Intelligent Interaction between Surgeon and Laparoscopic Assistant Robot System</article-title>,&#x201d; in <conf-name>ROMAN 2005. IEEE International Workshop on Robot and Human Interactive Communication</conf-name>, <fpage>60</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1109/ROMAN.2005.1513757</pub-id> </citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Gehlbach</surname>
<given-names>P. L.</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>J. U.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Active Tremor Cancellation by a "Smart" Handheld Vitreoretinal Microsurgical Tool Using Swept Source Optical Coherence Tomography</article-title>. <source>Opt. Express</source> <volume>20</volume>, <fpage>23414</fpage>&#x2013;<lpage>23421</lpage>. <pub-id pub-id-type="doi">10.1364/oe.20.023414</pub-id> </citation>
</ref>
<ref id="B65">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>K.-T.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>C.-J.</given-names>
</name>
</person-group> (<year>2012</year>). &#x201c;<article-title>Autonomous and Stable Tracking of Endoscope Instrument Tools with Monocular Camera</article-title>,&#x201d; in <conf-name>2012 IEEE/ASME International Conference on Advanced Intelligent Mechatronics (AIM)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>39</fpage>&#x2013;<lpage>44</lpage>. <pub-id pub-id-type="doi">10.1109/AIM.2012.6266023</pub-id> </citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Srivastava</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Krizhevsky</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sutskever</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Salakhutdinov</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Dropout: A Simple Way to Prevent Neural Networks from Overfitting</article-title>. <source>J. Machine Learn. Res.</source> <volume>15</volume>, <fpage>1929</fpage>&#x2013;<lpage>1958</lpage>. </citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stott</surname>
<given-names>M. C.</given-names>
</name>
<name>
<surname>Barrie</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sebastien</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hammill</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Subar</surname>
<given-names>D. A.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Is the Use of a Robotic Camera Holder Economically Viable? A Cost Comparison of Surgical Assistant versus the Use of a Robotic Camera Holder in Laparoscopic Liver Resections</article-title>. <source>Surg. Laparosc. Endosc. Percutaneous Tech.</source> <volume>27</volume>, <fpage>375</fpage>&#x2013;<lpage>378</lpage>. <pub-id pub-id-type="doi">10.1097/SLE.0000000000000452</pub-id> </citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stoyanov</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Surgical Vision</article-title>. <source>Ann. Biomed. Eng.</source> <volume>40</volume>, <fpage>332</fpage>&#x2013;<lpage>345</lpage>. <pub-id pub-id-type="doi">10.1007/s10439-011-0441-z</pub-id> </citation>
</ref>
<ref id="B69">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Takahashi</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Solo Surgery with VIKY: Safe, Simple, and Low-Cost Robotic Surgery</article-title>,&#x201d; in <source>Handbook of Robotic and Image-Guided Surgery</source>. Editor <person-group person-group-type="editor">
<name>
<surname>Abedin-Nasab</surname>
<given-names>M. H.</given-names>
</name>
</person-group> (<publisher-loc>Amsterdam, Netherlands</publisher-loc>: <publisher-name>Elsevier</publisher-name>), <fpage>79</fpage>&#x2013;<lpage>88</lpage>. <pub-id pub-id-type="doi">10.1016/B978-0-12-814245-5.00005-0</pub-id> </citation>
</ref>
<ref id="B70">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Taniguchi</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Nishikawa</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sekimoto</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kobayashi</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Kazuhara</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Ichihara</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). &#x201c;<article-title>Classification, Design and Evaluation of Endoscope Robots</article-title>,&#x201d; in <source>Robot Surgery</source> (<publisher-loc>London, UK</publisher-loc>: <publisher-name>InTech</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>24</lpage>. <pub-id pub-id-type="doi">10.5772/6893</pub-id> </citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tonet</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Thoranaghatte</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Megali</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Dario</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Tracking Endoscopic Instruments without a Localizer: a Shape-Analysis-Based Approach</article-title>. <source>Comp. Aided Surg.</source> <volume>12</volume>, <fpage>35</fpage>&#x2013;<lpage>42</lpage>. <pub-id pub-id-type="doi">10.1080/10929080701210782</pub-id> </citation>
</ref>
<ref id="B72">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Uecker</surname>
<given-names>D. R.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y. F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Automated Instrument Tracking in Robotically Assisted Laparoscopic Surgery</article-title>. <source>J. Image Guid. Surg.</source> <volume>1</volume>, <fpage>308</fpage>&#x2013;<lpage>325</lpage>. <pub-id pub-id-type="doi">10.1002/(sici)1522-712x(1995)1:6&#x3c;308::aid-igs3&#x3e;3.0.co;2-e</pub-id> </citation>
</ref>
<ref id="B73">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Uenohara</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kanade</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Vision-based Object Registration for Real-Time Image Overlay</article-title>. <source>Comput. Biol. Med.</source> <volume>25</volume>, <fpage>249</fpage>&#x2013;<lpage>260</lpage>. <pub-id pub-id-type="doi">10.1016/0010-4825(94)00045-R</pub-id> </citation>
</ref>
<ref id="B74">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vardazaryan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mutter</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Marescaux</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Padoy</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Weakly-Supervised Learning for Tool Localization in Laparoscopic Videos</article-title>. <source>LABELS</source> <volume>2018</volume>, <fpage>169</fpage>&#x2013;<lpage>179</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-01364-6_19</pub-id> </citation>
</ref>
<ref id="B75">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wagner</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bihlmaier</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kenngott</surname>
<given-names>H. G.</given-names>
</name>
<name>
<surname>Mietkowski</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Scheikl</surname>
<given-names>P. M.</given-names>
</name>
<name>
<surname>Bodenstedt</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>A Learning Robot for Cognitive Camera Control in Minimally Invasive Surgery</article-title>. <source>Surg. Endosc.</source> <volume>2021</volume>, <fpage>1</fpage>&#x2013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1007/s00464-021-08509-8</pub-id> </citation>
</ref>
<ref id="B76">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.-F.</given-names>
</name>
<name>
<surname>Uecker</surname>
<given-names>D. R.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>1998</year>). <article-title>A New Framework for Vision-Enabled and Robotically Assisted Minimally Invasive Surgery</article-title>. <source>Comput. Med. Imaging Graphics</source> <volume>22</volume>, <fpage>429</fpage>&#x2013;<lpage>437</lpage>. <pub-id pub-id-type="doi">10.1016/S0895-6111(98)00052-4</pub-id> </citation>
</ref>
<ref id="B77">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wauben</surname>
<given-names>L. S. G. L.</given-names>
</name>
<name>
<surname>van Veelen</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Gossot</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Goossens</surname>
<given-names>R. H. M.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Application of Ergonomic Guidelines during Minimally Invasive Surgery: a Questionnaire Survey of 284 Surgeons</article-title>. <source>Surg. Endosc.</source> <volume>20</volume>, <fpage>1268</fpage>&#x2013;<lpage>1274</lpage>. <pub-id pub-id-type="doi">10.1007/s00464-005-0647-y</pub-id> </citation>
</ref>
<ref id="B78">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Weede</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>M&#xF6;nnich</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>M&#xFC;ller</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>W&#xF6;rn</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2011</year>). &#x201c;<article-title>An Intelligent and Autonomous Endoscopic Guidance System for Minimally Invasive Surgery</article-title>,&#x201d; in <conf-name>2011 IEEE International Conference on Robotics and Automation</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>5762</fpage>&#x2013;<lpage>5768</lpage>. <pub-id pub-id-type="doi">10.1109/ICRA.2011.5980216</pub-id> </citation>
</ref>
<ref id="B79">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wijsman</surname>
<given-names>P. J. M.</given-names>
</name>
<name>
<surname>Broeders</surname>
<given-names>I. A. M. J.</given-names>
</name>
<name>
<surname>Brenkman</surname>
<given-names>H. J.</given-names>
</name>
<name>
<surname>Szold</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Forgione</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Schreuder</surname>
<given-names>H. W. R.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>First Experience with the AUTOLAP SYSTEM: an Image-Based Robotic Camera Steering Device</article-title>. <source>Surg. Endosc.</source> <volume>32</volume>, <fpage>2560</fpage>&#x2013;<lpage>2566</lpage>. <pub-id pub-id-type="doi">10.1007/s00464-017-5957-3</pub-id> </citation>
</ref>
<ref id="B80">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wijsman</surname>
<given-names>P. J. M.</given-names>
</name>
<name>
<surname>Molenaar</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Voskens</surname>
<given-names>F. J.</given-names>
</name>
<name>
<surname>van&#x2019;t Hullenaar</surname>
<given-names>C. D. P.</given-names>
</name>
<name>
<surname>Broeders</surname>
<given-names>I. A. M. J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Image-based Laparoscopic Camera Steering versus Conventional Steering: a Comparison Study</article-title>. <source>J. Robotic Surg.</source>. <pub-id pub-id-type="doi">10.1007/s11701-021-01342-0</pub-id> </citation>
</ref>
<ref id="B81">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Xiaolong</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Image-Py Skeleton Network Module</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/Image-Py/sknw">https://github.com/Image-Py/sknw</ext-link>
</comment>. </citation>
</ref>
<ref id="B82">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>G.-Z.</given-names>
</name>
<name>
<surname>Cambias</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cleary</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Daimler</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Drake</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Dupont</surname>
<given-names>P. E.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Medical Robotics-Regulatory, Ethical, and Legal Considerations for Increasing Levels of Autonomy</article-title>. <source>Sci. Robot.</source> <volume>2</volume>, <fpage>eaam8638</fpage>. <pub-id pub-id-type="doi">10.1126/scirobotics.aam8638</pub-id> </citation>
</ref>
<ref id="B83">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2013</year>). &#x201c;<article-title>A Kinematics Method of Automatic Visual Window for Laparoscopic Minimally Invasive Surgical Robotic System</article-title>,&#x201d; in <conf-name>2013 IEEE International Conference on Mechatronics and Automation</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>997</fpage>&#x2013;<lpage>1002</lpage>. <pub-id pub-id-type="doi">10.1109/ICMA.2013.6618051</pub-id> </citation>
</ref>
<ref id="B84">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Payandeh</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Application of Visual Tracking for Robot-Assisted Laparoscopic Surgery</article-title>. <source>J. Robotic Syst.</source> <volume>19</volume>, <fpage>315</fpage>&#x2013;<lpage>328</lpage>. <pub-id pub-id-type="doi">10.1002/rob.10043</pub-id> </citation>
</ref>
<ref id="B85">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Real-time 3D Visual Tracking of Laparoscopic Instruments for Robotized Endoscope Holder</article-title>,&#x201d; in <conf-name>Proceeding of the 11th World Congress on Intelligent Control and Automation</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>6145</fpage>&#x2013;<lpage>6150</lpage>. <pub-id pub-id-type="doi">10.1109/WCICA.2014.7053773</pub-id> </citation>
</ref>
<ref id="B86">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zinchenko</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>K.-T.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Autonomous Endoscope Robot Positioning Using Instrument Segmentation with Virtual Reality Visualization</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>72614</fpage>&#x2013;<lpage>72623</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3079427</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>