<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Robot. AI</journal-id>
<journal-title>Frontiers in Robotics and AI</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Robot. AI</abbrev-journal-title>
<issn pub-type="epub">2296-9144</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1341689</article-id>
<article-id pub-id-type="doi">10.3389/frobt.2024.1341689</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Robotics and AI</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Implementation and analysis of a parallel Kalman filter algorithm for lidar localization based on CUDA technology</article-title>
<alt-title alt-title-type="left-running-head">Mochurad</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frobt.2024.1341689">10.3389/frobt.2024.1341689</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Mochurad</surname>
<given-names>Lesia</given-names>
</name>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2392101/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff>
<institution>Department of Artificial Intelligence</institution>, <institution>Lviv Polytechnic National University</institution>, <addr-line>Lviv</addr-line>, <country>Ukraine</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1110377/overview">Wai-keung Fung</ext-link>, Cardiff Metropolitan University, United Kingdom</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1640296/overview">Alwin Poulose</ext-link>, Indian Institute of Science Education and Research, India</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2522282/overview">Dmytro Chumachenko</ext-link>, University of Waterloo, Canada</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2610401/overview">Viacheslav Kovtun</ext-link>, Polish Academy of Sciences, Poland</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Lesia Mochurad, <email>lesia.i.mochurad@lpnu.ua</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>02</day>
<month>02</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>11</volume>
<elocation-id>1341689</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>11</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>16</day>
<month>01</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Mochurad.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Mochurad</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>
<bold>Introduction:</bold> Navigation satellite systems can fail to work or work incorrectly in a number of conditions: signal shadowing, electromagnetic interference, atmospheric conditions, and technical problems. All of these factors can significantly affect the localization accuracy of autonomous driving systems. This emphasizes the need for other localization technologies, such as Lidar.</p>
<p>
<bold>Methods:</bold> The use of the Kalman filter in combination with Lidar can be very effective in various applications due to the synergy of their capabilities. The Kalman filter can improve the accuracy of lidar measurements by taking into account the noise and inaccuracies present in the measurements.</p>
<p>
<bold>Results:</bold> In this paper, we propose a parallel Kalman algorithm in three-dimensional space to increase the computational speed of Lidar localization. At the same time, the initial localization accuracy of the latter is preserved. A distinctive feature of the proposed approach is that the Kalman localization algorithm itself is parallelized, rather than the process of building a map for navigation. The proposed algorithm allows us to obtain the result 3.8 times faster without compromising the localization accuracy (the localization error was 3% in both cases), making it effective for real-time decision-making.</p>
<p>
<bold>Discussion:</bold> The reliability of this result is confirmed by a preliminary theoretical estimate of the acceleration rate based on Amdahl&#x2019;s law. Accelerating the Kalman filter with CUDA for Lidar localization can be of significant practical value, especially in real-time applications and in conditions where large amounts of data from Lidar sensors need to be processed.</p>
</abstract>
<kwd-group>
<kwd>extended Kalman filter</kwd>
<kwd>lidar</kwd>
<kwd>CUDA technology</kwd>
<kwd>real-time systems</kwd>
<kwd>acceleration</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Intelligence in Robotics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Lidar, or light detection and ranging, is a method of remote sensing (<xref ref-type="bibr" rid="B36">Tian et al., 2021</xref>) that uses light in the form of pulsed lasers to measure distances to objects. It has become an integral technology in various industries, including autonomous vehicles (<xref ref-type="bibr" rid="B8">Elhousni and Huang, 2020</xref>), robotics (<xref ref-type="bibr" rid="B24">Mochurad et al., 2023a</xref>), and environmental monitoring (<xref ref-type="bibr" rid="B10">Guo et al., 2020</xref>).</p>
<p>Lidar systems consist of three main components:<list list-type="simple">
<list-item>
<p>1. <italic>Laser transmitter:</italic> This component generates short pulses of laser light (usually in the infrared) that are directed at objects in the environment.</p>
</list-item>
<list-item>
<p>2. <italic>Detector:</italic> The detector receives reflected light pulses from objects and converts them into an electrical signal.</p>
</list-item>
<list-item>
<p>3. <italic>Data processing system:</italic> The data processing system calculates the distance to objects using the time elapsed between the transmission of the pulse and the receipt of the reflected signal. Using the known angles and orientation of the Lidar system, the coordinates of the reflected points in three-dimensional space can be determined.</p>
</list-item>
</list>
</p>
<p>Lidar can be:<list list-type="simple">
<list-item>
<p>1. <italic>Static Lidar:</italic> Used to scan static objects from a fixed position. This is often used in surveying and mapping to create three-dimensional models of the landscape and infrastructure.</p>
</list-item>
<list-item>
<p>2. <italic>Mobile Lidar:</italic> Used to collect data when the Lidar system is on a moving object, such as a car, drone, or airplane. Mobile Lidar provides fast data collection over large areas and wide coverage. It is used in industries such as aerial surveying, infrastructure monitoring, and autonomous vehicles in particular.</p>
</list-item>
</list>
</p>
<p>However, Lidar has some disadvantages, such as high cost, relatively large size and weight, and sensitivity to weather conditions such as rain or fog. Some of these disadvantages can be compensated for by combining Lidar with other technologies and developing new, more compact and cost-effective Lidar systems.</p>
<p>One of the key applications of lidar is localization, which involves estimating the position and orientation of an object in the environment using data from a lidar sensor (<xref ref-type="bibr" rid="B21">Marck et al., 2013</xref>). Localization is extremely important for autonomous vehicles (<xref ref-type="bibr" rid="B19">Lu et al., 2022</xref>), where it is necessary to determine the position of the vehicle for safe and efficient operation. Optimization of navigation algorithms and methods can contribute to environmental and economic development, as autonomous vehicles can reduce fuel costs and ensure efficient use of infrastructure (<xref ref-type="bibr" rid="B39">Varsi et al., 2021</xref>).</p>
<p>Improving the navigation algorithms of autonomous cars can accelerate the development of smart cities, where autonomous vehicles play an important role in creating integrated and efficient transportation solutions (<xref ref-type="bibr" rid="B30">Phang et al., 2021</xref>; <xref ref-type="bibr" rid="B40">Wang et al., 2022</xref>).</p>
<p>Lidar&#x2019;s localization speed is an important factor for real-time applications (<xref ref-type="bibr" rid="B17">Liu et al., 2023</xref>), especially for applications such as autonomous driving (<xref ref-type="bibr" rid="B20">Luo et al., 2019</xref>) where timely decision making is essential. Traditional localization methods such as the Extended Kalman Filter (EKF) (<xref ref-type="bibr" rid="B45">Zhang, 2019</xref>) and the iterative closest point algorithm (ICP) (<xref ref-type="bibr" rid="B44">Zhang et al., 2022</xref>), can be computationally expensive and do not meet the requirements of real-time applications (<xref ref-type="bibr" rid="B5">Dabbiru et al., 2020</xref>; <xref ref-type="bibr" rid="B34">Shymanskyi et al., 2022</xref>).</p>
<p>As is known (<xref ref-type="bibr" rid="B9">Garland et al., 2008</xref>), the CUDA parallel computing platform, developed by NVIDIA, can significantly accelerate various applications, including Lidar localization. CUDA allows developers to use the massively parallel architecture of modern GPUs, which makes it possible to process large amounts of Lidar data faster.</p>
<p>The relevance of the conducted research can be considered from the following perspectives:<list list-type="simple">
<list-item>
<p>&#x2022; <italic>Development of autonomous vehicles:</italic> With the active growth of the autonomous vehicle industry, the development of new and improvement of existing navigation methods are becoming increasingly relevant. Autonomous vehicles require high accuracy in localization and stable operation of navigation algorithms for safe and efficient movement.</p>
</list-item>
<list-item>
<p>&#x2022; <italic>Improved traffic safety:</italic> Enhancing the localization methods of autonomous vehicles will contribute to ensuring a high level of safety for passengers, pedestrians, and other road users, thereby reducing the risk of accidents and collisions on the roads.</p>
</list-item>
<list-item>
<p>&#x2022; <italic>Environmental sustainability and cost-effectiveness:</italic> Optimizing navigation algorithms and methods can contribute to both environmental and economic development, as autonomous vehicles have the potential to reduce fuel costs and ensure efficient use of infrastructure.</p>
</list-item>
<list-item>
<p>&#x2022; <italic>Application in various fields:</italic> Improvement of navigation algorithms can have a positive impact on various sectors, including logistics, automated warehouses, and robotics, where high-precision localization and navigation are critically important for efficient operations.</p>
</list-item>
<list-item>
<p>&#x2022; <italic>Advancement of artificial intelligence technologies:</italic> The use of artificial intelligence methods, such as machine learning and computer vision, enables the creation of more accurate and adaptive navigation systems that can autonomously improve over time during operation.</p>
</list-item>
<list-item>
<p>&#x2022; <italic>Application of parallel computing:</italic> The use of parallel computing significantly enhances the speed of algorithms and ensures more efficient processing of large volumes of data received from the sensors of autonomous vehicles.</p>
</list-item>
<list-item>
<p>&#x2022; <italic>Integration with other transportation systems:</italic> Improving the navigation system of autonomous vehicles can facilitate integration with other transportation systems, such as intelligent road networks and public transportation systems.</p>
</list-item>
<list-item>
<p>&#x2022; <italic>Development of smart cities:</italic> Enhancing the navigation algorithms of autonomous vehicles can expedite the development of smart cities, where autonomous transport plays a crucial role in creating integrated and efficient transportation solutions.</p>
</list-item>
<list-item>
<p>&#x2022; <italic>Ensuring transportation accessibility:</italic> Improving the accuracy of autonomous vehicle localization can help ensure transportation accessibility for individuals with disabilities, the elderly, and other population groups for whom independent car operation may be difficult or impossible.</p>
</list-item>
<list-item>
<p>&#x2022; <italic>Enhancing the competitiveness of automakers:</italic> The development and optimization of navigation algorithms can help automakers increase their competitiveness in the market by offering consumers autonomous vehicles with high precision in localization and navigation. This can contribute to the advancement of autonomous transport and the widespread adoption of these technologies among a broad range of users.</p>
</list-item>
</list>
</p>
<p>The results of this research can have a positive impact on road safety, cost-effectiveness, environmental sustainability, and transportation accessibility. Additionally, they can contribute to the development of smart cities, integration of transportation systems, and the enhancement of competitiveness for automakers.</p>
<p>The relevance of employing parallel computing in the context of autonomous car navigation becomes evident when considering the following factors:<list list-type="simple">
<list-item>
<p>1. <italic>Large Data Volumes</italic> (<xref ref-type="bibr" rid="B11">Huang and Cao, 2021</xref>): Autonomous vehicles accumulate substantial data from diverse sensors like lidars, radars, and cameras. Swift processing of this data is crucial for appropriate responses to varied situations. Parallel computing enables simultaneous data processing, enhancing the efficiency of the navigation system.</p>
</list-item>
<list-item>
<p>2. <italic>Algorithmic Speed</italic> (<xref ref-type="bibr" rid="B38">Varsi et al., 2020</xref>): The swift execution of navigation tasks such as localization, route planning, and obstacle detection is imperative for autonomous cars. Parallel computing facilitates the distribution of tasks across numerous processors or cores, resulting in rapid responses and reduced information processing durations.</p>
</list-item>
<list-item>
<p>3. <italic>Energy Efficiency</italic> (<xref ref-type="bibr" rid="B3">Bi et al., 2020</xref>): Through optimal allocation of computing resources, parallel computing contributes to more energy-efficient navigation algorithms. This aspect is particularly significant for electric and hybrid vehicles with limited energy resources.</p>
</list-item>
<list-item>
<p>4. <italic>Compatibility with Distributed Systems</italic> (<xref ref-type="bibr" rid="B1">Amin et al., 2019</xref>)<italic>:</italic> Parallel computing can be leveraged to create distributed data processing systems. Different segments of navigation algorithms can be executed on diverse devices or nodes within the computing network, optimizing overall system performance, reliability, and scalability.</p>
</list-item>
<list-item>
<p>5. <italic>Real-Time Assurance</italic> (<xref ref-type="bibr" rid="B27">Mochurad and Shchur, 2021</xref>): Given the necessity for autonomous vehicles to respond to traffic situations in real-time, parallel computing plays a crucial role in ensuring swift execution of algorithms. This is essential for maintaining safe and efficient traffic conditions.</p>
</list-item>
<list-item>
<p>6. <italic>Adaptability to Various Computing Resources</italic> (<xref ref-type="bibr" rid="B11">Huang and Cao, 2021</xref>)<italic>:</italic> Parallel computing is applicable across different computing platforms, including CPUs, GPUs, and specialized accelerators like FPGAs and ASICs. This adaptability allows navigation algorithms to be tailored to the available resources, optimizing their overall efficiency.</p>
</list-item>
</list>
</p>
<p>The article analyzes the literature on the topic of the study. This was done with a view to highlighting the main advantages and disadvantages of the current state of the issue under consideration. In the paper (<xref ref-type="bibr" rid="B28">Monta&#xf1;ez et al., 2023</xref>), the authors employed an extended Kalman filter for the detection of moving objects. The effectiveness of the EKF was assessed using a dataset that includes location information obtained from LiDAR and a radar sensor for an object moving along a trajectory with abrupt changes.</p>
<p>The work (<xref ref-type="bibr" rid="B14">Koide et al., 2021</xref>) presents an approach that creates a globally consistent 3D map structure based on the loss factor during a real-time GPU-accelerated mapping process. Data is obtained from a 3D Lidar and maps are constructed based on it. The GPU is used to speed up the mapping algorithm during map creation.</p>
<p>In the research (<xref ref-type="bibr" rid="B33">Shreyas Madhav and Rajesh Kanna, 2021</xref>), an advanced Lidar 3D SLAM algorithm is introduced for autonomous aerial robots. The alignment process involves the extraction of Fast Point Feature Histogram (FPFH) descriptors, subsequently refined through iterative nearest point registration (NPR). The ultimate trajectory estimation undergoes 3D pose graph optimization to reduce potential overall drift. Simulated results demonstrate a noteworthy 26% decrease in execution time when employing the parallelized algorithm with 4 CPUs compared to its serial counterpart.</p>
<p>In (<xref ref-type="bibr" rid="B12">Jang et al., 2022</xref>), the authors detail an algorithm that employs GPU parallel processing to enhance the existing ND map matching process. This optimization resulted in a remarkable 48-fold acceleration while preserving accuracy.</p>
<p>The integration of a semantic image with low-resolution 3D Lidar point clouds and the generation of dense semantic depth maps are addressed in (<xref ref-type="bibr" rid="B18">Lou et al., 2023</xref>). Utilizing visual odometry, the method selects functional ORB points with depth information to enhance positional accuracy. During unmanned vehicle positioning, parallel threads are employed to aggregate 3D semantic point clouds.</p>
<p>In the paper (<xref ref-type="bibr" rid="B4">Chiang et al., 2023</xref>), the authors leverage Lidar as the primary auxiliary sensor, proposing a Lidar-based simultaneous localization and mapping (SLAM) approach for positioning, navigation, and synchronization. Furthermore, point cloud registration is executed through a three-dimensional normal distribution transform (NDT). The initial Lidar position assumption for Lidar-based SLAM is derived from two sources: one being a differential global navigation satellite system (GNSS) solution, and the other being an inertial navigation system (INS) and an integrated GNSS solution created using an extended Kalman filter with added motion constraints, including zero velocity update and nonholonomic constraint.</p>
<p>An improved NDT algorithm and its FPGA implementation were presented in (<xref ref-type="bibr" rid="B6">Deng et al., 2021</xref>). The authors achieved the acceleration of the search operation by using a new data structure called OAVS, which is non-recursive and efficient. The optimized semantic NDT algorithm based on OAVS significantly reduced the number of search operations by eliminating unnecessary queries. Additionally, the proposed streaming FPGA accelerator architecture for SEO-NDT improved real-time performance and ensured energy efficiency. When compared to advanced embedded CPU and GPU processors, the FPGA implementation provided up to 35.85x and 2.44x performance acceleration, respectively.</p>
<p>In (<xref ref-type="bibr" rid="B7">Dong et al., 2021</xref>), the authors designed their method so that it performs all calculations directly on the range images created using 3D LiDAR scans, which avoids explicit processing of the 3D point cloud and quickly selects the poles for each scan.</p>
<p>As indicated in (<xref ref-type="bibr" rid="B22">Mendez Maldonado et al., 2021</xref>), the authors developed a hybrid convolutional neural network (CNN) by directly applying a Markovian grid-based localization approach on the GPU. This CNN is capable of simultaneously handling image-based localization and odometry-based probability propagation within a single neural network. The detailed description of the Markovian approach can be found in (<xref ref-type="bibr" rid="B15">Kovtun et al., 2023a</xref>).</p>
<p>In (<xref ref-type="bibr" rid="B35">Sun et al., 2020</xref>) a new data structure with a spatial partitioning method was presented, which can be successfully built even for large volumes of point clouds. Based on this structure, a KNN search algorithm was developed that works effectively when the distribution of points is uneven. This innovative structure is implemented on both an FPGA accelerator and a GPU.</p>
<p>The paper (<xref ref-type="bibr" rid="B42">Xie et al., 2022</xref>) introduces a lightweight convolutional neural network (CNN) framework designed for the semantic segmentation of a projection-based LiDAR point cloud. This framework comprises only 1.9 million parameters, marking an 87% reduction compared to leading-edge networks. The evaluation on a GPU revealed a processing time of 38.5 milliseconds per frame and an achieved result of 47.9% mIoU on the Semantic-KITTI dataset. Moreover, the proposed CNN is tailored for FPGAs using the NVDLA architecture, demonstrating a 2.74x speedup compared to a GPU-based implementation and a noteworthy 46x improvement in energy efficiency.</p>
<p>In (<xref ref-type="bibr" rid="B25">Mochurad and Kryvinska, 2021</xref>), a parallel algorithm based on OpenMP technology is proposed to solve the problem of determining the current position of a lidar in 2D. The authors also indicated prospects for further research: 1) optimization of the computing process based on CUDA technology using GPUs; 2) consideration of a more complex spatial domain.</p>
<p>The researchers in (<xref ref-type="bibr" rid="B26">Mochurad et al., 2023b</xref>) introduced a parallel algorithm employing CUDA technologies to establish the 2D position of a lidar through the Particle Filter algorithm. Despite achieving a considerable speedup with this technology, it might appear that their findings challenge the hypothesis presented in our study. Nonetheless, this is not the case, as our investigation focuses on a distinct algorithm, addressing the issue of 3D localization and extending beyond closed-room localization.</p>
<p>In the study (<xref ref-type="bibr" rid="B43">Xu et al., 2022</xref>), the use of measurement uncertainty estimation is identified as an effective method for tracking a vehicle, based on LiDAR detectors. The authors propose an extended Kalman filter framework, consisting of two main components: the first is capable of assessing the statistics of measurement noises dependent on the state to detect LiDAR objects, while the second generates multi-hypothesis measurements based on the trajectory of the identified vehicle.</p>
<p>The shift from traditional automobiles to autonomous ones encompasses the integration and enhancement of diverse technologies and computerized algorithms. An integral aspect influencing the efficacy of autonomous vehicles is their localization, along with perception, route planning, and control, where the precision and effectiveness of localization assume a pivotal role in autonomous driving. The paper (<xref ref-type="bibr" rid="B31">Poulose et al., 2022</xref>) underscores the significance of the localization challenge in autonomous vehicles and elucidates its map-based realization employing point cloud matching. The authors introduce a localization system leveraging the Robot Operating System (ROS) in conjunction with Autoware. The empirical findings demonstrate that a map-centric localization system utilizing 3D lidar scanning delivers adequately precise real-time localization for autonomous driving within a university campus setting. The paper provides an exhaustive account of the methodologies for crafting point cloud maps and vehicle localization, along with a systematic guide for implementing a map-based system for autonomous driving.</p>
<p>In cities, there are many regions where the global navigation satellite system does not work, and localization for autonomous driving there remains a problem. Various methods have been previously proposed to improve the localization accuracy by using accurate distance measurements obtained from Lidar sensors and to increase the speed of map construction. This study proposes a parallelized 3D Kalman algorithm using CUDA to increase the computational speed of Lidar localization while maintaining the original lidar localization accuracy. Unlike previous papers that parallelize the map construction, this approach parallelizes the Kalman localization algorithm itself.</p>
<p>The aim of this study is to propose a parallel algorithm based on CUDA technology to accelerate lidar localization in 3D space.</p>
<p>The main contribution of this article can be summarized as follows:<list list-type="simple">
<list-item>
<p>1. A new localization algorithm is proposed that uses the Kalman filter and CUDA technology to increase the computational speed of Lidar localization in 3D;</p>
</list-item>
<list-item>
<p>2. A theoretical estimate of the acceleration based on Amdahl&#x2019;s law was calculated;</p>
</list-item>
<list-item>
<p>3. A comparison of the sequential Kalman algorithm and the parallel implementation for different sizes of datasets is carried out, and quantitative estimates of the advantages obtained over existing studies are given;</p>
</list-item>
<list-item>
<p>4. The localization error is determined, and it is found that the proposed algorithm makes it possible to obtain a speedup of 3.8 times without increasing the localization error, which amounted to 3%.</p>
</list-item>
</list>
</p>
<p>The rest of this article is organized as follows: Section 2 describes the proposed parallel localization algorithm, analyzes the computational complexity, presents a new theoretical estimate of the speedup based on Amdahl&#x2019;s law, and presents the steps of implementing the proposed algorithm using CUDA technology. Section 3 describes the environment used for testing and presents the relevant results of numerical experiments. Conclusions and prospects for further research are shown in the last section.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<p>The Kalman filter, discussed in (<xref ref-type="bibr" rid="B41">Wo and Biswal, 2023</xref>), serves as a powerful recursive tool for estimating the internal state of linear dynamic systems by analyzing a series of noisy measurements. Its application extends across various domains, encompassing engineering, economics, radar, computer vision, and the estimation of structural macroeconomic models. This filter holds significance as a fundamental component in control theory and the development of control systems. In conjunction with the linear-quadratic regulator (LQR), the Kalman filter addresses the challenges posed by linear-quadratic Gaussian (LQG) control problems. Collectively, the Kalman filter, LQR, and LQG controller represent essential solutions to core issues in control theory.</p>
<p>As noted by the authors of (<xref ref-type="bibr" rid="B23">Meng et al., 2023</xref>), Kalman filtering is an optimal recursive numerical computing algorithm characterized by the efficiency of program memory use, speed, and suitability for real-time data processing programs.</p>
<p>It is based on a mathematical model of the system and uses the principles of Kalman filtering to combine the predicted state with actual measurements to obtain the best estimate of the system&#x2019;s state. It has two main steps: prediction and correction. The prediction step uses mathematical models of the system and the previous state estimation to make a forecast of the future state of the system. In the correction step, the predicted state is updated to take into account new measurements that are affected by noise and incompleteness. The state estimation uses information such as the state vector and covariance matrix to provide an optimal estimate and minimal prediction error. This is achieved by weighting the prediction and new measurements based on their accuracy and uncertainty.</p>
<p>Kalman filters are constructed upon time-discretized linear dynamical systems, which are represented as Markov chains, as outlined in (<xref ref-type="bibr" rid="B16">Kovtun et al., 2023b</xref>). These chains are built on linear operators subject to errors, which may include Gaussian noise. The system&#x2019;s state is expressed as a vector of real numbers. At each discrete time increment (clock cycle), a linear operator is applied to the state, generating a new state that incorporates some noise and, if available, information from the system control. Subsequently, another linear operator, combined with additional noise, is applied to the true (&#x201c;hidden&#x201d;) state to produce the observed outputs. While the Kalman filter shares similarities with the hidden Markov model, a key distinction lies in the fact that the hidden state variables in the Kalman filter assume values in a continuous space, contrasting with the discrete state space of the hidden Markov model. Notably, there exists a robust duality between the equations of the Kalman filter and the hidden Markov model.<list list-type="simple">
<list-item>
<p>&#x2022; Prediction stage</p>
</list-item>
</list>
</p>
<p>The Kalman filter model postulates that the actual state at a given time point <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is deduced from the state at <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, as illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>:<disp-formula id="equ1">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="normal">B</mml:mi>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>where:</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Flowchart of the multivariate Kalman filter algorithm.</p>
</caption>
<graphic xlink:href="frobt-11-1341689-g001.tif"/>
</fig>
<p>
<inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is a state transition model applied to the previous state <inline-formula id="inf4">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>;</p>
<p>
<inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is a model of control effects applied to the control vector <inline-formula id="inf6">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>;</p>
<p>
<inline-formula id="inf7">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the noise of the process, which is assumed to have a multivariate normal distribution with zero mean and covariance <inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>The covariance of the predicted <inline-formula id="inf9">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> state is calculated using the following formula:<disp-formula id="equ2">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<list list-type="simple">
<list-item>
<p>&#x2022; Refinement stage</p>
</list-item>
</list>
</p>
<p>At a point in time <inline-formula id="inf10">
<mml:math id="m12">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, an observation (or measurement) <inline-formula id="inf11">
<mml:math id="m13">
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> of the present state <inline-formula id="inf12">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is made in accordance with<disp-formula id="equ3">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>C</mml:mi>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>where C is the observation model that maps the true state space to the observed space, and <inline-formula id="inf13">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the observation noise, assumed to be Gaussian white noise with zero mean and covariance <inline-formula id="inf14">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>Next, the covariance of innovations (deviation) <inline-formula id="inf15">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is calculated, which is then used to calculate the optimal Kalman transfer coefficient:<disp-formula id="equ4">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mi>k</mml:mi>
<mml:mi>T</mml:mi>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ5">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mi>k</mml:mi>
<mml:mi>T</mml:mi>
</mml:msubsup>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>After that, we calculate the updated state estimate and its covariance:<disp-formula id="equ6">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mtext>&#x2002;</mml:mtext>
<mml:mo>&#x2212;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>H</mml:mi>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:msub>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ7">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mo>&#x2212;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>K</mml:mi>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>The initial state and noise vectors at each cycle {<inline-formula id="inf16">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf17">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, &#x2026;, <inline-formula id="inf18">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf19">
<mml:math id="m26">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, &#x2026;, <inline-formula id="inf20">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>} are assumed to be mutually independent.</p>
<sec id="s2-1">
<title>2.1 The proposed parallel algorithm description</title>
<p>CUDA was used to parallelize the Kalman algorithm. Since all the operations in the Kalman algorithm are vector operations, i.e., transformations and other calculations are performed by matrix operations, it was decided to speed up their execution by moving them to CUDA. Thus, we have two subtasks: the prediction stage and the refinement stage. We created corresponding execution kernels for them. Next, we present an overview of the CUDA-based Kalman algorithm system:<list list-type="simple">
<list-item>
<p>1. <italic>Input Data</italic>:</p>
<list list-type="simple">
<list-item>
<p>&#x2022; The algorithm takes input data related to the prediction and refinement stages. This may include state predictions, covariance estimates, and other relevant information for each point.</p>
</list-item>
</list>
</list-item>
<list-item>
<p>2. <italic>Prediction Stage Kernel</italic>:</p>
<list list-type="simple">
<list-item>
<p>&#x2022; CUDA kernel specifically designed for the prediction stage.</p>
</list-item>
<list-item>
<p>&#x2022; Parameters: gridSize &#x3d; (point_num, 1), blockSize &#x3d; (Predict_Size, Predict_Size).</p>
</list-item>
<list-item>
<p>&#x2022; Each thread processes a point, calculates state prediction <inline-formula id="inf21">
<mml:math id="m28">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and covariance <inline-formula id="inf22">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> using the transition matrix <inline-formula id="inf23">
<mml:math id="m30">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>&#x2022; Execution involves parallel matrix operations for multiple points.</p>
</list-item>
</list>
</list-item>
<list-item>
<p>3. <italic>Synchronization (Prediction Stage)</italic>:</p>
<list list-type="simple">
<list-item>
<p>&#x2022; Threads are synchronized after completing calculations for the current step.</p>
</list-item>
<list-item>
<p>&#x2022; Ensures all threads have updated values of assumptions and covariances for the next steps.</p>
</list-item>
</list>
</list-item>
<list-item>
<p>4. <italic>Refinement Stage Kernel</italic>:</p>
<list list-type="simple">
<list-item>
<p>&#x2022; CUDA kernel dedicated to the refinement stage.</p>
</list-item>
<list-item>
<p>&#x2022; Parameters: gridSize &#x3d; (point_num, 1), blockSize &#x3d; (Predict_Size, Predict_Size).</p>
</list-item>
<list-item>
<p>&#x2022; Each thread processes a point, calculates deviation covariance <inline-formula id="inf24">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, optimal Kalman transfer coefficient <inline-formula id="inf25">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, updates assumptions <inline-formula id="inf26">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and covariance <inline-formula id="inf27">
<mml:math id="m34">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>&#x2022; Execution involves parallel matrix operations for multiple points.</p>
</list-item>
</list>
</list-item>
<list-item>
<p>5. <italic>Matrix Inversion (Refinement Stage)</italic>:</p>
<list list-type="simple">
<list-item>
<p>&#x2022; As part of calculating <inline-formula id="inf28">
<mml:math id="m35">
<mml:mrow>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, matrix inversion is required.</p>
</list-item>
<list-item>
<p>&#x2022; All threads are synchronized to perform matrix inversion collectively.</p>
</list-item>
<list-item>
<p>&#x2022; After inversion, parallel calculations resume.</p>
</list-item>
</list>
</list-item>
<list-item>
<p>6. <italic>Synchronization (Refinement Stage)</italic>:</p>
<list list-type="simple">
<list-item>
<p>&#x2022; Threads are synchronized again after calculating each update element.</p>
</list-item>
<list-item>
<p>&#x2022; Ensures consistent updated values before moving to the next step of the algorithm iteration.</p>
</list-item>
</list>
</list-item>
<list-item>
<p>7. <italic>Output Data</italic>:</p>
<list list-type="simple">
<list-item>
<p>&#x2022; The algorithm produces updated assumptions and covariances after both prediction and refinement stages.</p>
</list-item>
</list>
</list-item>
</list>
</p>
<p>This overview of the CUDA-based Kalman algorithm system shows how it processes input data, performs parallelized matrix operations, and synchronizes threads at critical points to maintain consistency in the calculations.</p>
</sec>
<sec id="s2-2">
<title>2.2 Analysis of computational complexity and theoretical estimation of speedup</title>
<p>The complexity of the Kalman algorithm:<disp-formula id="equ8">
<mml:math id="m36">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msubsup>
<mml:mi>n</mml:mi>
<mml:mi>z</mml:mi>
<mml:mn>2.4</mml:mn>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mi>n</mml:mi>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf29">
<mml:math id="m37">
<mml:mrow>
<mml:msubsup>
<mml:mi>n</mml:mi>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> follows from the manipulation of matrices of dimension <inline-formula id="inf30">
<mml:math id="m38">
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>x</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> &#xd7; <inline-formula id="inf31">
<mml:math id="m39">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>x</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf32">
<mml:math id="m40">
<mml:mrow>
<mml:msubsup>
<mml:mi>n</mml:mi>
<mml:mi>z</mml:mi>
<mml:mn>2.4</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> follows from the inversion of the matrix of dimension <inline-formula id="inf33">
<mml:math id="m41">
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>z</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> &#xd7; <inline-formula id="inf34">
<mml:math id="m42">
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>z</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>Therefore, in the context of parallelization using CUDA, the time complexity will be equivalent to the algorithm&#x2019;s complexity divided by the number of threads, with the exception of matrix inversion, as it involves synchronization:<disp-formula id="equ9">
<mml:math id="m43">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msubsup>
<mml:mi>n</mml:mi>
<mml:mi>z</mml:mi>
<mml:mn>2.4</mml:mn>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mi>n</mml:mi>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf35">
<mml:math id="m44">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the number of threads.</p>
<p>Since the Kalman algorithm consists of matrix operations, it was fully parallelized, but the matrix inversion is performed in synchronous mode. The data reading operations can be neglected, so, according to Amdahl&#x2019;s law (<xref ref-type="bibr" rid="B2">Anshu, 2019</xref>), the value of the sequential part (&#x3b1;) for the Kalman filter position and velocity prediction problem can be taken as the ratio of the complexity of the sequential matrix inversion to the complexity of all operations, namely:<disp-formula id="equ10">
<mml:math id="m45">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:msubsup>
<mml:mi>n</mml:mi>
<mml:mi>z</mml:mi>
<mml:mn>2.4</mml:mn>
</mml:msubsup>
<mml:mrow>
<mml:msubsup>
<mml:mi>n</mml:mi>
<mml:mi>z</mml:mi>
<mml:mn>2.4</mml:mn>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>6</mml:mn>
<mml:msubsup>
<mml:mi>n</mml:mi>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2248;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>73.72</mml:mn>
</mml:mrow>
<mml:mn>289.72</mml:mn>
</mml:mfrac>
<mml:mo>&#x2248;</mml:mo>
<mml:mn>0.26</mml:mn>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>Then the theoretical acceleration is as follows:<disp-formula id="equ11">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mi>p</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mn>0.26</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>0.26</mml:mn>
</mml:mrow>
<mml:mn>640</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2248;</mml:mo>
<mml:mn>3.9</mml:mn>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>where <italic>&#x3b1;</italic> is the sequential fraction of the algorithm and <italic>p</italic> is the number of cores.</p>
</sec>
<sec id="s2-3">
<title>2.3 Implementation of the proposed algorithm</title>
<p>CUDA was used to parallelize the Kalman algorithm. Since the Kalman algorithm consists of matrix operations, it was fully parallelized. That is, all matrix operations from the prediction and update stages were encapsulated in two corresponding processing kernels executed on the GPU. This way, we obtained one kernel for prediction and one for update, which made it possible to speed up the matrix operations and, therefore, the algorithm itself.</p>
<p>Since the algorithm is iterative, synchronization is performed after all the cores have completed one pass, to avoid race conditions or access to values that have not yet been calculated.</p>
<p>To use CUDA, we built two processing functions, one for each of the stages. The function of the prediction stage is performed as follows:<list list-type="simple">
<list-item>
<p>1. Thread indexing: The <bold>tx</bold> and <bold>ty</bold> variables store the index of the thread in the block, and <bold>bx</bold> stores the block index.</p>
</list-item>
<list-item>
<p>2. Allocation of shared memory: By using the <bold>__shared__</bold> keyword, a shared memory location is declared for the <bold>temp</bold> array with the size of the covariance matrix used for intermediate results.</p>
</list-item>
<list-item>
<p>3. State prediction: each thread calculates the predicted state <inline-formula id="inf36">
<mml:math id="m47">
<mml:mrow>
<mml:msup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for a particular point using the formula <inline-formula id="inf37">
<mml:math id="m48">
<mml:mrow>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> where the required range of matrices is selected for each specific point. This calculation is performed only by the first thread in each block (<bold>if (tx &#x3c; 1)</bold>), and the results are stored in the <bold>new_predictD</bold> array.</p>
</list-item>
<list-item>
<p>4. Prediction of covariance: Each thread computes the predicted covariance matrix <inline-formula id="inf38">
<mml:math id="m49">
<mml:mrow>
<mml:msup>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for a particular matrix element using the following formula <inline-formula id="inf39">
<mml:math id="m50">
<mml:mrow>
<mml:msubsup>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2219;</mml:mo>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>Q</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The intermediate result is stored in the shared array <bold>temp.</bold>
</p>
</list-item>
<list-item>
<p>5. Update covariance: The intermediate results stored in <bold>temp</bold> are multiplied by <inline-formula id="inf40">
<mml:math id="m51">
<mml:mrow>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, with each individual element computed in its own thread. The corresponding element of the matrix <inline-formula id="inf41">
<mml:math id="m52">
<mml:mrow>
<mml:mi mathvariant="bold-italic">Q</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is then added to each element of the final result. These calculations are performed under the condition (<bold>if (bx &#x3c; point_num)</bold>) to avoid a possible access attempt outside the allocated memory. The result is saved to the <bold>new_covD</bold> array.</p>
</list-item>
<list-item>
<p>6. Synchronization of threads: The <bold>__syncthreads()</bold> function ensures that all threads in a block complete their calculations and synchronize them before continuing execution.</p>
</list-item>
</list>
</p>
<p>The refinement function is performed according to the algorithm described below:<list list-type="simple">
<list-item>
<p>1. Indexing of threads: The <bold>tx</bold> and <bold>ty</bold> variables store the indexes of threads within the same block, <bold>bx</bold> stores the block number.</p>
</list-item>
<list-item>
<p>2. Allocation of shared memory: The <bold>__shared__</bold> keyword declares shared memory for the arrays <bold>temp</bold>, <bold>temp2</bold>, <bold>temp3</bold>, <inline-formula id="inf42">
<mml:math id="m53">
<mml:mrow>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <bold>temp4,</bold> and <bold>temp5</bold> for intermediate calculations and the value of the Kalman coefficient.</p>
</list-item>
<list-item>
<p>3. Calculations. <inline-formula id="inf43">
<mml:math id="m54">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>: Each thread computes one specific element of the product of two matrices <inline-formula id="inf44">
<mml:math id="m55">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> using the required set of values. The result of each thread is stored in a shared array <bold>temp.</bold>
</p>
</list-item>
<list-item>
<p>4. Calculations. <inline-formula id="inf45">
<mml:math id="m56">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>: Threads compute a specific single element of the result of an operation <inline-formula id="inf46">
<mml:math id="m57">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> using the value of <inline-formula id="inf47">
<mml:math id="m58">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> already found in the previous step. The result is stored in the shared array <bold>temp2.</bold>
</p>
</list-item>
<list-item>
<p>5. Calculations. <inline-formula id="inf48">
<mml:math id="m59">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>: Each thread computes one specific element of the result of the product <inline-formula id="inf49">
<mml:math id="m60">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. The result is stored in the shared array <bold>temp3.</bold>
</p>
</list-item>
<list-item>
<p>6. Synchronization and computation of the inverse matrix: The threads are synchronized so that all previous operations have finished their calculations and the subsequent execution works with a fully filled <bold>temp2</bold> array. To find <inline-formula id="inf50">
<mml:math id="m61">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, the already calculated matrix is taken and its inverse is computed in sequential mode. The resulting inverse matrix is stored in <bold>temp2_inv.</bold>
</p>
</list-item>
<list-item>
<p>7. Calculation of K: Threads calculate one specific element of the Kalman transfer coefficient matrix using the calculation results stored in <bold>temp3</bold> and <bold>temp2_inv.</bold> The result is saved to a shared array <inline-formula id="inf51">
<mml:math id="m62">
<mml:mrow>
<mml:mi mathvariant="bold-italic">K</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>8. Calculations. <inline-formula id="inf52">
<mml:math id="m63">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:msup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>: Each thread computes one specific difference element <inline-formula id="inf53">
<mml:math id="m64">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:msup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. The result is stored in <bold>temp4.</bold>
</p>
</list-item>
<list-item>
<p>9. Calculations. <inline-formula id="inf54">
<mml:math id="m65">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>: Each thread computes one value of the refined prediction of the next state <inline-formula id="inf55">
<mml:math id="m66">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> by adding the product of K by <inline-formula id="inf56">
<mml:math id="m67">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:msup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, stored in <bold>temp4</bold>, to the initial state prediction.</p>
</list-item>
<list-item>
<p>10. Calculations. <inline-formula id="inf57">
<mml:math id="m68">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>: Threads compute one specific element of the matrix <inline-formula id="inf58">
<mml:math id="m69">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The result is stored in <bold>temp5.</bold>
</p>
</list-item>
<list-item>
<p>11. Calculations. <inline-formula id="inf59">
<mml:math id="m70">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>: The threads compute the updated covariance matrix <inline-formula id="inf60">
<mml:math id="m71">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> by multiplying <inline-formula id="inf61">
<mml:math id="m72">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, stored in <bold>temp5</bold>, by the initial value of the covariance matrix <inline-formula id="inf62">
<mml:math id="m73">
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>12. Synchronization of threads: The <bold>__syncthreads()</bold> function ensures that all threads in a block complete their calculations and synchronize them before continuing execution.</p>
</list-item>
</list>
</p>
<p>Thus, <xref ref-type="statement" rid="alg1">Algorithm 1</xref> describes the implementation of the proposed algorithm.</p>
<p>
<statement content-type="algorithm" id="alg1">
<label/>
<p>
<list list-type="simple">
<list-item>
<p>&#x2003;//Initialization of constant variables and matrices</p>
</list-item>
<list-item>
<p>&#x2003;//const Predict, Measure, PredictSize, CovSize, MeasureSize</p>
</list-item>
<list-item>
<p>&#x2003;//H, HT, A, AT, Q, R, I</p>
</list-item>
<list-item>
<p>&#x2003;function ele&#x5f;multi (A, B, Awidth, Bwidth, tx, ty):</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;P, k &#x3d; 0, 0</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;for k &#x3d; 0 to Awidth: P &#x2b;&#x3d; A [ty &#x2a; Awidth &#x2b; k] &#x2a; B [k &#x2a; Bwidth &#x2b; tx]</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;return P</p>
</list-item>
<list-item>
<p>&#x2003;function inv&#x5f;cpu (a&#x5f;i, c&#x5f;o, n):</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;d &#x3d; 0, n &#x3d; 3</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;for i &#x3d; 0 to 3: d &#x2b;&#x3d; a&#x5f;i [0 &#x2a; 3 &#x2b; i] &#x2a; (a&#x5f;i [1 &#x2a;n &#x2b; ((i &#x2b; 1) % 3)] &#x2a; a&#x5f;i [2 &#x2a;n &#x2b; ((i &#x2b; 2) % 3)] - a&#x5f;i [1 &#x2a;n &#x2b; ((i &#x2b; 2) % 3)] &#x2a; a&#x5f;i [2 &#x2a;n &#x2b; ((i &#x2b; 1) % 3)])</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;if d: for i &#x3d; 0 to 3: for j &#x3d; 0 to 3: c&#x5f;o [i &#x2a;n &#x2b; j] &#x3d; ((a&#x5f;i [((j &#x2b; 1) % 3) &#x2a;n &#x2b; ((i &#x2b; 1) % 3)] &#x2a; a&#x5f;i [((j &#x2b; 2) % 3) &#x2a;n &#x2b; ((i &#x2b; 2) % 3)]) - (a&#x5f;i [((j &#x2b; 1) % 3) &#x2a;n &#x2b; ((i &#x2b; 2) % 3)] &#x2a; a&#x5f;i [((j &#x2b; 2) % 3) &#x2a;n &#x2b; ((i &#x2b; 1) % 3)]))/d</p>
</list-item>
<list-item>
<p>&#x2003;function PredictKernel (predictD, covD, new&#x5f;predictD, new&#x5f;covD, point&#x5f;num):</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;for bx &#x3d; 0 to point&#x5f;num:</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;for tx &#x3d; 0 to Predict: new&#x5f;predictD [bx &#x2a; PredictSize &#x2b; tx] &#x3d; ele&#x5f;multi (A, predictD &#x2b; bx &#x2a; PredictSize, Predict, 1, tx, 0), temp [0][tx] &#x3d; ele&#x5f;multi (A, covD &#x2b; bx &#x2a; CovSize, Predict, Predict, tx, 0)</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x5f;&#x5f;syncthreads ()</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;new&#x5f;covD [bx &#x2a; CovSize &#x2b; tx] &#x3d; ele&#x5f;multi (temp, AT, Predict, Predict, tx, 0) &#x2b; Q [tx]</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x5f;&#x5f;syncthreads ()</p>
</list-item>
<list-item>
<p>&#x2003;function UpdateKernel (dataD, predictD, covD, new&#x5f;predictD, new&#x5f;covD, point&#x5f;num, ite&#x5f;num):</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;for bx &#x3d; 0 to point&#x5f;num:</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;for ty &#x3d; 0 to Measure: temp [ty][0] &#x3d; ele&#x5f;multi (H, covD &#x2b; bx &#x2a; CovSize, Predict, Predict, 0, ty)</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x5f;&#x5f;syncthreads ()</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;for ty &#x3d; 0 to Measure: for tx &#x3d; 0 to Measure: temp2 [ty][tx] &#x3d; ele&#x5f;multi (temp, HT, Predict, Measure, tx, ty)</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x5f;&#x5f;syncthreads ()</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;for tx &#x3d; 0 to Measure: temp3 [0][tx] &#x3d; ele&#x5f;multi (covD &#x2b; bx &#x2a; CovSize, HT, Predict, Measure, tx, 0)</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x5f;&#x5f;syncthreads ()</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;for ty &#x3d; 0 to 2: for tx &#x3d; 0 to 1: temp2&#x5f;inv [ty &#x2a; Measure &#x2b; tx] &#x3d; ele&#x5f;multi (temp2&#x5f;inv&#x5f;f, temp3, Measure, Measure, tx, ty)</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;&#x5f;&#x5f;syncthreads ()</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;for ty &#x3d; 0 to Measure: temp4 [ty] &#x3d; dataD [MeasureSize &#x2a; bx &#x2b; ty] - ele&#x5f;multi (H, predictD &#x2b; bx &#x2a; PredictSize, Predict, 1, 0, ty)</p>
</list-item>
</list>
</p>
</statement>
<statement content-type="algorithm" id="alg2">
<label>Algorithm 1</label>
<p>Parallelized Kalman algorithm.<list list-type="simple">
<list-item>
<p>&#x2003;&#x2003;&#x2003;if tx &#x3d;&#x3d; 0: new&#x5f;predictD [bx &#x2a; PredictSize &#x2b;0] &#x3d; predictD [bx &#x2a; PredictSize &#x2b;0] &#x2b; ele&#x5f;multi (K, temp4, Measure, 1, 0, 0)</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;temp5 [0][0] &#x3d; I [0][0] - ele&#x5f;multi (K, H, Measure, Predict, 0, 0), &#x5f;&#x5f;syncthreads ()</p>
</list-item>
<list-item>
<p>&#x2003;&#x2003;&#x2003;new&#x5f;covD [bx &#x2a; PredictSize &#x2b; tx] &#x3d; ele&#x5f;multi (temp5, covD &#x2b; bx &#x2a; CovSize, Predict, Predict, tx, 0)</p>
</list-item>
</list>
</p>
</statement>
</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<p>For the study, a random three-dimensional space of values was generated (<xref ref-type="bibr" rid="B37">Top Streamers on Twitch, 2023</xref>) since the object of study is the speed of the algorithm. Random values were generated taking into account the contribution of measurement error and had a nonlinear characteristic.</p>
<p>The tests were conducted on a system with the following characteristics:</p>
<sec id="s3-1">
<title>3.1 System</title>
<p>CPU: Core i5-8500H.</p>
<p>RAM: DDR4 2667 MHz, 24 GB.</p>
<p>GPU: NVIDIA GeForce GTX 1050.</p>
<p>GPU RAM: GDDR5 8192 MB.</p>
<p>CUDA Cores: 640.</p>
<p>Memory interface: 256-bit.</p>
<p>GPU Interface: PCI Express x8 Gen3.</p>
</sec>
<sec id="s3-2">
<title>3.2 Experimental results</title>
<p>The execution times of the proposed parallelized algorithm using CUDA and of the algorithm implemented on the CPU are presented in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Execution time, s.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Number of points</th>
<th align="center">GPU</th>
<th align="center">CPU</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">10</td>
<td align="center">1.074</td>
<td align="center">0.334</td>
</tr>
<tr>
<td align="center">50</td>
<td align="center">1.589</td>
<td align="center">1.568</td>
</tr>
<tr>
<td align="center">100</td>
<td align="center">2.695</td>
<td align="center">2.957</td>
</tr>
<tr>
<td align="center">500</td>
<td align="center">6.350</td>
<td align="center">15.227</td>
</tr>
<tr>
<td align="center">1,000</td>
<td align="center">8.295</td>
<td align="center">29.450</td>
</tr>
<tr>
<td align="center">2,000</td>
<td align="center">16.253</td>
<td align="center">58.511</td>
</tr>
<tr>
<td align="center">5,000</td>
<td align="center">38.609</td>
<td align="center">146.716</td>
</tr>
<tr>
<td align="center">10,000</td>
<td align="center">77.652</td>
<td align="center">295.801</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>For visualization, the data in <xref ref-type="table" rid="T1">Table 1</xref> are presented in <xref ref-type="fig" rid="F2">Figure 2</xref>. According to the results shown in the table, for a small number of points, up to 50, the single-threaded CPU algorithm performs the same number of operations faster, because there are not enough points to optimally load the GPU and the time spent by CUDA on memory allocation is longer than the time of computation.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Visualization of the execution time of the proposed algorithm on GPU using CUDA and on the CPU.</p>
</caption>
<graphic xlink:href="frobt-11-1341689-g002.tif"/>
</fig>
<p>Based on the data in <xref ref-type="table" rid="T1">Table 1</xref>, the value of the resulting acceleration <inline-formula id="inf63">
<mml:math id="m74">
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> was calculated; it is shown in <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Acceleration of the parallel Kalman algorithm when using CUDA relative to CPU.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Number of points</th>
<th align="center">10</th>
<th align="center">50</th>
<th align="center">100</th>
<th align="center">500</th>
<th align="center">1,000</th>
<th align="center">2,000</th>
<th align="center">5,000</th>
<th align="center">10,000</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">
<inline-formula id="inf64">
<mml:math id="m75">
<mml:mrow>
<mml:mi mathvariant="bold-italic">S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">0.310</td>
<td align="center">0.986</td>
<td align="center">1.097</td>
<td align="center">2.397</td>
<td align="center">3.550</td>
<td align="center">3.600</td>
<td align="center">3.800</td>
<td align="center">3.809</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The results in <xref ref-type="table" rid="T2">Table 2</xref> show that, under the given conditions, the speedup levels off at about 3.8. The reliability of this result, obtained from numerical experiments with the software implementation of the proposed algorithm, is confirmed by the previously obtained theoretical estimate of the speedup, which should be equal to 3.9. Thus, a substantial speedup is achieved from 500 points onward, whereas for up to 50 points the algorithm is slower than the traditional one. These results are also visualized in <xref ref-type="fig" rid="F3">Figure 3</xref>. As can be seen, the speedup approaches its maximum between 1,000 and 5,000 points and rises only slowly thereafter.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Acceleration of the parallel Kalman filter algorithm.</p>
</caption>
<graphic xlink:href="frobt-11-1341689-g003.tif"/>
</fig>
<p>Thus, the software implementation of the proposed algorithm made it possible to process object location data up to 3.8 times faster. This acceleration, in turn, makes it possible to build real-time systems that require fast localization processing. Such systems can be autonomous vehicles or car pilot assistance systems, where the fastest possible processing of frequently received data on position in space and environment is required. The high speed of data flow is optimally suited for processing on CUDA.</p>
<p>In order to compare the sequential Kalman algorithm and the parallel implementation, tests and measurements were performed for different sizes of item datasets. These data were averaged and presented in a report for further analysis. Based on the results of the tests of the sequential and parallel Kalman algorithms, a graph comparing the average acceleration over different ranges of the number of points was constructed, which is shown in <xref ref-type="fig" rid="F4">Figure 4</xref>.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Comparison of average GPU acceleration for different datasets.</p>
</caption>
<graphic xlink:href="frobt-11-1341689-g004.tif"/>
</fig>
<p>According to <xref ref-type="fig" rid="F4">Figure 4</xref>, we can conclude that it is more efficient to delegate localization using the Kalman algorithm to the GPU for larger datasets, from 500 points. As we can see from the averaged results, the use of CUDA gives, on average, an increase in execution speed of about 2.4 times, whereas with the optimal sets it is 3.7 times faster. It can also be seen that using only suboptimal sets leads to losses in execution speed: owing to the previously described features of the algorithm and technology, the execution speed is only 80% of that of the typical sequential CPU algorithm.</p>
<p>As a result, we obtained a variation of the Kalman algorithm for localization in 3D space. The proposed approach is appropriate only for systems with a high load of data flow for localization, for which it speeds up localization by up to 3.8 times. It may be inappropriate to use it instead of the traditional approach when there are long delays between new data acquisitions, as the execution speed will not increase. Accuracy for both variations of the localization algorithm is shown in <xref ref-type="table" rid="T3">Table 3</xref>. As can be seen from the results, the quality of localization is almost identical for both algorithms. According to the results, both implementations have an average localization error of about 3%, i.e., the estimated position deviates from the real position by about 3% on average.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Localization error.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">Number of points</th>
<th colspan="2" align="center">Localization error, %</th>
</tr>
<tr>
<th align="center">GPU</th>
<th align="center">CPU</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">10</td>
<td align="center">3.156481147</td>
<td align="center">3.121895142</td>
</tr>
<tr>
<td align="center">50</td>
<td align="center">3.26348609</td>
<td align="center">3.341513087</td>
</tr>
<tr>
<td align="center">100</td>
<td align="center">2.829944308</td>
<td align="center">2.818013953</td>
</tr>
<tr>
<td align="center">500</td>
<td align="center">2.726926206</td>
<td align="center">2.721352679</td>
</tr>
<tr>
<td align="center">1,000</td>
<td align="center">3.087015553</td>
<td align="center">3.086803368</td>
</tr>
<tr>
<td align="center">2,000</td>
<td align="center">2.995234731</td>
<td align="center">3.058086489</td>
</tr>
<tr>
<td align="center">5,000</td>
<td align="center">3.067066336</td>
<td align="center">3.161766167</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Thus, the proposed algorithm allows us to obtain a solution up to 3.8 times faster without degrading the localization accuracy, which is effective in real-time decision making.</p>
<p>The results obtained were compared with the previous results obtained by other authors. For example, in (<xref ref-type="bibr" rid="B13">Jonsson, 2012</xref>), the author obtained a speedup of 1.5 for the number of points of 500&#x2013;10,000, while in our work this result was improved by 60%. Also, compared to (<xref ref-type="bibr" rid="B29">Osman et al., 2021</xref>), we managed to improve the speedup by about 90%. In (<xref ref-type="bibr" rid="B32">Sheikhpour and Atia, 2022</xref>), the authors managed to speed up the processing time of a parallel algorithm by 41% compared to a conventional sequential implementation, while we managed to do it by 73%.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<title>4 Conclusion</title>
<p>In this paper, we developed a parallelized version of the Kalman algorithm in 3D using CUDA to accelerate the computational speed of Lidar localization. Localization using Lidar is relevant for autonomous driving in regions where the global navigation satellite system does not work. The result is a software product that processes object location data faster and can be used for real-time systems, such as autonomous vehicles or car pilot assistance systems, where frequently received data on position and environment need to be processed as quickly as possible. High data rates are optimally suited for CUDA processing.</p>
<p>We tested and measured the efficiency of the sequential Kalman algorithm and the parallel implementation on different sizes of lidar position datasets. It turned out that the use of CUDA is more efficient on larger datasets, from 500 points. On average, using CUDA gives an increase in execution speed of about 2.4 times, and on the optimal set, up to 3.8 times. Using CUDA on suboptimal sets can lead to losses in execution speed, since due to the peculiarities of the algorithm and acceleration technology, the execution speed is only 80% of that of the sequential version. The gain on larger datasets can be explained by the fact that CUDA allows many points to be processed in parallel, which reduces the execution time of the algorithm.</p>
<p>In general, the use of CUDA can significantly increase the efficiency of the Kalman algorithm, because in real conditions, it is necessary to constantly process the data coming from the lidar. Nowadays, there are many CUDA-enabled edge devices, which confirms the relevance of the presented algorithm and this study.</p>
<p>Prospects for further research include the possibility of extending the proposed algorithm to the case of 2D LIDAR data and analyzing the proposed algorithm based on SLAM technology (<xref ref-type="bibr" rid="B46">Zhu et al., 2009</xref>).</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found in the article/Supplementary material.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>LM: Conceptualization, Data curation, Formal Analysis, Funding acquisition, Investigation, Methodology, Project administration, Resources, Software, Supervision, Validation, Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This research was funded by National Research Foundation of Ukraine project number 2023.03/0029.</p>
</sec>
<ack>
<p>The author thanks the reviewers for the relevant comments that helped to present the paper better. The National Research Foundation of Ukraine funds this study from the state budget of Ukraine within the project No 2023.03/0029.</p>
</ack>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The author declares that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Amin</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Abbasi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Rehman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>G. S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>An advanced algorithm for higher network navigation in social internet of things using small-world networks</article-title>. <source>Sensors</source> <volume>19</volume> (<issue>9</issue>), <fpage>2007</fpage>. <pub-id pub-id-type="doi">10.3390/s19092007</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Anshu</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2019</year>). <source>An analytical study of Amdahl&#x27;s and Gustafson&#x27;s law</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://ssrn.com/abstract=3435202">https://ssrn.com/abstract&#x3d;3435202</ext-link>.</comment>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Shang</surname>
<given-names>W.-L.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Cooperative and energy-efficient strategies in emergency navigation using edge computing</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>54441</fpage>&#x2013;<lpage>54455</lpage>. <pub-id pub-id-type="doi">10.1109/access.2020.2982120</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chiang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Chiu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Srinara</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tsai</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Performance of LiDAR-SLAM-based PNT with initial poses based on NDT scan matching algorithm</article-title>. <source>Satell. Navig.</source> <volume>4</volume> (<issue>3</issue>), <fpage>3</fpage>. <pub-id pub-id-type="doi">10.1186/s43020-022-00092-0</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dabbiru</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Goodin</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Scherrer</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Carruth</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>LiDAR data segmentation in off-road environment using convolutional neural networks (CNN)</article-title>. <source>SAE Tech. Pap. Ser.</source> <volume>2</volume> (<issue>6</issue>), <fpage>3288</fpage>&#x2013;<lpage>3292</lpage>. <comment>pg. 3288</comment>. <pub-id pub-id-type="doi">10.4271/2020-01-0696</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deng</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Shu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ha</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>An optimized FPGA-based real-time NDT for 3D-LiDAR localization in smart vehicles</article-title>. <source>IEEE Trans. Circuits Syst. II Express Briefs</source> <volume>68</volume>, <fpage>3167</fpage>&#x2013;<lpage>3171</lpage>. <pub-id pub-id-type="doi">10.1109/tcsii.2021.3095764</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Dong</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Stachniss</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Online range image-based Pole extractor for long-term LiDAR localization in urban environments</article-title>,&#x201d; in <source>2021 European conference on mobile robots (ECMR)</source>, <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Elhousni</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>A survey on 3d lidar localization for autonomous vehicles</article-title>,&#x201d; in <source>2020 IEEE intelligent vehicles symposium (IV)</source> (<publisher-name>IEEE</publisher-name>), <fpage>1879</fpage>&#x2013;<lpage>1884</lpage>.</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Garland</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Le Grand</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Nickolls</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Anderson</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hardwick</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Morton</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2008</year>). <article-title>Parallel computing experiences with CUDA</article-title>. <source>IEEE Micro</source> <volume>28</volume> (<issue>4</issue>), <fpage>13</fpage>&#x2013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.1109/mm.2008.57</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Lidar boosts 3D ecological observations and modelings: a review and perspective</article-title>. <source>IEEE Geoscience Remote Sens. Mag.</source> <volume>9</volume> (<issue>26</issue>), <fpage>232</fpage>&#x2013;<lpage>257</lpage>. <pub-id pub-id-type="doi">10.1109/mgrs.2020.3032713</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Parallel differential evolutionary Particle filtering algorithm based on the CUDA unfolding cycle</article-title>. <source>Wirel. Commun. Mob. Comput.</source> <volume>2021</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1155/2021/1999154</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jang</surname>
<given-names>K. W.</given-names>
</name>
<name>
<surname>Jeong</surname>
<given-names>W. J.</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Development of a GPU-accelerated NDT localization algorithm for GNSS-denied urban areas</article-title>. <source>Sensors</source> <volume>22</volume> (<issue>5</issue>), <fpage>1913</fpage>. <pub-id pub-id-type="doi">10.3390/s22051913</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Jonsson</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2012</year>). <source>&#x201c;Parallelization of the Kalman filter for multi-output systems on multicore platforms (Dissertation)&#x201d;</source>. <comment>Accessed <ext-link ext-link-type="uri" xlink:href="https://urn.kb.se/resolve?urn=urn:nbn:se:uu:diva-205553">https://urn.kb.se/resolve?urn&#x3d;urn:nbn:se:uu:diva-205553</ext-link>.</comment>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Koide</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yokozuka</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Oishi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Banno</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Globally consistent 3D LiDAR mapping with GPU-accelerated GICP matching cost factors</article-title>. <source>IEEE Robotics Automation Lett.</source> <volume>6</volume> (<issue>4</issue>), <fpage>8591</fpage>&#x2013;<lpage>8598</lpage>. <pub-id pub-id-type="doi">10.1109/lra.2021.3113043</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kovtun</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Altameem</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Al-Maitah</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kempa</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>The Markov concept of the energy efficiency assessment of the edge computing infrastructure peripheral server functioning over time</article-title>. <source>Electronics</source> <volume>12</volume> (<issue>20</issue>), <fpage>4320</fpage>. <pub-id pub-id-type="doi">10.3390/electronics12204320</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kovtun</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Izonin</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Gregus</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023b</year>). <article-title>The functional safety assessment of cyber-physical system operation process described by Markov chain</article-title>. <source>Sci. Rep.</source> <volume>12</volume>, <fpage>7089</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-022-11193-w</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Trajectory prediction and visual localization of snake robot based on BiLSTM neural network</article-title>. <source>Appl. Intell.</source> <volume>53</volume>, <fpage>27790</fpage>&#x2013;<lpage>27807</lpage>. <pub-id pub-id-type="doi">10.1007/s10489-023-04897-7</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lou</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>SLAM and 3D semantic reconstruction based on the fusion of lidar and monocular vision</article-title>. <source>Sensors</source> <volume>23</volume> (<issue>3</issue>), <fpage>1502</fpage>. <pub-id pub-id-type="doi">10.3390/s23031502</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Smart</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Real-time performance-focused localization techniques for autonomous vehicle: a review</article-title>. <source>Trans. Intell. Transp. Sys.</source> <volume>23</volume> (<issue>7</issue>), <fpage>6082</fpage>&#x2013;<lpage>6100</lpage>. <pub-id pub-id-type="doi">10.1109/tits.2021.3077800</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Benslimane</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Localization and navigation in autonomous driving: threats and countermeasures</article-title>. <source>IEEE Wirel. Commun.</source> <volume>26</volume> (<issue>4</issue>), <fpage>38</fpage>&#x2013;<lpage>45</lpage>. <pub-id pub-id-type="doi">10.1109/mwc.2019.1800533</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Marck</surname>
<given-names>J. W.</given-names>
</name>
<name>
<surname>Mohamoud</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>vd Houwen</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>van Heijster</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2013</year>). &#x201c;<article-title>Indoor radar SLAM A radar application for vision and gps denied environments</article-title>,&#x201d; in <source>Proceedings of the 43rd European microwave conference</source> (<publisher-loc>Nuremberg, Germany</publisher-loc>: <publisher-name>EuMA, Louvain-la-Neuve</publisher-name>), <fpage>1783</fpage>&#x2013;<lpage>1786</lpage>.</citation>
</ref>
<ref id="B22">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Mendez Maldonado</surname>
<given-names>O. A.</given-names>
</name>
<name>
<surname>Hadfield</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bowden</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Markov localization using heatmap regression and deep convolutional odometry</article-title>,&#x201d; in <source>2021 IEEE international conference on robotics and automation (ICRA)</source>, <fpage>9638</fpage>&#x2013;<lpage>9644</lpage>.</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>An efficient variable step solar maximum power point tracking algorithm</article-title>. <source>Energies</source> <volume>16</volume>, <fpage>1299</fpage>. <pub-id pub-id-type="doi">10.3390/en16031299</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mochurad</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hladun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tkachenko</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>An obstacle-finding approach for autonomous mobile robots using 2D LiDAR data</article-title>. <source>Big Data Cognitive Comput.</source> <volume>7</volume> (<issue>1</issue>), <fpage>43</fpage>. <pub-id pub-id-type="doi">10.3390/bdcc7010043</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mochurad</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Kryvinska</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Parallelization of finding the current coordinates of the lidar based on the genetic algorithm and OpenMP technology</article-title>. <source>Symmetry</source> <volume>13</volume> (<issue>4</issue>), <fpage>666</fpage>. <pub-id pub-id-type="doi">10.3390/sym13040666</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Mochurad</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Matviiv</surname>
<given-names>O.-V.</given-names>
</name>
<name>
<surname>Lema</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Vilhutska</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2023b</year>). &#x201c;<article-title>CUDA-based algorithm for lidar position determination in mobile robotics</article-title>,&#x201d; in <source>Proceedings of the modern machine learning technologies and data science workshop</source> (<publisher-loc>Ukraine</publisher-loc>), <fpage>193</fpage>&#x2013;<lpage>203</lpage>.</citation>
</ref>
<ref id="B27">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Mochurad</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Shchur</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Parallelization of cryptographic algorithm based on different parallel computing technologies</article-title>,&#x201d; in <source>Proceedings of the symposium on information technologies and applied sciences (IT&#x26;AS 2021)</source>, <fpage>20</fpage>&#x2013;<lpage>29</lpage>. <comment>Bratislava, Slovak Republic, ISSN 1613-0073</comment>.</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Monta&#xf1;ez</surname>
<given-names>O. J.</given-names>
</name>
<name>
<surname>Suarez</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Fernandez</surname>
<given-names>E. A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Application of data sensor fusion using extended kalman filter algorithm for identification and tracking of moving targets from LiDAR&#x2013;radar data</article-title>. <source>Remote Sens.</source> <volume>15</volume> (<issue>13</issue>), <fpage>3396</fpage>. <pub-id pub-id-type="doi">10.3390/rs15133396</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Osman</surname>
<given-names>H. H.</given-names>
</name>
<name>
<surname>Ismail</surname>
<given-names>I. A.</given-names>
</name>
<name>
<surname>Morsy</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Hawidi</surname>
<given-names>H. M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Implementing the kalman filter algorithm in parallel form: denoising sound wave as a case study</article-title>. <source>Recent Adv. Comput. Sci. Commun.</source> <volume>14</volume>, <fpage>2828</fpage>&#x2013;<lpage>2835</lpage>. <pub-id pub-id-type="doi">10.2174/2666255813999200806161813</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Phang</surname>
<given-names>F. A.</given-names>
</name>
<name>
<surname>Pusppanathan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Nawi</surname>
<given-names>N. D.</given-names>
</name>
<name>
<surname>Zulkifli</surname>
<given-names>N. A.</given-names>
</name>
<name>
<surname>Zulkapri</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Che Harun</surname>
<given-names>F. K.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Integrating drone technology in service learning for engineering students</article-title>. <source>Int. J. Emerg. Technol. Learn.</source> <volume>16</volume> (<issue>15</issue>), <fpage>78</fpage>&#x2013;<lpage>90</lpage>. <pub-id pub-id-type="doi">10.3991/ijet.v16i15.23673</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Poulose</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Baek</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>D. S.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Point cloud map generation and localization for autonomous vehicles using 3D lidar scans</article-title>,&#x201d; in <source>2022 27th Asia Pacific conference on communications (APCC)</source> (<publisher-loc>Jeju Island, Korea</publisher-loc>), <fpage>336</fpage>&#x2013;<lpage>341</lpage>.</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sheikhpour</surname>
<given-names>K. S.</given-names>
</name>
<name>
<surname>Atia</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A real-time CPU-GPU embedded implementation of a tightly-coupled visual-inertial navigation system</article-title>. <source>IEEE Access</source> <volume>10</volume>, <fpage>86384</fpage>&#x2013;<lpage>86394</lpage>. <pub-id pub-id-type="doi">10.1109/access.2022.3199384</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Shreyas Madhav</surname>
<given-names>A. V.</given-names>
</name>
<name>
<surname>Rajesh Kanna</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Parallel FPFH SLAM for aerial vehicles</article-title>,&#x201d; in <source>2021 IEEE conference on Norbert Wiener in the 21st century (21CW)</source>, <fpage>1</fpage>&#x2013;<lpage>4</lpage>.</citation>
</ref>
<ref id="B34">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Shymanskyi</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Sokolovskyy</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Sokolovskyy</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bubnyak</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Variational method for solving the time-fractal heat conduction problem in the Claydite-Block construction</article-title>,&#x201d; in <source>Advances in computer science for engineering and education, ICCSEEA 2022; lecture notes on data engineering and communications technologies</source> (<publisher-loc>Cham, Switzerland</publisher-loc>: <publisher-name>Springer</publisher-name>), <volume>134</volume>, <fpage>97</fpage>&#x2013;<lpage>106</lpage>.</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ha</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Efficient FPGA implementation of K-Nearest-Neighbor search algorithm for 3D LIDAR localization and mapping in smart vehicles</article-title>. <source>IEEE Trans. Circuits Syst. II Express Briefs</source> <volume>67</volume> (<issue>9</issue>), <fpage>1644</fpage>&#x2013;<lpage>1648</lpage>. <pub-id pub-id-type="doi">10.1109/tcsii.2020.3013758</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Tian</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Unsupervised object detection with lidar cues</article-title>,&#x201d; in <source>2021 IEEE/CVF conference on computer vision and pattern recognition (CVPR)</source>, <fpage>5962</fpage>&#x2013;<lpage>5972</lpage>.</citation>
</ref>
<ref id="B37">
<citation citation-type="book">
<collab>Top Streamers on Twitch</collab> (<year>2023</year>). <source>Top Streamers on Twitch</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/aayushmishra1512/twitchdata">https://www.kaggle.com/datasets/aayushmishra1512/twitchdata</ext-link> (Accessed December 26, 2023)</comment>.</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Varsi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Taylor</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kekempanos</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Pyzer Knapp</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Maskell</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A fast parallel particle filter for shared memory systems</article-title>. <source>IEEE Signal Process. Lett.</source> <volume>27</volume>, <fpage>1570</fpage>&#x2013;<lpage>1574</lpage>. <pub-id pub-id-type="doi">10.1109/lsp.2020.3014035</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Varsi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Taylor</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kekempanos</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Pyzer Knapp</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Maskell</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A fast parallel particle filter for shared memory systems</article-title>. <source>IEEE Signal Process. Lett.</source> <volume>27</volume>, <fpage>1570</fpage>&#x2013;<lpage>1574</lpage>. <pub-id pub-id-type="doi">10.1109/lsp.2020.3014035</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Obstacle-avoidance path-planning algorithm for autonomous vehicles based on B-spline algorithm</article-title>. <source>World Electr. Veh. J.</source> <volume>13</volume> (<issue>12</issue>), <fpage>233</fpage>. <pub-id pub-id-type="doi">10.3390/wevj13120233</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jwo</surname>
<given-names>D.-J.</given-names>
</name>
<name>
<surname>Biswal</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Implementation and performance analysis of kalman filters with consistency validation</article-title>. <source>Mathematics</source> <volume>11</volume> (<issue>3</issue>), <fpage>521</fpage>. <pub-id pub-id-type="doi">10.3390/math11030521</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xie</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Real-time LiDAR point cloud semantic segmentation for autonomous driving</article-title>. <source>Electronics</source> <volume>11</volume> (<issue>1</issue>), <fpage>11</fpage>. <pub-id pub-id-type="doi">10.3390/electronics11010011</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Niu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Blasch</surname>
<given-names>E. P.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Uncertainty aware EKF: a tracking filter learning LiDAR measurement uncertainty</article-title>,&#x201d; in <source>2022 25th international conference on information fusion (FUSION)</source> (<publisher-loc>Link&#xf6;ping, Sweden</publisher-loc>), <fpage>1</fpage>&#x2013;<lpage>8</lpage>.</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Fast and robust iterative closest point</article-title>. <source>IEEE Trans. Pattern Analysis Mach. Intell.</source> <volume>44</volume> (<issue>7</issue>), <fpage>3450</fpage>&#x2013;<lpage>3466</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.2021.3054619</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Performance enhanced Kalman filter design for non-Gaussian stochastic systems with data-based minimum entropy optimisation</article-title>. <source>AIMS Electron. Electr. Eng.</source> <volume>3</volume> (<issue>4</issue>), <fpage>382</fpage>&#x2013;<lpage>396</lpage>. <pub-id pub-id-type="doi">10.3934/electreng.2019.4.382</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2009</year>). &#x201c;<article-title>A SLAM algorithm based on the central difference Kalman filter</article-title>,&#x201d; in <source>2009 IEEE intelligent vehicles symposium</source> (<publisher-loc>Xi&#x27;an, China</publisher-loc>), <fpage>123</fpage>&#x2013;<lpage>128</lpage>.</citation>
</ref>
</ref-list>
</back>
</article>