<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2023.1268852</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Artificial Intelligence</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Fitting a collider in a quantum computer: tackling the challenges of quantum machine learning for big datasets</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Peixoto</surname> <given-names>Miguel Ca&#x000E7;ador</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2587466/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Castro</surname> <given-names>Nuno Filipe</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2382436/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Crispim Rom&#x000E3;o</surname> <given-names>Miguel</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2550516/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Oliveira</surname> <given-names>Maria Gabriela Jord&#x000E3;o</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2582548/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Ochoa</surname> <given-names>In&#x000EA;s</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2519721/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>LIP&#x02014;Laborat&#x000F3;rio de Instrumenta&#x000E7;&#x000E3;o e F&#x000ED;sica Experimental de Part&#x000ED;culas, Escola de Ci&#x000EA;ncias, Universidade do Minho</institution>, <addr-line>Braga</addr-line>, <country>Portugal</country></aff>
<aff id="aff2"><sup>2</sup><institution>Departamento de F&#x000ED;sica, Escola de Ci&#x000EA;ncias, Universidade do Minho</institution>, <addr-line>Braga</addr-line>, <country>Portugal</country></aff>
<aff id="aff3"><sup>3</sup><institution>Department of Physics and Astronomy, University of Southampton</institution>, <addr-line>Southampton</addr-line>, <country>United Kingdom</country></aff>
<aff id="aff4"><sup>4</sup><institution>LIP&#x02014;Laborat&#x000F3;rio de Instrumenta&#x000E7;&#x000E3;o e F&#x000ED;sica Experimental de Part&#x000ED;culas</institution>, <addr-line>Lisbon</addr-line>, <country>Portugal</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Matt LeBlanc, The University of Manchester, United Kingdom</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Jack Y. Araz, Jefferson Lab (DOE), United States; Stefano Scali, University of Exeter, United Kingdom</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Nuno Filipe Castro <email>nuno.castro&#x00040;fisica.uminho.pt</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>15</day>
<month>12</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>6</volume>
<elocation-id>1268852</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>07</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>20</day>
<month>11</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2023 Peixoto, Castro, Crispim Rom&#x000E3;o, Oliveira and Ochoa.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Peixoto, Castro, Crispim Rom&#x000E3;o, Oliveira and Ochoa</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Current quantum systems have significant limitations affecting the processing of large datasets with high dimensionality, typical of high energy physics. In the present paper, feature and data prototype selection techniques were studied to tackle this challenge. A grid search was performed and quantum machine learning models were trained and benchmarked against classical shallow machine learning methods, trained both on the reduced and the complete datasets. The performance of the quantum algorithms was found to be comparable to the classical ones, even when using large datasets. Sequential Backward Selection and Principal Component Analysis techniques were used for feature selection and, while the former can produce better quantum machine learning models in specific cases, it is more unstable. Additionally, we show that such variability in the results is caused by the use of discrete variables, highlighting the suitability of Principal Component Analysis-transformed data for quantum machine learning applications in the high energy physics context.</p></abstract>
<kwd-group>
<kwd>high energy physics</kwd>
<kwd>quantum computing</kwd>
<kwd>quantum machine learning</kwd>
<kwd>K-means</kwd>
<kwd>principal component analysis</kwd>
<kwd>data reduction</kwd>
</kwd-group>
<contract-num rid="cn001">CERN/FIS-COM/0004/2021</contract-num>
<contract-num rid="cn002">LCF/BQ/PI20/11760025</contract-num>
<contract-num rid="cn003">847648</contract-num>
<contract-sponsor id="cn001">Funda&#x000E7;&#x000E3;o para a Ci&#x000EA;ncia e a Tecnologia<named-content content-type="fundref-id">10.13039/501100001871</named-content></contract-sponsor>
<contract-sponsor id="cn002">&#x00027;la Caixa&#x00027; Foundation<named-content content-type="fundref-id">10.13039/100010434</named-content></contract-sponsor>
<contract-sponsor id="cn003">H2020 Marie Sk&#x00142;odowska-Curie Actions<named-content content-type="fundref-id">10.13039/100010665</named-content></contract-sponsor>
<counts>
<fig-count count="13"/>
<table-count count="6"/>
<equation-count count="9"/>
<ref-count count="58"/>
<page-count count="17"/>
<word-count count="10052"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Big Data and AI in High Energy Physics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>The Standard Model of Particle Physics (SM) provides a remarkable description of the fundamental constituents of matter and their interactions, being in excellent agreement with the collider data accumulated so far. Nonetheless, there are still important open questions, unaddressed by the SM, such as gravity, dark matter, dark energy, or the matter-antimatter asymmetry in the universe (Ellis, <xref ref-type="bibr" rid="B25">2012</xref>), motivating a comprehensive search program for new physics phenomena beyond the SM (BSM) at the Large Hadron Collider (LHC) at CERN.</p>
<p>The search for BSM phenomena at colliders poses specific challenges in data processing and analysis, given the extremely large datasets involved and the low signal to background ratios expected. In this context, the analysis of the collision data obtained by the LHC experiments often relies on machine learning (ML), a field in computer science that can harness large amounts of data to train generalizable algorithms for a variety of applications (Guest et al., <xref ref-type="bibr" rid="B31">2018</xref>; Feickert and Nachman, <xref ref-type="bibr" rid="B27">2021</xref>), such as classification tasks. These techniques have shown an outstanding ability to find correlations in high-dimensional parameter spaces to discriminate between potential signal and background processes. They are known to scale with data, and usually rely on a large number of learnable parameters to achieve their remarkable performance.</p>
<p>In order to train these large models, classical<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> machine learning (CML) takes advantage of hardware accelerators, such as graphics processing units (GPUs), for efficient, parallel, and fast matrix multiplications. On the other hand, a new class of hardware is becoming available, with the advent of noisy intermediate-scale quantum (NISQ) computing devices. This accelerated the development of new quantum algorithms targeted at exploiting the capacity and feasibility of this new technology for ML applications.</p>
<p>Quantum machine learning (QML) is an emerging research field aiming to use quantum circuits to tackle ML tasks. One of the motivations for using this new technology in high energy physics (HEP) relates to the intrinsic properties of quantum computations, namely representing the data in a Hilbert space where the data can be in a superposition of states or in entangled states, which can allow to explore additional information in data analysis and, eventually, contribute to better classification of HEP events, namely in the context of the search for BSM phenomena. Recently, this new technology has been applied to various HEP problems (Guan et al., <xref ref-type="bibr" rid="B30">2021</xref>). Namely, in event reconstruction (Das et al., <xref ref-type="bibr" rid="B21">2019</xref>; Shapoval and Calafiura, <xref ref-type="bibr" rid="B47">2019</xref>; Bapst et al., <xref ref-type="bibr" rid="B6">2020</xref>; T&#x000FC;ys&#x000FC;z et al., <xref ref-type="bibr" rid="B51">2020</xref>; Wei et al., <xref ref-type="bibr" rid="B53">2020</xref>; Zlokapa et al., <xref ref-type="bibr" rid="B57">2021a</xref>; Funcke et al., <xref ref-type="bibr" rid="B28">2022</xref>), classification tasks (Mott et al., <xref ref-type="bibr" rid="B37">2017</xref>; Belis et al., <xref ref-type="bibr" rid="B7">2021</xref>; Blance and Spannowsky, <xref ref-type="bibr" rid="B11">2021</xref>; Terashi et al., <xref ref-type="bibr" rid="B49">2021</xref>; Wu et al., <xref ref-type="bibr" rid="B56">2021</xref>; Zlokapa et al., <xref ref-type="bibr" rid="B58">2021b</xref>; Araz and Spannowsky, <xref ref-type="bibr" rid="B5">2022</xref>; Chen et al., <xref ref-type="bibr" rid="B17">2022</xref>; Gianelle et al., <xref ref-type="bibr" rid="B29">2022</xref>), data generation (Chang et al., <xref ref-type="bibr" rid="B15">2021a</xref>,<xref ref-type="bibr" rid="B16">b</xref>; Delgado and Hamilton, <xref ref-type="bibr" rid="B23">2022</xref>; Borras et al., <xref ref-type="bibr" rid="B12">2023</xref>; Rehm et 
al., <xref ref-type="bibr" rid="B42">2023</xref>), and anomaly detection problems (Ngairangbam et al., <xref ref-type="bibr" rid="B38">2022</xref>; Alvi et al., <xref ref-type="bibr" rid="B2">2023</xref>; Schuhmacher et al., <xref ref-type="bibr" rid="B43">2023</xref>; Wo&#x0017A;niak et al., <xref ref-type="bibr" rid="B55">2023</xref>).</p>
<p>Despite the promising potential of quantum computation, NISQ processors have important limitations, such as the qubit quality (i.e., the accuracy with which it is possible to execute quantum gates), the qubit lifetime and the limited depth of quantum circuits, since for large circuits the noise overwhelms the signal (Li et al., <xref ref-type="bibr" rid="B35">2018</xref>; Preskill, <xref ref-type="bibr" rid="B40">2018</xref>). This necessarily limits the complexity of the circuits and the size of the datasets used to train them.</p>
<p>In this paper, we perform a systematic comparison of the performance of QML and shallow CML algorithms in HEP. The choice to focus on shallow methods rather than state-of-the-art architectures based on deep neural networks is to provide a fair comparison between methodologies, since neural networks are known to require large datasets (both in terms of sample size and dimension) to achieve good performance, something that is not feasible with current quantum computers. By choosing CML algorithms suited for smaller datasets, we will add to the ongoing discussion regarding potential advantages of quantum computing by comparing QML and CML on the same footing.</p>
<p>The use of QML algorithms in this context is studied by targeting a common binary classification task in HEP: classifying a BSM signal against SM background. A benchmark BSM signal leading to the <italic>Zt</italic> final state is considered, in events with multiple leptons and <italic>b</italic>-tagged jets, which can be used to achieve a reasonable signal to background ratio. Variational quantum classifiers (VQC) are trained and optimized via a grid search. The use of reduced data is explored, considering both the number of features and the number of events, via different strategies: ranking of features, data transformations aiming for a richer reduced set of features, use of random samples, and choice of representative data samples.</p>
</sec>
<sec id="s2">
<title>2 Quantum machine learning</title>
<p>The QML algorithms are implemented using a quantum circuit, i.e., a collection of quantum gates applied to an <italic>n</italic>-qubit quantum state, followed by a measurement (or multiple measurements) that represent the output of the circuit. In order to implement a learning algorithm, the quantum circuit can be parameterized with parameters that can be learned by confronting the measurement to a loss function.</p>
<p>QML is effectively an extension of CML techniques to the Hilbert space, where instead of representing data as vectors in a high-dimensional real space, we encode it in state vectors of a Hilbert space. A QML algorithm, such as a quantum neural network, can be implemented using the quantum equivalent of a perceptron, one of the building blocks of CML. A problem arises from the realization that the activation functions used in CML can not be expressed using a linear operation, which is inherently required from the quantum evolution of a state. Ideas have been proposed to imitate an activation function in the quantum space (Gupta and Zia, <xref ref-type="bibr" rid="B32">2001</xref>; Schuld et al., <xref ref-type="bibr" rid="B45">2015</xref>), but, in the current paper, only variational quantum classifiers (Farhi and Neven, <xref ref-type="bibr" rid="B26">2018</xref>; Schuld et al., <xref ref-type="bibr" rid="B44">2020</xref>) are used for binary classification.</p>
<p>A VQC is a parameterized quantum circuit, a circuit type containing adjustable gates with tunable parameters. These gates are a universal set of quantum gates and, in the current study, rotation [<italic>R</italic><sub><italic>X</italic></sub>(<italic>w</italic>), <italic>R</italic><sub><italic>Y</italic></sub>(<italic>w</italic>), <italic>R</italic><sub><italic>Z</italic></sub>(<italic>w</italic>)] and CNOT gates are used.<xref ref-type="fn" rid="fn0002"><sup>2</sup></xref></p>
<p>The considered VQC pipeline used has the following components:</p>
<list list-type="bullet">
<list-item><p><bold>Data Embedding</bold>: the numerical vector <italic>X</italic> representing the classical information is converted to the quantum space with the preparation of an initial quantum state, |&#x003C8;<sub><italic>X</italic></sub>&#x0232A;, which represents a HEP event.</p></list-item>
<list-item><p><bold>Model circuit</bold>: a unitary transformation <italic>U</italic>(<italic>w</italic>), parameterized by a set of free parameters <italic>w</italic>, is applied to the initial quantum state |&#x003C8;<sub><italic>X</italic></sub>&#x0232A;. This produces the final state <inline-formula><mml:math id="M1"><mml:mrow><mml:mo>&#x0007C;</mml:mo><mml:msubsup><mml:mi>&#x003C8;</mml:mi><mml:mi>X</mml:mi><mml:mo>&#x00027;</mml:mo></mml:msubsup><mml:mo>&#x0232A;</mml:mo><mml:mo>=</mml:mo><mml:mi>U</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>w</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>&#x0007C;</mml:mo><mml:msub><mml:mi>&#x003C8;</mml:mi><mml:mi>X</mml:mi></mml:msub><mml:mo>&#x0232A;</mml:mo></mml:mrow></mml:math></inline-formula>.</p></list-item>
<list-item><p><bold>Measurement</bold>: a measurement of an observable is performed in one of the qubits of the state <inline-formula><mml:math id="M2"><mml:mrow><mml:mo>&#x0007C;</mml:mo><mml:msubsup><mml:mi>&#x003C8;</mml:mi><mml:mi>X</mml:mi><mml:mo>&#x00027;</mml:mo></mml:msubsup><mml:mo>&#x0232A;</mml:mo></mml:mrow></mml:math></inline-formula>, which will give the prediction of the model for the task at hand. The training of a VQC aims to find the best set of parameters <italic>w</italic> to match the event labels to the prediction.</p></list-item>
</list>
<p>Throughout this work, the <italic>PennyLane</italic> package (Bergholm et al., <xref ref-type="bibr" rid="B8">2018</xref>) was used as a basis for the hybrid quantum-classical machine learning applications. Leveraging <italic>PennyLane</italic>&#x00027;s <monospace>default.qubit</monospace> quantum simulator, a straightforward tool for quantum circuit simulations, we trained and assessed the performance of various QML algorithms. Subsequently, the performance of the algorithms trained on IBM&#x00027;s quantum computers was gauged by integrating <italic>PennyLane</italic> with IBM&#x00027;s quantum computing framework, <italic>Qiskit</italic> (Anis et al., <xref ref-type="bibr" rid="B4">2023</xref>).</p>
<sec>
<title>2.1 Data embedding</title>
<p>Before passing the data through the VQC, the preparation of the initial quantum state |&#x003C8;<sub><italic>X</italic></sub>&#x0232A; is required. This is called data embedding, and there are a number of proposals to perform this step (LaRose and Coyle, <xref ref-type="bibr" rid="B34">2020</xref>). Among the different possible embeddings, it was chosen to test amplitude embedding against angle embedding. The preliminary results have shown that angle embedding leads to a better performance than the former, as previously reported in a different context (Gianelle et al., <xref ref-type="bibr" rid="B29">2022</xref>). In this paper angle embedding was, therefore, the adopted choice. Further studies on possible embeddings are left for future work.</p>
<p>For an <italic>N</italic>-dimensional vector of classical information, <italic>X</italic> &#x0003D; (<italic>x</italic><sub>1</sub>, <italic>x</italic><sub>2</sub>, &#x02026;, <italic>x</italic><sub><italic>N</italic></sub>), the state entering the VQC will be defined via a state preparation circuit applied to the initial state of |0&#x0232A;<sup>&#x02297;<italic>N</italic></sup>. The information contained in <italic>X</italic> is embedded as angles: these are the values used in rotation gates applied to each qubit, thus requiring <italic>N</italic> qubits for embedding <italic>N</italic> features from the original dataset.</p>
<p>In the current study, the embedding is done using rotations around the <italic>x</italic>-axis on the Bloch sphere, thus defining the quantum state embedded with the classical information as:</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M3"><mml:mrow><mml:mo>&#x0007C;</mml:mo><mml:msub><mml:mi>&#x003C8;</mml:mi><mml:mi>X</mml:mi></mml:msub><mml:mo>&#x0232A;</mml:mo><mml:mo>=</mml:mo><mml:mstyle mathsize='140%' displaystyle='true'><mml:munderover><mml:mo>&#x02297;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:munderover></mml:mstyle><mml:msub><mml:mi>R</mml:mi><mml:mi>X</mml:mi></mml:msub><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:mo>=</mml:mo><mml:mstyle mathsize='140%' displaystyle='true'><mml:munderover><mml:mo>&#x02297;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:munderover></mml:mstyle><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>cos</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mn>2</mml:mn></mml:mfrac></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x0007C;</mml:mo><mml:mn>0</mml:mn><mml:mo>&#x0232A;</mml:mo><mml:mo>&#x02212;</mml:mo><mml:mi>i</mml:mi><mml:mi>sin</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mn>2</mml:mn></mml:mfrac></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x0007C;</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x0232A;</mml:mo></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
<p>where <inline-formula><mml:math id="M4"><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>X</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mi>i</mml:mi><mml:mi>x</mml:mi><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>&#x003C3;</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msup></mml:math></inline-formula> and <inline-formula><mml:math id="M5"><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>&#x003C3;</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is a Pauli operator. In this embedding each of the considered features of the original dataset is required to be bound between [&#x02212;&#x003C0;, &#x003C0;].</p>
</sec>
<sec>
<title>2.2 Model circuit</title>
<p>The model circuit is the key component of the VQC and includes the learnable set of parameters. It is defined by a parameterized unitary circuit <italic>U</italic>(<italic>w</italic>), with <italic>w</italic> being the set of tunable parameters, which will evolve a quantum state embedded with classical information &#x003C8;<sub><italic>X</italic></sub> into the final state <inline-formula><mml:math id="M6"><mml:msubsup><mml:mrow><mml:mi>&#x003C8;</mml:mi></mml:mrow><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>.</p>
<p>Analogously to the architecture of a classical neural network, the model circuit is formed by layers. Each layer is composed of an assemblage of rotation gates applied to each qubit in the system, followed by a set of CNOT gates.</p>
<p>A rotation gate, <italic>R</italic>, is designed to be applied to one single qubit and rotate its state. It is composed by three learnable parameters: &#x003D5;, &#x003B8;, &#x003C9;, which enables the gate to rotate any arbitrary state to any location on the Bloch sphere.</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M7"><mml:mtable columnalign='left'><mml:mtr><mml:mtd><mml:mtext>&#x000A0;&#x000A0;&#x000A0;&#x000A0;</mml:mtext><mml:mi>R</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>&#x003D5;</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x003B8;</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x003C9;</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>=</mml:mo><mml:mi>R</mml:mi><mml:mi>Z</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>&#x003C9;</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mi>R</mml:mi><mml:mi>Y</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>&#x003B8;</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mi>R</mml:mi><mml:mi>Z</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>&#x003D5;</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>=</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>&#x003D5;</mml:mi><mml:mo>+</mml:mo><mml:mi>&#x003C9;</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>/</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mi>cos</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>&#x003B8;</mml:mi><mml:mo>/</mml:mo><mml:mn>2</mml:mn><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>&#x003D5;</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mi>&#x003C9;</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>/</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mi>sin</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>&#x003B8;</mml:mi><mml:mo>/</mml:mo><mml:mn>2</mml:mn><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mi>i</mml:mi><mml:mo 
stretchy='false'>(</mml:mo><mml:mi>&#x003D5;</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mi>&#x003C9;</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>/</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mi>sin</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>&#x003B8;</mml:mi><mml:mo>/</mml:mo><mml:mn>2</mml:mn><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>&#x003D5;</mml:mi><mml:mo>+</mml:mo><mml:mi>&#x003C9;</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>/</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mi>cos</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>&#x003B8;</mml:mi><mml:mo>/</mml:mo><mml:mn>2</mml:mn><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Since all the learnable parameters of the VQC are contained inside the rotation gates, and each gate has three parameters, the shape of the weight vector is <italic>w</italic>&#x02208;&#x0211D;<sup><italic>n</italic>&#x000D7;<italic>l</italic>&#x000D7;3</sup>, where <italic>n</italic> is the number of qubits of the current system and <italic>l</italic> is the number of layers in the network. As mentioned in the previous section, <italic>n</italic> will depend on the number of features in the data and <italic>l</italic> is a hyper-parameter (HP) to be tuned.</p>
<p>After rotating the qubits&#x00027; state, a collection of CNOT gates will be applied to entangle the qubits. The CNOT gate is a 2-qubit gate with no learnable parameters. It will flip the state of the so-called target-qubit, based on the value of the control-qubit, and it is usually represented by having two inputs as such: CNOT(control-qubit, target-qubit). Given the number of qubits, the CNOT arrangement is implemented as detailed in <xref ref-type="fig" rid="F14">Algorithm 1</xref>.</p>
<fig id="F14" position="float">
<label>Algorithm 1</label>
<caption><p>CNOT arrangement.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0014.tif"/>
</fig>
</sec>
<sec>
<title>2.3 Measurement</title>
<p>The output of the model is obtained by measuring the expectation value of the Pauli <inline-formula><mml:math id="M9"><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>&#x003C3;</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>z</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> operator in one of the qubits of the final state <inline-formula><mml:math id="M10"><mml:msubsup><mml:mrow><mml:mi>&#x003C8;</mml:mi></mml:mrow><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>. An example of the implementations of a VQC is represented in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>An example circuit for the VQC architecture used. It is comprised of two layers and three features as input. The three main stages of a QML model can be seen: embedding of the data, passing the data through the model circuit, and the measurement of the outcome.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0001.tif"/>
</fig></sec>
</sec>
<sec id="s3">
<title>3 Classical machine learning methods</title>
<p>Shallow CML methods are used to provide a baseline comparison to the QML models. The specific methods chosen for the comparison are Logistic Regression (LR) and Support Vector Machines (SVM), with these algorithms being trained with the same data as the QML algorithms.</p>
<p>All the classical methods were implemented using <italic>scikit-learn</italic> (Pedregosa et al., <xref ref-type="bibr" rid="B39">2011</xref>) library and, if not specified otherwise, the default parameters were used.</p>
<sec>
<title>3.1 Logistic regression</title>
<p>Logistic Regression is one of the simplest ML models and can be formulated as one of the basic building blocks of a neural network&#x02014;a single-layer perceptron. The goal is to find the best set of weights <italic>w</italic> that fit the data <italic>x</italic>:</p>
<disp-formula id="E3"><label>(3)</label><mml:math id="M11"><mml:mrow><mml:mover accent='true'><mml:mi>y</mml:mi><mml:mo>&#x0005E;</mml:mo></mml:mover><mml:mo stretchy='false'>(</mml:mo><mml:mi>w</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi><mml:mo>,</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>w</mml:mi><mml:mo>&#x000B7;</mml:mo><mml:mi>x</mml:mi><mml:mo>+</mml:mo><mml:mi>b</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
<p>where &#x00177; is the probability of an event to belong to class 1, <italic>w</italic>, and <italic>b</italic> are learnable parameters, and &#x003C3; is the sigmoid function.</p>
<p>The learning process is guided by minimizing the loss function, which in our case is the binary cross-entropy:</p>
<disp-formula id="E4"><label>(4)</label><mml:math id="M12"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mi mathvariant='double-struck'>E</mml:mi><mml:mi>x</mml:mi></mml:msub><mml:mo stretchy='false'>[</mml:mo><mml:mi>y</mml:mi><mml:mi>log</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mover accent='true'><mml:mi>y</mml:mi><mml:mo>&#x0005E;</mml:mo></mml:mover><mml:mo stretchy='false'>)</mml:mo><mml:mo>+</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x02212;</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mi>log</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x02212;</mml:mo><mml:mover accent='true'><mml:mi>y</mml:mi><mml:mo>&#x0005E;</mml:mo></mml:mover><mml:mo stretchy='false'>)</mml:mo><mml:mo stretchy='false'>]</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
<p>where <italic>y</italic> is the binary label of whether the event is of the class signal or not, and &#x1D53C;<sub><italic>x</italic></sub> is the expectation value over the training data, obtained using the event weights corresponding to each signal and background process.</p>
</sec>
<sec>
<title>3.2 Support vector machine</title>
<p>An SVM classifier is trained by finding the hyperplane that best separates two classes of data in the hyperspace of features. It does so by using support vectors, which are the data points from the two classes closer to the hyperplane, influencing the position and orientation of the hyperplane.</p>
<p>The loss function of an SVM revolves around the goal of maximizing the margin, i.e., the distance between the hyperplane and the nearest data point from either class. In other words, the goal is to find the hyperplane with the greatest possible margin between itself and any point within the training set, giving a greater chance of new data being classified correctly.</p>
<p>Just like the Logistic Regression, the base SVM classifier can only learn a linear decision boundary. However, classification problems are rarely simple enough for it to be separable using a hyperplane, thus usually requiring a non-linear separation. SVM can do this by transforming the data using a non-linear function, named kernel, after which it can be split by a hyperplane. For this implementation, the radial-basis function (RBF) was used as kernel. This endows the SVM with a non-linear mapping where it better separates the two classes using a hyperplane.</p>
</sec>
</sec>
<sec id="s4">
<title>4 Dataset</title>
<p>The dataset used in this work (Crispim Rom&#x000E3;o et al., <xref ref-type="bibr" rid="B20">2021</xref>) is comprised of simulated events of <italic>pp</italic> collisions at 13 TeV, in final states with two leptons, at least 1 <italic>b</italic>-jet, at least 1 large-<italic>R</italic> jet, and large scalar sum of transverse<xref ref-type="fn" rid="fn0003"><sup>3</sup></xref> momentum (<italic>p</italic><sub><italic>T</italic></sub>) of all reconstructed particles in the event (<italic>H</italic><sub><italic>T</italic></sub> &#x0003E; 500 GeV). Such basic selection corresponds to a topology commonly used in different searches for BSM events at the LHC (Crispim Rom&#x000E3;o et al., <xref ref-type="bibr" rid="B19">2021</xref>). The dominant SM background for this topology, <inline-formula><mml:math id="M13"><mml:mi>Z</mml:mi><mml:mi>b</mml:mi><mml:mover accent="true"><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mo>&#x00304;</mml:mo></mml:mover></mml:math></inline-formula>, and the BSM signal corresponding to <inline-formula><mml:math id="M14"><mml:mi>t</mml:mi><mml:mover accent="true"><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo>&#x00304;</mml:mo></mml:mover></mml:math></inline-formula> production with one of the top-quarks decaying via a flavor changing neutral current decay <italic>t</italic>&#x02192;<italic>qZ</italic> (<italic>q</italic> &#x0003D; <italic>c, u</italic>) (Durieux et al., <xref ref-type="bibr" rid="B24">2015</xref>), were considered. Such signal was chosen given the kinematic similitude to the background, thus providing a good benchmark for the present study.</p>
<p>Both samples were generated with <sc>MadGraph5 2.6.5</sc> (Alwall et al., <xref ref-type="bibr" rid="B3">2014</xref>) and <sc>Pythia 8.2</sc> (Sj&#x000F6;strand et al., <xref ref-type="bibr" rid="B48">2015</xref>), and the detector was simulated using <sc>Delphes 3</sc> (Selvaggi, <xref ref-type="bibr" rid="B46">2014</xref>) with the default CMS card. Jets were clustered using the anti-<italic>k</italic><sub><italic>t</italic></sub> algorithm (Cacciari et al., <xref ref-type="bibr" rid="B13">2008</xref>), implemented via <sc>FastJet</sc> (Cacciari et al., <xref ref-type="bibr" rid="B14">2012</xref>), with <italic>R</italic>-parameters of 0.5 and 0.8 (the latter for the large-<italic>R</italic> jets).</p>
<p>The following features were used for training of both the classical and quantum machine learning algorithms:</p>
<list list-type="bullet">
<list-item><p>(&#x003B7;, &#x003D5;, <italic>p</italic><sub><italic>T</italic></sub>, <italic>m, b</italic>-tag) of the five leading jets, ordered by decreasing <italic>p</italic><sub><italic>T</italic></sub>, with <italic>b</italic>-tag being a Boolean variable indicating if the jet is identified as originating from a <italic>b</italic>-quark by the <italic>b</italic>-tagging algorithm emulated by <sc>Delphes</sc>;</p></list-item>
<list-item><p>(&#x003B7;, &#x003D5;, <italic>p</italic><sub><italic>T</italic></sub>, <italic>m</italic>) of the leading large-<italic>R</italic> jet;</p></list-item>
<list-item><p><italic>N</italic>-subjettiness of leading large-<italic>R</italic> jet, &#x003C4;<sub><italic>n</italic></sub> with <italic>n</italic> &#x0003D; 1, &#x02026;, 5 (Thaler and Van Tilburg, <xref ref-type="bibr" rid="B50">2011</xref>).</p></list-item>
<list-item><p>(&#x003B7;, &#x003D5;, <italic>p</italic><sub><italic>T</italic></sub>) of the two leading leptons (electrons or muons);</p></list-item>
<list-item><p>transverse momentum (<inline-graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-i0001.tif"/>) and &#x003D5; of the missing transverse energy;</p></list-item>
<list-item><p>multiplicity of jets, large-<italic>R</italic> jets, electrons, and muons;</p></list-item>
<list-item><p><italic>H</italic><sub><italic>T</italic></sub>.</p></list-item>
</list>
<p>The proportion of signal and background events was kept the same as the original simulated data during training, being 13 and 87%, respectively. Additionally, the Monte-Carlo weights, corresponding to the theoretical prediction for each process at target luminosity of 150 fb<sup>&#x02212;1</sup>, were taken into account in the evaluation of all the considered metrics and loss functions.</p>
</sec>
<sec id="s5">
<title>5 Feature selection</title>
<p>As described in the previous section, a total of 47 features are available for training. Considering the type of data embedding chosen, 47 qubits would be needed to train a VQC using all the dataset features. Such number of qubits is impractical given the currently available quantum computers and thus it is not feasible to train a VQC using all the features in our dataset. For the purposes of the current study, quantum computers with only five qubits were considered and two methods for feature selection were implemented: principal component analysis (PCA) and sequential feature selection (SFS).</p>
<p>A relative comparison of the best five features<xref ref-type="fn" rid="fn0004"><sup>4</sup></xref> is shown in <xref ref-type="table" rid="T1">Table 1</xref> while the best performance obtained with state-of-the-art CML methods without any features or data points restrictions can be seen in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Top five features ranked by their AUC score on the training dataset.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left"><bold>Feature</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><inline-graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-i0001.tif"/></td>
<td valign="top" align="center">0.817</td>
</tr>
<tr>
<td valign="top" align="left">Lepton<sub>1</sub> <italic>p</italic><sub><italic>T</italic></sub></td>
<td valign="top" align="center">0.692</td>
</tr>
<tr>
<td valign="top" align="left">Lepton<sub>2</sub> <italic>p</italic><sub><italic>T</italic></sub></td>
<td valign="top" align="center">0.649</td>
</tr>
<tr>
<td valign="top" align="left">large-<italic>R</italic> jet <italic>m</italic></td>
<td valign="top" align="center">0.609</td>
</tr>
<tr>
<td valign="top" align="left">large-<italic>R</italic> jet &#x003C4;<sub>1</sub></td>
<td valign="top" align="center">0.576</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Obtained ROC curve and respective AUC score on the test dataset when training a Boosted Decision Tree, implemented with <italic>xgboost</italic> (Chen and Guestrin, <xref ref-type="bibr" rid="B18">2016</xref>) using the full set of features and data points. The classifier has an identical configuration as the one described in Section 5.1.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0002.tif"/>
</fig>
<sec>
<title>5.1 Sequential feature selection</title>
<p>SFS algorithms are a widely used family of greedy search algorithms for automatically selecting a subset of features that is most relevant to the problem. This is achievable by removing or adding one feature at a time based on the classifier performance until a feature subset of the desired size, <italic>k</italic>, is reached.</p>
<p>There are different variations of SFS algorithms but for the current paper, the Sequential Backward Selection (SBS) algorithm was chosen. This algorithm starts with the full set of features (<italic>n</italic> &#x0003D; 47) and, at each iteration, it generates all possible feature subsets of size <italic>n</italic>&#x02212;1 and trains a ML model for each one of the subsets. The performance is subsequently evaluated and the feature that is absent from the subset of features with the highest performance metric is removed. This process is iterated until the feature subset contains <italic>k</italic> features.</p>
<p>This technique was used to find subsets of 1&#x02013;5 features. The ML model assisting the SBS was a boosted decision tree (BDT) with a maximum number of estimators set at 100 and a learning rate of 1 &#x000D7; 10<sup>&#x02212;5</sup>. The considered loss function was a logistic regression for binary classification and the AUC score was used as evaluation metric. The BDT was implemented using <italic>xgboost</italic> (Chen and Guestrin, <xref ref-type="bibr" rid="B18">2016</xref>) and the SBS algorithm using <italic>mlxtend</italic> (Raschka, <xref ref-type="bibr" rid="B41">2018</xref>). The selected features for the different values of <italic>k</italic> are shown in <xref ref-type="table" rid="T2">Table 2</xref> and the AUC scores for each feature in <xref ref-type="table" rid="T3">Table 3</xref>. It should be noted that <xref ref-type="table" rid="T2">Table 2</xref> shows the features selected with the SBS algorithm and <xref ref-type="table" rid="T3">Table 3</xref> shows the AUC value of each one of these features. The latter is ordered by descending AUC value.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>List of the features selected by the SBS algorithm for <italic>k</italic> &#x0003D; 1, &#x02026;, 5.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left"><bold><italic>k</italic></bold></th>
<th valign="top" align="left"><bold>Selected features</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left"><inline-graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-i0001.tif"/></td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left"><inline-graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-i0001.tif"/>, Number of muons</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left"><inline-graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-i0001.tif"/>, Number of muons, Jet<sub>1</sub> <italic>b</italic>-tag</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="left"><inline-graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-i0001.tif"/>, Number of muons, Jet<sub>1</sub> <italic>b</italic>-tag, Jet<sub>2</sub> <italic>p</italic><sub><italic>T</italic></sub></td>
</tr>
<tr>
<td valign="top" align="left">5</td>
<td valign="top" align="left"><inline-graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-i0001.tif"/>, Number of muons, Jet<sub>1</sub> <italic>b</italic>-tag, Jet<sub>2</sub> <italic>p</italic><sub><italic>T</italic></sub>, large-<italic>R</italic> &#x003C4;<sub>3</sub></td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Features selected by the SBS algorithm and their respective AUC score on the training dataset.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left"><bold>Feature</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><inline-graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-i0001.tif"/></td>
<td valign="top" align="center">0.817</td>
</tr>
<tr>
<td valign="top" align="left">Number of muons</td>
<td valign="top" align="center">0.534</td>
</tr>
<tr>
<td valign="top" align="left">Jet<sub>1</sub> <italic>b</italic>-tag</td>
<td valign="top" align="center">0.418</td>
</tr>
<tr>
<td valign="top" align="left">large-<italic>R</italic> jet &#x003C4;<sub>3</sub></td>
<td valign="top" align="center">0.316</td>
</tr>
<tr>
<td valign="top" align="left">Jet<sub>2</sub> <italic>p</italic><sub><italic>T</italic></sub></td>
<td valign="top" align="center">0.313</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>5.2 Principal component analysis</title>
<p>The PCA transforms a highly correlated, high-dimensional dataset into a new one with reduced dimensionality and uncorrelated features, by rotating the dataset in the direction of the eigenvectors of the dataset covariance matrix. In the present paper, the PCA was performed only to remove the correlation between the features, maintaining the same dimensionality as the original data. The PCA transformation was learned from the training dataset and then applied to all datasets. When training a VQC for a specific number of features, the PCA components were ranked by AUC score and thus selected from the highest to the lowest. This is done by introducing a priority queue, i.e., if training a model using two features is desired, the two top-ranked PCA components will be selected. The <italic>scikit-learn</italic> PCA implementation was used and the five best components obtained are shown in <xref ref-type="table" rid="T4">Table 4</xref>.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Top five PCA components obtained with the training dataset, ranked by their AUC.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left"><bold>PCA component</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Component 3</td>
<td valign="top" align="center">0.726</td>
</tr>
<tr>
<td valign="top" align="left">Component 14</td>
<td valign="top" align="center">0.606</td>
</tr>
<tr>
<td valign="top" align="left">Component 5</td>
<td valign="top" align="center">0.565</td>
</tr>
<tr>
<td valign="top" align="left">Component 41</td>
<td valign="top" align="center">0.563</td>
</tr>
<tr>
<td valign="top" align="left">Component 43</td>
<td valign="top" align="center">0.560</td>
</tr></tbody>
</table>
</table-wrap></sec>
</sec>
<sec id="s6">
<title>6 Dataset size reduction</title>
<p>The present paper addresses the use of reduced datasets to overcome the limitation of NISQ processors while minimizing the loss of information and thus avoiding a performance loss of the QML algorithms in the HEP context. The primary method used for this purpose in the current study is <italic>KMeans</italic>, where the <italic>k</italic> most representative points, i.e., a set of <italic>centroids</italic>, are obtained from the original dataset. Although these <italic>centroids</italic> are the most representative data points, they are not necessarily contained in the original dataset and, consequently, a resampling process, allowing to choose points of the original dataset (centrus), is required.</p>
<p>A study of the performance of the proposed dataset reduction method will be done by training a logistic regression model with the original dataset and comparing the results with those obtained when <italic>Kmeans</italic> and randomly undersampled datasets are used.</p>
<sec>
<title>6.1 KMeans algorithm</title>
<p>Being a clustering algorithm, <italic>KMeans</italic> iteratively tries to separate data into independent groups (MacKay, <xref ref-type="bibr" rid="B36">2003</xref>). This separation is done using the <italic>Lloyd&#x00027;s algorithm</italic> (Wilkin and Xiuzhen, <xref ref-type="bibr" rid="B54">2008</xref>), based on the minimal variability of samples within each cluster. The <italic>KMeans</italic> algorithm requires the specification of the desired number of clusters (<italic>k</italic>) a priori. The following steps were used:</p>
<list list-type="order">
<list-item><p><bold>Initialization of the centroids:</bold> using the <italic>scikit-learn</italic> implementation, it is possible to do it in two different ways, <italic>random</italic> and <italic>k-means&#x0002B;&#x0002B;</italic> (Vouros et al., <xref ref-type="bibr" rid="B52">2021</xref>):
<list list-type="bullet">
<list-item><p><bold>Random:</bold> <italic>k</italic> random samples of the original dataset are chosen.</p></list-item>
<list-item><p><bold>K-means&#x0002B;&#x0002B;:</bold> <italic>k</italic> samples of the original dataset are chosen based on a probabilistic approach, leading to the centroids being initialized far away from each other.</p></list-item>
</list>
<list list-type="simple">
<list-item><p>Assuming there is enough time, the algorithm will always converge, although the convergence to an absolute minimum is not guaranteed. The <italic>K-means&#x0002B;&#x0002B;</italic> initialization helps to address this issue. Furthermore, for both initializations, the algorithm, by default, runs several times with different centroid seeds, with the best result being the output.</p></list-item>
</list></p></list-item>
<list-item><p><bold>Assignment:</bold> Each data point <italic>x</italic><sub><italic>i</italic>:</sub> is addressed to a cluster <inline-formula><mml:math id="M15"><mml:msub><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:msup><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:msub></mml:math></inline-formula>, in such a way that the <italic>inertia</italic> is minimized:
<disp-formula id="E5"><label>(5)</label><mml:math id="M16"><mml:mrow><mml:msup><mml:mi>k</mml:mi><mml:mo>&#x02032;</mml:mo></mml:msup><mml:mo>=</mml:mo><mml:munder><mml:mrow><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>g</mml:mi><mml:mi>m</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow><mml:mi>k</mml:mi></mml:munder><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>F</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munderover><mml:mrow><mml:msup><mml:mrow><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mi>&#x003BC;</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo stretchy='false'>)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mstyle></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
<list list-type="simple">
<list-item><p>where <italic>F</italic> is the dimensionality, i.e., the number of features, &#x003BC;<sub><italic>k</italic></sub> is the centroid of the cluster <italic>c</italic><sub><italic>k</italic></sub> and <italic>j</italic> stands for the (<italic>j</italic>&#x0002B;1)<italic>th</italic> feature.</p></list-item>
</list></p></list-item>
<list-item><p><bold>Update of the centroids&#x00027; position:</bold> The new centroids are just the means positions of each cluster, i.e.,
<disp-formula id="E6"><label>(6)</label><mml:math id="M17"><mml:mrow><mml:msub><mml:mi>&#x003BC;</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo>:</mml:mo></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mstyle displaystyle='true'><mml:msubsup><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>:</mml:mo></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msub><mml:mi>c</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>:</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
<list list-type="simple">
<list-item><p>with <italic>n</italic><sub><italic>k</italic></sub> being the number of samples addressed to <italic>c</italic><sub><italic>k</italic></sub>. It should be noted that if <inline-formula><mml:math id="M18"><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:msup><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:math></inline-formula> the centroid <inline-formula><mml:math id="M19"><mml:msub><mml:mrow><mml:mi>&#x003BC;</mml:mi></mml:mrow><mml:mrow><mml:msup><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:msub></mml:math></inline-formula> doesn&#x00027;t change.</p></list-item>
</list></p></list-item>
<list-item><p><bold>Iteration:</bold> Steps 2 and 3 are repeated until the maximum number of iterations is reached or until the result converges, i.e., the centroids don&#x00027;t change.</p></list-item>
</list>
<p>The <italic>KMeans</italic> algorithm was used separately for the signal and background samples, with the corresponding weights being used.</p>
</sec>
<sec>
<title>6.2 Dataset resampling</title>
<p>As previously mentioned, although centroids are the most representative points, they are not necessarily contained in the original dataset. Hence, it was chosen to consider 10 neighbors of each centroid to determine each centrus, i.e., the 10 nearest points of the original dataset.</p>
<p>The position of each centrus was determined using the weighted mean of the position of the neighbors,</p>
<disp-formula id="E7"><label>(7)</label><mml:math id="M20"><mml:mrow><mml:mi>W</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mstyle displaystyle='true'><mml:msubsup><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mn>9</mml:mn></mml:msubsup><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>:</mml:mo></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle displaystyle='true'><mml:msubsup><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mn>9</mml:mn></mml:msubsup><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
<p>where <italic>W</italic> is the mean position, <italic>x</italic><sub><italic>i</italic>:</sub> is the (<italic>i</italic>&#x0002B;1)<italic>th</italic> nearest point and <italic>w</italic><sub><italic>i</italic></sub> the weight of the sample.</p>
<p>The sample weight of each centrus was calculated based on the number of samples of the same label (i.e., signal or background) on the original dataset:</p>
<disp-formula id="E8"><label>(8)</label><mml:math id="M21"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>with <italic>w</italic><sub><italic>i</italic></sub> being the weight of the (<italic>i</italic>&#x0002B;1)<italic>th</italic> centrus and <italic>n</italic> the number of samples in the original dataset with the same label of this centrus.</p>
</sec>
</sec>
<sec id="s7">
<title>7 Quantum and classical machine learning training</title>
<p>The training of the QML algorithms used in the current paper requires the use of optimizers. Two different ones were considered: <italic>Adam</italic> (Kingma and Ba, <xref ref-type="bibr" rid="B33">2014</xref>) and tree-structured Parzen estimator sampler (<italic>TPE</italic>; Bergstra et al., <xref ref-type="bibr" rid="B10">2011</xref>, <xref ref-type="bibr" rid="B9">2013</xref>).</p>
<p>The <italic>Adam</italic> optimizer uses an extension of stochastic gradient descent, leveraging techniques such as adaptive moment estimation, being extensively used in optimization problems, namely in the context of machine learning. Nonetheless, since there is no reason to expect, <italic>a priori</italic>, that it will work equally well in the context of QML, where specific challenges are expected, the <italic>TPE</italic> optimizer was also tested.</p>
<p>The <italic>TPE</italic> is a Bayesian optimization algorithm first developed for HP tuning in the context of machine learning. In the current study, it will be used to optimize VQC weights in a way very similar to what is typically done for HP tuning. <italic>TPE</italic> is implemented using <italic>Optuna</italic> (Akiba et al., <xref ref-type="bibr" rid="B1">2019</xref>), a library focused on HP optimization for machine learning models. <italic>TPE</italic> works by choosing a parameter candidate that maximizes the likelihood ratio between a Gaussian Mixture Model (GMM) fitted to the set of parameters associated with the best objective values, with another GMM being fitted to the remaining parameter values. In the context of HEP, <italic>TPE</italic> has also been used to explore parameter spaces of BSM models (de Souza et al., <xref ref-type="bibr" rid="B22">2022</xref>).</p>
<p>Different machine learning methods were optimized, namely an LR, an SVM, and a VQC. The corresponding training was done for the set of HP summarized in <xref ref-type="table" rid="T5">Table 5</xref>, where the scanned values are also listed. For each set of HP, 5 models were trained on 5 different subsets of the initial dataset (random sampling).</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>List of scanned hyperparameters.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left"><bold>Variable HP</bold></th>
<th valign="top" align="center"><bold>Possible values</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Feature selection</td>
<td valign="top" align="center">[PCA, SBS]</td>
</tr>
<tr>
<td valign="top" align="left">Number of data points</td>
<td valign="top" align="center">[100, 500, 1 k, 5 k]</td>
</tr>
<tr>
<td valign="top" align="left">Number of features</td>
<td valign="top" align="center">[1, 2, 3, 4, 5]</td>
</tr>
<tr>
<td valign="top" align="left">Number of layers</td>
<td valign="top" align="center">[1, 2, 3, 4, 5]</td>
</tr>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<td valign="top" align="left"><bold>Fixed HP</bold></td>
<td valign="top" align="center"><bold>Fixed values</bold></td>
</tr>
<tr>
<td valign="top" align="left">Max epochs</td>
<td valign="top" align="center">500</td>
</tr>
<tr>
<td valign="top" align="left">Batch size</td>
<td valign="top" align="center">Size of the dataset</td>
</tr>
<tr>
<td valign="top" align="left">Learning rate (LR)</td>
<td valign="top" align="center">0.03</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The LR parameter is used only for the VQC optimized by Adam while the number of layers is only used by the VQCs.</p>
</table-wrap-foot>
</table-wrap>
<p>For both optimizers, the cost function used is the squared error, with the individual Monte Carlo samples being properly weighted. During the training of VQCs, the inference was done on the validation dataset at five-epoch intervals, the AUC was computed, and only the best-performing model, according to the previously mentioned metric, was considered.</p>
<sec>
<title>7.1 <italic>Adam</italic> implementation details</title>
<p>The training starts with the initialization of the weight vector. This is done randomly with an order of magnitude of 10<sup>&#x02212;2</sup>, which is followed by training iterations until a maximum number of epochs is reached. At each iteration, the model is inferred with the training dataset, the cost function calculated and the model parameters updated via the <italic>Adam</italic> optimizer. A summary of <italic>Adam</italic>-optimized VQC training is shown in <xref ref-type="fig" rid="F15">Algorithm 2</xref>.</p>
<fig id="F15" position="float">
<label>Algorithm 2</label>
<caption><p><italic>Adam</italic> training.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0015.tif"/>
</fig>
</sec>
<sec>
<title>7.2 <italic>TPE</italic> implementation details</title>
<p>We use the <italic>Optuna</italic> implementation of the <italic>TPE</italic> sampler. Being a Bayesian optimization algorithm, <italic>TPE</italic> works very differently from <italic>Adam</italic>, which is a gradient descent algorithm. In <italic>TPE</italic>, for every training iteration, each parameter is replaced by a new value acquired by sampling from a Gaussian Mixture Model of good points, which is then used to compute the loss function. At each epoch, the algorithm computes new values for the model parameters. With the value of the loss function of the suggested parameters, <italic>TPE</italic> will update its internal Gaussian Mixture Models of good and bad points, which will allow it to learn what are good suggestions as more parameter values are sampled. Since <italic>TPE</italic> is a Bayesian algorithm, it does not need to compute derivatives of the loss function, as <italic>Adam</italic> does, which might allow for a lighter workload when running trainings on quantum computers.</p>
</sec>
</sec>
<sec id="s8">
<title>8 Simulation results</title>
<sec>
<title>8.1 Feature reduction</title>
<p>The results indicate that QML circuits trained with SBS data are generally unstable and very susceptible to fluctuations in the randomly sampled data, as can be seen in <xref ref-type="fig" rid="F3">Figure 3</xref>. Specifically, it is evident that using PCA-originated data produces significantly more stable results.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Plot grid representing the results for both <italic>Adam</italic> and <italic>TPE</italic>-Trained VQCs. Each data point represents the AUC score on the test dataset of a different set of HP, as listed in <xref ref-type="table" rid="T5">Table 5</xref>. The error bar represents the standard deviation associated with each data point since each point is the average of five different random samplings from the data.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0003.tif"/>
</fig>
<p>The performance of both optimizers, <italic>Adam</italic> and <italic>TPE</italic>, is usually saturated with only two layers. This effect is most noticeable when the number of features is greater than or equal to 3. When considering only the PCA-obtained results, the two optimizers are compatible for most of the configurations tested. Exceptions occur when using a high number of features (&#x02265;4) and only one layer, where <italic>TPE</italic> outperforms <italic>Adam</italic>, and when using a high number of features (&#x02265;4) and more than one layer, where the opposite happens and <italic>Adam</italic> outperforms <italic>TPE</italic>.</p>
<p>The shallow ML methods trained on the same data as the VQCs are shown in <xref ref-type="fig" rid="F4">Figure 4</xref>. The AUC scores obtained in this case are more stable for both the PCA and SBS datasets. The performance in both cases is saturated when using two features and the models trained with SBS data outperform the PCA-trained models, contrary to what was observed for the QML case. It should also be noted that the SVM outperforms LR in all cases except when only one feature is used, which is not surprising since SVMs are more sophisticated classifiers.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>Plot grid representing the results for the considered shallow methods. Each data point represents the AUC score on the test dataset of a different set of HP, as listed in <xref ref-type="table" rid="T5">Table 5</xref>. The error bar represents the standard deviation associated with each data point since each point is the average of five different random samplings from the data.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0004.tif"/>
</fig>
<p>For the best set of HP, VQCs trained using <italic>TPE</italic> and <italic>Adam</italic> have performed similarly to the shallow ML methods (cf. <xref ref-type="fig" rid="F5">Figure 5</xref> and <xref ref-type="supplementary-material" rid="SM1">Figure A1</xref>, respectively). It was also observed that there are no cases where QML outperforms any of the shallow methods tested. The <italic>TPE</italic> optimizer regime produced the best performance for QML, achieving an AUC score of 0.841 &#x000B1; 0.051, as shown in <xref ref-type="fig" rid="F5">Figure 5</xref>.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>ROC of the best HP set, using <italic>TPE</italic>&#x00027;s QML model average AUC score as a metric and the corresponding shallow methods ROCs for the same data. The HP for this run are SBS for feature method, 100 data points, 1 feature, and 5 VQC layers. The different colors indicate the different random samplings of the data.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0005.tif"/>
</fig>
<p>The reduction algorithms studied come with an additional computational cost compared to using the original dataset directly. In particular, the SBS algorithm added an overhead of 1 h for running the XGBoost algorithm and selecting the features with more classification power. On the other hand, the PCA algorithm took a sub-minute negligible time to complete. However, since these algorithms only need to be run once, before the training, and given that the grid search for the VQC, SVM, and LR algorithms took over 200 CPU hours on a dual-Intel(R) Xeon(R) Gold 6348 machine, in the end the computational cost of the classical reduction algorithms is negligible.</p>
<sec>
<title>8.1.1 VQC&#x00027;s robustness to discrete features</title>
<p>In the previous section it was noted that there was a significant variability in the final score of QML models, especially when training with SBS data. In fact, VQCs, being variational algorithms, are highly susceptible to small fluctuations in the data, which can have a correspondingly significant impact on the computed AUC. Additionally, numerical instabilities caused by computational floating point accuracy were observed during the validation step, leading to considerable fluctuations in the computed AUC in this regime.</p>
<p>To further investigate this behavior, which was not observed at the same level on the PCA-trained circuits, we looked at the AUC distributions produced by QML models as a function of the number of features. As shown in <xref ref-type="fig" rid="F6">Figure 6</xref>, it is clear that the instability in SBS results occurs when more than two features are used. The biggest difference in the AUC mean is found for four features, where the value for SBS is 0.471 &#x000B1; 0.129 and for PCA is 0.719 &#x000B1; 0.096. The smallest difference is found for 1 feature, where the value for SBS is 0.814 &#x000B1; 0.035 and for PCA is 0.724 &#x000B1; 0.037.</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>Distribution of the AUC values obtained for the QML model as a function of the number of features used in training, evaluated on the test dataset, for SBS and PCA inputs.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0006.tif"/>
</fig>
<p>Additionally, we produced visualizations of the decision regions of the models trained using both feature selection methods. We focused on runs that used two features, as this is where the problem originated. <xref ref-type="fig" rid="F7">Figures 7</xref>, <xref ref-type="fig" rid="F8">8</xref> show the decision regions obtained with each model for one representative run, illustrating the sensitivity of each boundary to variations in the data, for SBS or PCA. The SBS features used are listed in <xref ref-type="table" rid="T2">Table 2</xref>, where the second feature, the number of muons in the event, is a discrete variable.<xref ref-type="fn" rid="fn0005"><sup>5</sup></xref></p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>Decision regions of the three different architectures in a run where large variability of results for the QML SBS-trained model was observed. This case uses SBS data, Adam as an optimizer, 100 data points for training and two layers for the circuit.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0007.tif"/>
</fig>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>Decision regions of the three different architectures in a run where large variability of results for the QML PCA-trained model was observed. This case uses PCA data, Adam as an optimizer, 100 data points for training and one layer for the circuit.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0008.tif"/>
</fig>
<p>While LR and SVMs are robust in the presence of discrete variables, they may pose a challenge for continuous learning algorithms such as VQCs. It is therefore possible that the variability observed when using different sub-samples of SBS data could be attributed to the use of this discrete variable. To investigate this, we conducted the SBS feature selection once again, this time excluding all discrete variables&#x02014;yielding <xref ref-type="table" rid="T6">Table 6</xref>. The VQC circuits were once again trained using this modified list of inputs in a limited study of two features only, as illustrated in <xref ref-type="fig" rid="F9">Figure 9</xref>.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>Features selected by the SBS algorithm and their respective AUC Score on the training dataset with all the discrete features removed.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:&#x00023;919498;color:&#x00023;ffffff">
<th valign="top" align="left"><bold>Feature</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><inline-graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-i0001.tif"/></td>
<td valign="top" align="center">0.817</td>
</tr>
<tr>
<td valign="top" align="left">large-R jet &#x003C4;<sub>1</sub></td>
<td valign="top" align="center">0.576</td>
</tr>
<tr>
<td valign="top" align="left">large-R jet &#x003C4;<sub>3</sub></td>
<td valign="top" align="center">0.316</td>
</tr>
<tr>
<td valign="top" align="left">Jet<sub>2</sub> <italic>p</italic><sub><italic>T</italic></sub></td>
<td valign="top" align="center">0.313</td>
</tr>
<tr>
<td valign="top" align="left">Jet<sub>1</sub> <italic>p</italic><sub><italic>T</italic></sub></td>
<td valign="top" align="center">0.292</td>
</tr></tbody>
</table>
</table-wrap>
<fig id="F9" position="float">
<label>Figure 9</label>
<caption><p>Plot grid representing the results for both <italic>Adam</italic> and <italic>TPE</italic>-Trained VQCs. Each data point represents the AUC score on the test dataset of a different set of HP, as listed in <xref ref-type="table" rid="T5">Table 5</xref>, with the two features restriction. The error bar represents the standard deviation associated with each data point since each point is the average of five different random samplings from the data.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0009.tif"/>
</fig>
<p>Using the discrete-free SBS version to train the VQC led to significantly better AUC scores, outperforming PCA-trained QML models with an average AUC score of around 0.85, although still with larger variability than that of the PCA-trained VQCs. This is a notable departure from our previous observations in Section 8.1, where including discrete features in the SBS feature selection methodology resulted in erratic performance with no instance of outperforming PCA (except in cases where only one continuous feature was used). Therefore, we found that excluding discrete variables during feature selection led to better performance for VQC circuits in a limited study of two features, compared to when discrete variables were included. This indicates that the choice of input features is crucial for achieving high accuracy in quantum machine learning, and future studies should consider the impact of discrete variables on VQC performance. The findings may inform future choices in selecting input features for VQC circuits to optimize model performance.</p>
</sec>
</sec>
<sec>
<title>8.2 Dataset reduction</title>
<sec>
<title>8.2.1 Implementation of <italic>KMeans</italic></title>
<p>The performance of the <italic>KMeans</italic> algorithm was tested initially by training LR models with 10 reduced datasets and selecting a different number of <italic>k</italic> features (<italic>k</italic>&#x02208;[1, 2, 3, 4, 5]) obtained with the SBS algorithm. The <italic>KMeans</italic> algorithm considers the sample weight and, in order to have an equal number of signal and background centroids, it was separately applied to the signal and background data. Since state-of-the-art quantum computing requires small datasets, the data reduction studies were done for datasets with 100, 500, 1,000, and 5,000 data points and the number of features previously mentioned.</p>
<p>Two configurations were studied: the framework presented in Section 6.1 was applied to the training and test datasets; and only to the training datasets (with test datasets obtained through random undersampling).<xref ref-type="fn" rid="fn0006"><sup>6</sup></xref></p>
<p>The mean AUC score and respective standard deviation found using <italic>KMeans</italic> for train and test datasets are summarized in <xref ref-type="fig" rid="F10">Figure 10</xref>. The results obtained using the <italic>KMeans</italic> algorithm for the training dataset and random undersampling in the test signal and background samples are presented in <xref ref-type="fig" rid="F11">Figure 11</xref>. In order to provide a benchmark point for comparison with the performance of the reduced datasets, a LR model was trained on the full original dataset, with results shown in both figures.</p>
<fig id="F10" position="float">
<label>Figure 10</label>
<caption><p>Average AUC score and corresponding standard deviation, represented as uncertainty bands, for different numbers of clusters as a function of the number of features. The training and testing datasets were reduced using the <italic>KMeans</italic> algorithm. In each case, 10 different reduced test datasets were used.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0010.tif"/>
</fig>
<fig id="F11" position="float">
<label>Figure 11</label>
<caption><p>Average AUC score and corresponding standard deviation, represented as uncertainty bands, for different numbers of clusters as a function of the number of features. The training dataset was reduced using the <italic>KMeans</italic> algorithm. In each case, 10 different randomly undersampled test datasets were used.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0011.tif"/>
</fig>
<p>It can be seen in <xref ref-type="fig" rid="F10">Figures 10</xref>, <xref ref-type="fig" rid="F11">11</xref> that using the <italic>KMeans</italic> algorithm to reduce the training dataset results in AUC scores that are compatible with the performance obtained using the full original dataset.</p>
<p>This study shows that although <italic>KMeans</italic> is a more sophisticated algorithm for data reduction than random undersampling, in the HEP case under consideration no significant deterioration of the performance is observed when using it, suggesting that in this study the dataset composed of prototypes is a good representative of the whole dataset in the small dataset regime, which is explored in this work.</p>
</sec>
<sec>
<title>8.2.2 Application to QML</title>
<p>The QML, SVM, and LR models were trained using <italic>KMeans</italic> reduced datasets as well as random undersampling, for different dataset sizes. In this comparison, the HP for the VQC are the ones previously found to be the best, i.e., one feature chosen with the SBS method and five VQC layers for the architecture. The metric used to compare all models is the AUC score average of five different runs.</p>
<p>For all cases, the test and validation sets were reduced using random undersampling, hence, for each dataset size there are one train, five validation and five test datasets. The choice to keep random sampling for the test dataset, rather than <italic>KMeans</italic> reduction, is to ensure that our methodology represents the test samples as close to the original dataset as possible, ensuring that sophisticated resampling techniques do not significantly modify the data.</p>
<p>The obtained results are shown in <xref ref-type="fig" rid="F12">Figure 12</xref>. It can be seen that the performance for the <italic>KMeans</italic> reduced dataset is compatible with the one obtained using the dataset reduced through random undersampling, for QML and CML models. Furthermore, the performance achieved by the simulated VQCs is identical within the statistical uncertainties to the performances by the SVM and LR, in agreement to what was observed in Section 8.1.</p>
<fig id="F12" position="float">
<label>Figure 12</label>
<caption><p>Comparison between the QML, SVM, and LR models when trained with the <italic>TPE</italic> and the best set of HP for different dataset sizes for both random undersampling (regular) and <italic>KMeans</italic> reduced datasets.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0012.tif"/>
</fig>
<p>Nonetheless, it should be emphasized that the model trained with random undersampling needs to be trained several times for achieving these average scores, as many times as the number of reduced datasets used. On the other hand, the models using the <italic>KMeans</italic> reduced dataset need to be trained only once. This can be relevant in the context of quantum computers, where access is often subject to long queues and thus the number of accesses can be a limiting factor. While the <italic>KMeans</italic> reduction technique brought an overall increase in time of around 1%, this change is negligible taking into account the reduction in number of accesses.</p>
</sec>
</sec>
</sec>
<sec id="s9">
<title>9 Real quantum computers results</title>
<p>Until this point, only simulated quantum environments were used. In order to test the performance in real quantum computers, and thus validate the simulation results, the <italic>Pennylane</italic> framework was used as the integration layer with <italic>Qiskit</italic>, which works effectively as a direct API to the quantum computers provided by IBM.</p>
<p>In this study, only the best-performing model HP-set was used, i.e., the <italic>TPE</italic>-trained VQC. This VQC was implemented and its test set was inferred on six different quantum systems with identical architectures, all freely available. Evaluating our model in multiple identical quantum systems allows us to get an idea of the scale of the associated systematic uncertainty via the variability of the observed results. Since the implemented circuits are small, no error mitigation techniques were implemented. IBM&#x00027;s transpiler optimization level was set to 3<xref ref-type="fn" rid="fn0007"><sup>7</sup></xref> (Anis et al., <xref ref-type="bibr" rid="B4">2023</xref>) and, for each event, the final expectation value was computed by averaging 20<italic>k</italic> shots on the quantum computer. The obtained results, shown in <xref ref-type="fig" rid="F13">Figure 13</xref>, are compatible with the simulated ones (<xref ref-type="fig" rid="F5">Figure 5</xref>).</p>
<fig id="F13" position="float">
<label>Figure 13</label>
<caption><p>Final ROC curve of the best-performing model when inferred on the test dataset in six different IBM systems. The average AUC scores and the corresponding standard deviations are also shown. The colors in each subplot stand for different runs of the same circuits in the same QC.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-06-1268852-g0013.tif"/>
</fig></sec>
<sec sec-type="conclusions" id="s10">
<title>10 Conclusion</title>
<p>In this paper, we assessed the feasibility of using large datasets in QML applications by exploring data reduction techniques. To allow for a fair comparison between CML and QML models, we opted to use shallow classical methods as opposed to deep methods, which require large datasets that are not viable given the limitations of the current quantum computers. Our results indicate that there is comparable performance between CML and QML when tested on the same small dataset regime.</p>
<p>To achieve this, our study first compared feature selection techniques, showing that while SBS can produce the best-performing QML model, it generally yielded worse and more unstable results than PCA. Additionally, we found that this instability was caused by the use of discrete variables in VQCs, highlighting the suitability of PCA-transformed data for QML applications in the HEP context, where discrete variables are commonly used to describe collider events.</p>
<p>Our grid search over different HP combinations of VQC ran in simulation provided no evidence of quantum advantage in our study. We confirmed the results by running the best performing configuration on real-world quantum systems, obtaining compatible performances and therefore validating our conclusions. We compared the performance of TPE and Adam optimizers in QML and found that TPE achieves competitive results. Being a gradient-free optimizer, TPE offers the advantage that it can lead to faster training with a smaller memory usage when compared to Adam, which in principle can further facilitate the application of QML in current quantum computers.</p>
<p>We then explored data reduction techniques, finding that reducing the dataset size with the KMeans algorithm produces results that are similar to those obtained from random undersampling. This finding is significant in that it means that the model can achieve similar performance with fewer accesses to a quantum computer during training, which is a considerable bottleneck in current QML.</p>
<p>In conclusion, while our study found no evidence of quantum advantage in the current state of QML within the context of large HEP datasets, the performance of QML models was comparable to that of classical machine learning models when restricted to small dataset regimes. Our findings suggest that using dataset reduction techniques enables us to use large datasets more efficiently to train VQCs, facilitating the usage of current quantum computers in large datasets often found in HEP.</p>
</sec>
<sec sec-type="data-availability" id="s11">
<title>Data availability statement</title>
<p>The dataset used in the studies reported in the current paper can be found in <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/doi/10.5281/zenodo.5126746">https://zenodo.org/doi/10.5281/zenodo.5126746</ext-link>, while the computational code used to obtain the present results is publicly available via <ext-link ext-link-type="uri" xlink:href="https://github.com/mcpeixoto/QML-HEP">https://github.com/mcpeixoto/QML-HEP</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="s12">
<title>Author contributions</title>
<p>MP: Conceptualization, Data curation, Formal analysis, Investigation, Project administration, Software, Validation, Visualization, Writing&#x02014;original draft, Writing&#x02014;review &#x00026; editing. NC: Conceptualization, Data curation, Funding acquisition, Investigation, Methodology, Resources, Supervision, Writing&#x02014;original draft, Writing&#x02014;review &#x00026; editing. MR: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Software, Supervision, Validation, Writing&#x02014;original draft, Funding acquisition, Writing&#x02014;review &#x00026; editing. MO: Conceptualization, Formal analysis, Investigation, Project administration, Software, Validation, Visualization, Writing&#x02014;original draft, Writing&#x02014;review &#x00026; editing. IO: Conceptualization, Formal analysis, Funding acquisition, Investigation, Methodology, Resources, Supervision, Writing&#x02014;review &#x00026; editing.</p>
</sec>
</body>
<back>
<sec sec-type="funding-information" id="s13">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work was supported by Funda&#x000E7;&#x000E3;o para a Ci&#x000EA;ncia e a Tecnologia, Portugal, through project CERN/FIS-COM/0004/2021 (&#x0201C;Exploring quantum machine learning as a tool for present and future high energy physics colliders&#x0201D;). IO was supported by the fellowship LCF/BQ/PI20/11760025 from La Caixa Foundation (ID 100010434) and by the European Union Horizon 2020 research and innovation program under the Marie Sk&#x00142;odowska-Curie grant agreement No 847648.</p>
</sec>
<ack><p>We acknowledge the use of IBM Quantum services for this work. We thank Declan Millar, Nuno Peres, and Tiago Ant&#x000E3;o for the very useful discussions and Ricardo Ribeiro for kindly providing access to some of the computing systems used in this work. We also thank Henrique Carvalho for the help in producing (<xref ref-type="fig" rid="F1">Figure 1</xref>).</p>
</ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest. The handling editor ML declared a past co-authorship/collaboration with authors NC and IO. The author(s) declared that they were an editorial board member of Frontiers, at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec sec-type="disclaimer" id="s14">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="disclaimer" id="s15">
<title>Author disclaimer</title>
<p>The views expressed are those of the authors, and do not reflect the official policy or position of IBM or the IBM Quantum team.</p>
</sec>
<sec sec-type="supplementary-material" id="s16">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/frai.2023.1268852/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/frai.2023.1268852/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Presentation_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/></sec>
<fn-group>
<fn id="fn0001"><p><sup>1</sup><italic>Classical</italic> is used throughout the paper as opposed to <italic>quantum</italic> machine learning.</p></fn>
<fn id="fn0002"><p><sup>2</sup>Even if, in general, the phase shift gate <italic>P</italic>(<italic>w</italic>) should be included, this gate does not change the final outcome (i.e., it does not impact probabilities), so it can be discarded.</p></fn>
<fn id="fn0003"><p><sup>3</sup>The transverse plane is defined with respect to the proton colliding beams.</p></fn>
<fn id="fn0004"><p><sup>4</sup>The area under the curve (AUC) of the receiver operating characteristic curve (ROC) is considered as metric for these comparisons.</p></fn>
<fn id="fn0005"><p><sup>5</sup>In ML literature this is called a categorical variable. However, we note that even though it is categorical, it is still ordinal. As there are no non-ordinal or non-binary categorical variables in our dataset, we will refer to these variables as discrete instead of categorical for the rest of this work.</p></fn>
<fn id="fn0006"><p><sup>6</sup>Throughout this article random undersampling refers to the random selection of data points from the original dataset. In the ML subfield of imbalanced learning, the proper methodology is to use resampling algorithms only during training, but not during validation or test. In this section we present results of these two cases as a comparison, but later we will restrict to random undersampling during validation and testing.</p></fn>
<fn id="fn0007"><p><sup>7</sup>The level 3 of optimization corresponds to the heaviest optimization inherently implemented.</p></fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Akiba</surname> <given-names>T.</given-names></name> <name><surname>Sano</surname> <given-names>S.</given-names></name> <name><surname>Yanase</surname> <given-names>T.</given-names></name> <name><surname>Ohta</surname> <given-names>T.</given-names></name> <name><surname>Koyama</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Optuna: a next-generation hyperparameter optimization framework,&#x0201D;</article-title> in <source>Proceedings of the 25rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source> (<publisher-loc>Anchorage, AK</publisher-loc>). <pub-id pub-id-type="doi">10.1145/3292500.3330701</pub-id></citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alvi</surname> <given-names>S.</given-names></name> <name><surname>Bauer</surname> <given-names>C. W.</given-names></name> <name><surname>Nachman</surname> <given-names>B.</given-names></name></person-group> (<year>2023</year>). <article-title>Quantum anomaly detection for collider physics</article-title>. <source>J. High Energy Phys</source>. <volume>2023</volume>:<fpage>220</fpage>. <pub-id pub-id-type="doi">10.1007/JHEP02(2023)220</pub-id><pub-id pub-id-type="pmid">36852337</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alwall</surname> <given-names>J.</given-names></name> <name><surname>Frederix</surname> <given-names>R.</given-names></name> <name><surname>Frixione</surname> <given-names>S.</given-names></name> <name><surname>Hirschi</surname> <given-names>V.</given-names></name> <name><surname>Maltoni</surname> <given-names>F.</given-names></name> <name><surname>Mattelaer</surname> <given-names>O.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>The automated computation of tree-level and next-to-leading order differential cross sections, and their matching to parton shower simulations</article-title>. <source>J. High Energy Phys</source>. <volume>7</volume>:<fpage>79</fpage>. <pub-id pub-id-type="doi">10.1007/JHEP07(2014)079</pub-id></citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Anis</surname> <given-names>M. S.</given-names></name> <name><surname>Abby-Mitchell Abraham</surname> <given-names>H.</given-names></name> <name><surname>AduOffei Agarwal</surname> <given-names>R.</given-names></name> <name><surname>Agliardi</surname> <given-names>G.</given-names></name> <name><surname>Aharoni</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2023</year>). <source>QISKIT: An Open-Source Framework for Quantum Computing.</source> <pub-id pub-id-type="doi">10.5281/zenodo.2573505</pub-id></citation>
</ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Araz</surname> <given-names>J. Y.</given-names></name> <name><surname>Spannowsky</surname> <given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>Classical versus quantum: Comparing tensor-network-based quantum circuits on large hadron collider data</article-title>. <source>Phys. Rev. A</source> <volume>106</volume>:<fpage>62423</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevA.106.062423</pub-id></citation>
</ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bapst</surname> <given-names>F.</given-names></name> <name><surname>Bhimji</surname> <given-names>W.</given-names></name> <name><surname>Calafiura</surname> <given-names>P.</given-names></name> <name><surname>Gray</surname> <given-names>H.</given-names></name> <name><surname>Lavrijsen</surname> <given-names>W.</given-names></name> <name><surname>Linder</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>A pattern recognition algorithm for quantum annealers</article-title>. <source>Comput. Softw. Big Sci</source>. <volume>4</volume>, <fpage>1</fpage>&#x02013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1007/s41781-019-0032-5</pub-id></citation>
</ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Belis</surname> <given-names>V.</given-names></name> <name><surname>Gonz&#x000E1;lez-Castillo</surname> <given-names>S.</given-names></name> <name><surname>Reissel</surname> <given-names>C.</given-names></name> <name><surname>Vallecorsa</surname> <given-names>S.</given-names></name> <name><surname>Combarro</surname> <given-names>E. F.</given-names></name> <name><surname>Dissertori</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;HIGGS analysis with quantum classifiers,&#x0201D;</article-title> in <source>EPJ Web of Conferences</source>, Vol. <volume>251</volume> (EDP Sciences), <fpage>03070</fpage>. <pub-id pub-id-type="doi">10.1051/epjconf/202125103070</pub-id></citation>
</ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bergholm</surname> <given-names>V.</given-names></name> <name><surname>Izaac</surname> <given-names>J.</given-names></name> <name><surname>Schuld</surname> <given-names>M.</given-names></name> <name><surname>Gogolin</surname> <given-names>C.</given-names></name> <name><surname>Ahmed</surname> <given-names>S.</given-names></name> <name><surname>Ajith</surname> <given-names>V.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>PennyLane: automatic differentiation of hybrid quantum-classical computations</article-title>. <source>arXiv:1811.04968v4</source>. <pub-id pub-id-type="doi">10.48550/arXiv.1811.04968</pub-id></citation>
</ref>
<ref id="B9">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Bergstra</surname> <given-names>J.</given-names></name> <name><surname>Yamins</surname> <given-names>D.</given-names></name> <name><surname>Cox</surname> <given-names>D.</given-names></name></person-group> (<year>2013</year>). <article-title>&#x0201C;Making a science of model search: hyperparameter optimization in hundreds of dimensions for vision architectures,&#x0201D;</article-title> in <source>International Conference on Machine Learning</source> (San Francisco), <fpage>115</fpage>&#x02013;<lpage>123</lpage>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://proceedings.mlr.press/v28/bergstra13.html">https://proceedings.mlr.press/v28/bergstra13.html</ext-link></citation>
</ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bergstra</surname> <given-names>J.</given-names></name> <name><surname>Bardenet</surname> <given-names>R.</given-names></name> <name><surname>Bengio</surname> <given-names>Y.</given-names></name> <name><surname>K&#x000E9;gl</surname> <given-names>B.</given-names></name></person-group> (<year>2011</year>). <article-title>&#x0201C;Algorithms for hyper-parameter optimization,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems, Vol. 24</source>.</citation>
</ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Blance</surname> <given-names>A.</given-names></name> <name><surname>Spannowsky</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>Quantum machine learning for particle physics using a variational quantum classifier</article-title>. <source>J. High Energy Phys</source>. <volume>2021</volume>, <fpage>1</fpage>&#x02013;<lpage>20</lpage>. <pub-id pub-id-type="doi">10.1007/JHEP02(2021)212</pub-id></citation>
</ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Borras</surname> <given-names>K.</given-names></name> <name><surname>Chang</surname> <given-names>S. Y.</given-names></name> <name><surname>Funcke</surname> <given-names>L.</given-names></name> <name><surname>Grossi</surname> <given-names>M.</given-names></name> <name><surname>Hartung</surname> <given-names>T.</given-names></name></person-group> (<year>2023</year>). <article-title>Impact of quantum noise on the training of quantum generative adversarial networks</article-title>. <source>J. Phys</source>. <volume>2438</volume>:<fpage>012093</fpage>. <pub-id pub-id-type="doi">10.1088/1742-6596/2438/1/012093</pub-id></citation>
</ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cacciari</surname> <given-names>M.</given-names></name> <name><surname>Salam</surname> <given-names>G. P.</given-names></name> <name><surname>Soyez</surname> <given-names>G.</given-names></name></person-group> (<year>2008</year>). <article-title>The anti-<italic>k</italic><sub><italic>t</italic></sub> jet clustering algorithm</article-title>. <source>J. High Energy Phys</source>. <volume>4</volume>:<fpage>63</fpage>. <pub-id pub-id-type="doi">10.1088/1126-6708/2008/04/063</pub-id></citation>
</ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cacciari</surname> <given-names>M.</given-names></name> <name><surname>Salam</surname> <given-names>G. P.</given-names></name> <name><surname>Soyez</surname> <given-names>G.</given-names></name></person-group> (<year>2012</year>). <article-title>Fastjet user manual</article-title>. <source>Eur. Phys. J. C</source> <volume>72</volume>:<fpage>1986</fpage>. <pub-id pub-id-type="doi">10.1140/epjc/s10052-012-1896-2</pub-id></citation>
</ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chang</surname> <given-names>S. Y.</given-names></name> <name><surname>Herbert</surname> <given-names>S.</given-names></name> <name><surname>Vallecorsa</surname> <given-names>S.</given-names></name> <name><surname>Combarro</surname> <given-names>E. F.</given-names></name> <name><surname>Duncan</surname> <given-names>R.</given-names></name></person-group> (<year>2021a</year>). <article-title>Dual-parameterized quantum circuit gan model in high energy physics</article-title>. <source>EPJ Web Conf</source>. <volume>251</volume>:<fpage>03050</fpage>. <pub-id pub-id-type="doi">10.1051/epjconf/202125103050</pub-id></citation>
</ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chang</surname> <given-names>S. Y.</given-names></name> <name><surname>Vallecorsa</surname> <given-names>S.</given-names></name> <name><surname>Combarro</surname> <given-names>E. F.</given-names></name> <name><surname>Carminati</surname> <given-names>F.</given-names></name></person-group> (<year>2021b</year>). <article-title>Quantum generative adversarial networks in a continuous-variable architecture to simulate high energy physics detectors</article-title>. <source>arXiv:2101.11132</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2101.11132</pub-id></citation>
</ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>S. Y.-C.</given-names></name> <name><surname>Wei</surname> <given-names>T.-C.</given-names></name> <name><surname>Zhang</surname> <given-names>C.</given-names></name> <name><surname>Yu</surname> <given-names>H.</given-names></name> <name><surname>Yoo</surname> <given-names>S.</given-names></name></person-group> (<year>2022</year>). <article-title>Quantum convolutional neural networks for high energy physics data analysis</article-title>. <source>Phys. Rev. Res</source>. <volume>4</volume>:<fpage>013231</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevResearch.4.013231</pub-id></citation>
</ref>
<ref id="B18">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>T.</given-names></name> <name><surname>Guestrin</surname> <given-names>C.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;XGBoost: a scalable tree boosting system,&#x0201D;</article-title> in <source>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD &#x00027;16</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>785</fpage>&#x02013;<lpage>794</lpage>. <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id></citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Crispim Rom&#x000E3;o</surname> <given-names>M.</given-names></name> <name><surname>Castro</surname> <given-names>N. F.</given-names></name> <name><surname>Pedro</surname> <given-names>R.</given-names></name></person-group> (<year>2021</year>). <article-title>Finding new physics without learning about it: anomaly detection as a tool for searches at colliders</article-title>. <source>Eur. Phys. J. C</source> <volume>81</volume>:<fpage>27</fpage>. <pub-id pub-id-type="doi">10.1140/epjc/s10052-020-08807-w</pub-id></citation>
</ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Crispim Rom&#x000E3;o</surname> <given-names>M.</given-names></name> <name><surname>Castro</surname> <given-names>N. F.</given-names></name> <name><surname>Pedro</surname> <given-names>R.</given-names></name></person-group> (<year>2021</year>). <source>Simulated PP Collisions at 13 TeV With 2 Leptons &#x0002B; 1 b Jet Final State and Selected Benchmark Beyond the Standard Model Signals</source>. Dataset on Zenodo. <pub-id pub-id-type="doi">10.5281/zenodo.5126746</pub-id></citation>
</ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Das</surname> <given-names>S.</given-names></name> <name><surname>Wildridge</surname> <given-names>A. J.</given-names></name> <name><surname>Vaidya</surname> <given-names>S. B.</given-names></name> <name><surname>Jung</surname> <given-names>A.</given-names></name></person-group> (<year>2019</year>). <article-title>Track clustering with a quantum annealer for primary vertex reconstruction at hadron colliders</article-title>. <source>arXiv:1903.08879</source>. <pub-id pub-id-type="doi">10.48550/arXiv.1903.08879</pub-id></citation>
</ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>de Souza</surname> <given-names>F. A.</given-names></name> <name><surname>Crispim Rom&#x000E3;o</surname> <given-names>M.</given-names></name> <name><surname>Castro</surname> <given-names>N. F.</given-names></name> <name><surname>Nikjoo</surname> <given-names>M.</given-names></name> <name><surname>Porod</surname> <given-names>W.</given-names></name></person-group> (<year>2022</year>). <article-title>Exploring parameter spaces with artificial intelligence and machine learning black-box optimisation algorithms</article-title>. <source>Phys. Rev. D</source> <volume>107</volume>:<fpage>035004</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevD.107.035004</pub-id></citation>
</ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Delgado</surname> <given-names>A.</given-names></name> <name><surname>Hamilton</surname> <given-names>K. E.</given-names></name></person-group> (<year>2022</year>). <article-title>Unsupervised quantum circuit learning in high energy physics</article-title>. <source>Phys. Rev. D</source> <volume>106</volume>:<fpage>096006</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevD.106.096006</pub-id></citation>
</ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Durieux</surname> <given-names>G.</given-names></name> <name><surname>Maltoni</surname> <given-names>F.</given-names></name> <name><surname>Zhang</surname> <given-names>C.</given-names></name></person-group> (<year>2015</year>). <article-title>Global approach to top-quark flavor-changing interactions</article-title>. <source>Phys. Rev. D</source> <volume>91</volume>:<fpage>074017</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevD.91.074017</pub-id></citation>
</ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ellis</surname> <given-names>J.</given-names></name></person-group> (<year>2012</year>). <article-title>Outstanding questions: physics beyond the standard model</article-title>. <source>Philos. Trans. R. Soc. Lond. A</source> <volume>370</volume>, <fpage>818</fpage>&#x02013;<lpage>830</lpage>. <pub-id pub-id-type="doi">10.1098/rsta.2011.0452</pub-id><pub-id pub-id-type="pmid">22253238</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Farhi</surname> <given-names>E.</given-names></name> <name><surname>Neven</surname> <given-names>H.</given-names></name></person-group> (<year>2018</year>). <source>Classification with quantum neural networks on near term processors</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/1802.06002">https://arxiv.org/abs/1802.06002</ext-link></citation>
</ref>
<ref id="B27">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Feickert</surname> <given-names>M.</given-names></name> <name><surname>Nachman</surname> <given-names>B.</given-names></name></person-group> (<year>2021</year>). <source>A living review of machine learning for particle physics</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2102.02770">https://arxiv.org/abs/2102.02770</ext-link></citation>
</ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Funcke</surname> <given-names>L.</given-names></name> <name><surname>Hartung</surname> <given-names>T.</given-names></name> <name><surname>Heinemann</surname> <given-names>B.</given-names></name> <name><surname>Jansen</surname> <given-names>K.</given-names></name> <name><surname>Kropf</surname> <given-names>A.</given-names></name> <name><surname>K&#x000FC;hn</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Studying quantum algorithms for particle track reconstruction in the LUXE experiment</article-title>. <source>J. Phys</source>. <volume>2438</volume>:<fpage>12127</fpage>. <pub-id pub-id-type="doi">10.1088/1742-6596/2438/1/012127</pub-id></citation>
</ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gianelle</surname> <given-names>A.</given-names></name> <name><surname>Koppenburg</surname> <given-names>P.</given-names></name> <name><surname>Lucchesi</surname> <given-names>D.</given-names></name> <name><surname>Nicotra</surname> <given-names>D.</given-names></name> <name><surname>Rodrigues</surname> <given-names>E.</given-names></name> <name><surname>Sestini</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Quantum machine learning for b-jet charge identification</article-title>. <source>J. High Energy Phys</source>. <volume>08</volume>:<fpage>014</fpage>. <pub-id pub-id-type="doi">10.1007/JHEP08(2022)014</pub-id></citation>
</ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guan</surname> <given-names>W.</given-names></name> <name><surname>Perdue</surname> <given-names>G.</given-names></name> <name><surname>Pesah</surname> <given-names>A.</given-names></name> <name><surname>Schuld</surname> <given-names>M.</given-names></name> <name><surname>Terashi</surname> <given-names>K.</given-names></name> <name><surname>Vallecorsa</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Quantum machine learning in high energy physics</article-title>. <source>Mach. Learn. Sci. Technol</source>. <volume>2</volume>:<fpage>011003</fpage>. <pub-id pub-id-type="doi">10.1088/2632-2153/abc17d</pub-id></citation>
</ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guest</surname> <given-names>D.</given-names></name> <name><surname>Cranmer</surname> <given-names>K.</given-names></name> <name><surname>Whiteson</surname> <given-names>D.</given-names></name></person-group> (<year>2018</year>). <article-title>Deep learning and its application to LHC physics</article-title>. <source>Ann. Rev. Nucl. Part. Sci</source>. <volume>68</volume>, <fpage>161</fpage>&#x02013;<lpage>181</lpage>. <pub-id pub-id-type="doi">10.1146/annurev-nucl-101917-021019</pub-id></citation>
</ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gupta</surname> <given-names>S.</given-names></name> <name><surname>Zia</surname> <given-names>R.</given-names></name></person-group> (<year>2001</year>). <article-title>Quantum neural networks</article-title>. <source>J. Comput. Syst. Sci</source>. <volume>63</volume>, <fpage>355</fpage>&#x02013;<lpage>383</lpage>. <pub-id pub-id-type="doi">10.1006/jcss.2001.1769</pub-id></citation>
</ref>
<ref id="B33">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Kingma</surname> <given-names>D. P.</given-names></name> <name><surname>Ba</surname> <given-names>J.</given-names></name></person-group> (<year>2014</year>). <source>Adam: a method for stochastic optimization</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/1412.6980">https://arxiv.org/abs/1412.6980</ext-link></citation>
</ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>LaRose</surname> <given-names>R.</given-names></name> <name><surname>Coyle</surname> <given-names>B.</given-names></name></person-group> (<year>2020</year>). <article-title>Robust data encodings for quantum classifiers</article-title>. <source>Phys. Rev. A</source> <volume>102</volume>:<fpage>032420</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevA.102.032420</pub-id></citation>
</ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>G.</given-names></name> <name><surname>Ding</surname> <given-names>Y.</given-names></name> <name><surname>Xie</surname> <given-names>Y.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Tackling the Qubit Mapping Problem for NISQ-Era Quantum Devices,&#x0201D;</article-title> in <source>ASPLOS &#x00027;19: Proceedings of the Twenty-Fourth International Conference on Architectural Support for Programming Languages and Operating Systems</source>.</citation>
</ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>MacKay</surname> <given-names>D.</given-names></name></person-group> (<year>2003</year>). <source>Information Theory, Inference and Learning Algorithms</source>. Cambridge University Press.</citation>
</ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mott</surname> <given-names>A.</given-names></name> <name><surname>Job</surname> <given-names>J.</given-names></name> <name><surname>Vlimant</surname> <given-names>J.-R.</given-names></name> <name><surname>Lidar</surname> <given-names>D.</given-names></name> <name><surname>Spiropulu</surname> <given-names>M.</given-names></name></person-group> (<year>2017</year>). <article-title>Solving a higgs optimization problem with quantum annealing for machine learning</article-title>. <source>Nature</source> <volume>550</volume>, <fpage>375</fpage>&#x02013;<lpage>379</lpage>. <pub-id pub-id-type="doi">10.1038/nature24047</pub-id><pub-id pub-id-type="pmid">29052620</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ngairangbam</surname> <given-names>V. S.</given-names></name> <name><surname>Spannowsky</surname> <given-names>M.</given-names></name> <name><surname>Takeuchi</surname> <given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>Anomaly detection in high-energy physics using a quantum autoencoder</article-title>. <source>Phys. Rev. D</source> <volume>105</volume>:<fpage>095004</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevD.105.095004</pub-id></citation>
</ref>
<ref id="B39">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Pedregosa</surname> <given-names>F.</given-names></name> <name><surname>Varoquaux</surname> <given-names>G.</given-names></name> <name><surname>Gramfort</surname> <given-names>A.</given-names></name> <name><surname>Michel</surname> <given-names>V.</given-names></name> <name><surname>Thirion</surname> <given-names>B.</given-names></name> <name><surname>Grisel</surname> <given-names>O.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>Scikit-learn: machine learning in python</article-title>. <source>J. Mach. Learn. Res</source>. <volume>12</volume>, <fpage>2825</fpage>&#x02013;<lpage>2830</lpage>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.jmlr.org/papers/v12/pedregosa11a.html">https://www.jmlr.org/papers/v12/pedregosa11a.html</ext-link></citation>
</ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Preskill</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>Quantum computing in the NISQ era and beyond</article-title>. <source>Quantum</source> <volume>2</volume>:<fpage>79</fpage>. <pub-id pub-id-type="doi">10.22331/q-2018-08-06-79</pub-id></citation>
</ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Raschka</surname> <given-names>S.</given-names></name></person-group> (<year>2018</year>). <article-title>Mlxtend: Providing machine learning and data science utilities and extensions to python&#x00027;s scientific computing stack</article-title>. <source>J. Open Source Softw</source>. <volume>3</volume>:<fpage>24</fpage>. <pub-id pub-id-type="doi">10.21105/joss.00638</pub-id></citation>
</ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rehm</surname> <given-names>F.</given-names></name> <name><surname>Vallecorsa</surname> <given-names>S.</given-names></name> <name><surname>Grossi</surname> <given-names>M.</given-names></name> <name><surname>Borras</surname> <given-names>K.</given-names></name> <name><surname>Kr&#x000FC;cker</surname> <given-names>D.</given-names></name></person-group> (<year>2023</year>). <article-title>A full quantum generative adversarial network model for high energy physics simulations</article-title>. <source>arXiv:2305.07284</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2305.07284</pub-id></citation>
</ref>
<ref id="B43">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Schuhmacher</surname> <given-names>J.</given-names></name> <name><surname>Boggia</surname> <given-names>L.</given-names></name> <name><surname>Belis</surname> <given-names>V.</given-names></name> <name><surname>Puljak</surname> <given-names>E.</given-names></name> <name><surname>Grossi</surname> <given-names>M.</given-names></name> <name><surname>Pierini</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Unravelling physics beyond the standard model with classical and quantum anomaly detection</article-title>. <pub-id pub-id-type="doi">10.1088/2632-2153/ad07f7</pub-id> Available online at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2301.10787">https://arxiv.org/abs/2301.10787</ext-link></citation>
</ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schuld</surname> <given-names>M.</given-names></name> <name><surname>Bocharov</surname> <given-names>A.</given-names></name> <name><surname>Svore</surname> <given-names>K. M.</given-names></name> <name><surname>Wiebe</surname> <given-names>N.</given-names></name></person-group> (<year>2020</year>). <article-title>Circuit-centric quantum classifiers</article-title>. <source>Phys. Rev. A</source> <volume>101</volume>:<fpage>32308</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevA.101.032308</pub-id></citation>
</ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schuld</surname> <given-names>M.</given-names></name> <name><surname>Sinayskiy</surname> <given-names>I.</given-names></name> <name><surname>Petruccione</surname> <given-names>F.</given-names></name></person-group> (<year>2015</year>). <article-title>Simulating a perceptron on a quantum computer</article-title>. <source>Phys. Lett. A</source> <volume>379</volume>, <fpage>660</fpage>&#x02013;<lpage>663</lpage>. <pub-id pub-id-type="doi">10.1016/j.physleta.2014.11.061</pub-id></citation>
</ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Selvaggi</surname> <given-names>M.</given-names></name></person-group> (<year>2014</year>). <article-title>DELPHES 3: a modular framework for fast-simulation of generic collider experiments</article-title>. <source>J. Phys. Conf. Ser</source>. <volume>523</volume>:<fpage>012033</fpage>. <pub-id pub-id-type="doi">10.1088/1742-6596/523/1/012033</pub-id></citation>
</ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shapoval</surname> <given-names>I.</given-names></name> <name><surname>Calafiura</surname> <given-names>P.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Quantum associative memory in HEP track pattern recognition,&#x0201D;</article-title> in <source>EPJ Web of Conferences</source>, Vol. <volume>214</volume>, <fpage>01012</fpage>. <pub-id pub-id-type="doi">10.1051/epjconf/201921401012</pub-id></citation>
</ref>
<ref id="B48">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sj&#x000F6;strand</surname> <given-names>T.</given-names></name> <name><surname>Ask</surname> <given-names>S.</given-names></name> <name><surname>Christiansen</surname> <given-names>J. R.</given-names></name> <name><surname>Corke</surname> <given-names>R.</given-names></name> <name><surname>Desai</surname> <given-names>N.</given-names></name> <name><surname>Ilten</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>An introduction to PYTHIA 8.2</article-title>. <source>Comput. Phys. Commun</source>. <volume>191</volume>, <fpage>159</fpage>&#x02013;<lpage>177</lpage>. <pub-id pub-id-type="doi">10.1016/j.cpc.2015.01.024</pub-id></citation>
</ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Terashi</surname> <given-names>K.</given-names></name> <name><surname>Kaneda</surname> <given-names>M.</given-names></name> <name><surname>Kishimoto</surname> <given-names>T.</given-names></name> <name><surname>Saito</surname> <given-names>M.</given-names></name> <name><surname>Sawada</surname> <given-names>R.</given-names></name> <name><surname>Tanaka</surname> <given-names>J.</given-names></name></person-group> (<year>2021</year>). <article-title>Event classification with quantum machine learning in high-energy physics</article-title>. <source>Comput. Softw. Big Sci</source>. <volume>5</volume>, <fpage>1</fpage>&#x02013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1007/s41781-020-00047-7</pub-id></citation>
</ref>
<ref id="B50">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Thaler</surname> <given-names>J.</given-names></name> <name><surname>Van Tilburg</surname> <given-names>K.</given-names></name></person-group> (<year>2011</year>). <article-title>Identifying boosted objects with n-subjettiness</article-title>. <source>J. High Energy Phys</source>. <volume>2011</volume>, <fpage>1</fpage>&#x02013;<lpage>28</lpage>. <pub-id pub-id-type="doi">10.1007/JHEP03(2011)015</pub-id></citation>
</ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>T&#x000FC;ys&#x000FC;z</surname> <given-names>C.</given-names></name> <name><surname>Carminati</surname> <given-names>F.</given-names></name> <name><surname>Demirk&#x000F6;z</surname> <given-names>B.</given-names></name> <name><surname>Dobos</surname> <given-names>D.</given-names></name> <name><surname>Fracas</surname> <given-names>F.</given-names></name> <name><surname>Novotny</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Particle track reconstruction with quantum algorithms</article-title>. <source>EPJ Web Conf.</source> <volume>245</volume>:<fpage>09013</fpage>. <pub-id pub-id-type="doi">10.1051/epjconf/202024509013</pub-id></citation>
</ref>
<ref id="B52">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vouros</surname> <given-names>A.</given-names></name> <name><surname>Langdell</surname> <given-names>S.</given-names></name> <name><surname>Croucher</surname> <given-names>M.</given-names></name> <name><surname>Vasilaki</surname> <given-names>E.</given-names></name></person-group> (<year>2021</year>). <article-title>An empirical comparison between stochastic and deterministic centroid initialisation for k-means variations</article-title>. <source>Mach. Learn</source>. <volume>110</volume>, <fpage>1975</fpage>&#x02013;<lpage>2003</lpage>. <pub-id pub-id-type="doi">10.1007/s10994-021-06021-7</pub-id></citation>
</ref>
<ref id="B53">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wei</surname> <given-names>A. Y.</given-names></name> <name><surname>Naik</surname> <given-names>P.</given-names></name> <name><surname>Harrow</surname> <given-names>A. W.</given-names></name> <name><surname>Thaler</surname> <given-names>J.</given-names></name></person-group> (<year>2020</year>). <article-title>Quantum algorithms for jet clustering</article-title>. <source>Phys. Rev. D</source> <volume>101</volume>:<fpage>094015</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevD.101.094015</pub-id></citation>
</ref>
<ref id="B54">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wilkin</surname> <given-names>G. A.</given-names></name> <name><surname>Huang</surname> <given-names>X.</given-names></name></person-group> (<year>2008</year>). <article-title>A practical comparison of two k-means clustering algorithms</article-title>. <source>BMC Bioinformatics</source> <volume>9</volume>(<supplement>Suppl. 6</supplement>):<fpage>S19</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-9-S6-S19</pub-id></citation>
</ref>
<ref id="B55">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wo&#x0017A;niak</surname> <given-names>K. A.</given-names></name> <name><surname>Belis</surname> <given-names>V.</given-names></name> <name><surname>Puljak</surname> <given-names>E.</given-names></name> <name><surname>Barkoutsos</surname> <given-names>P.</given-names></name> <name><surname>Dissertori</surname> <given-names>G.</given-names></name> <name><surname>Grossi</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Quantum anomaly detection in the latent space of proton collision events at the LHC</article-title>. <source>arXiv:2301.10780.</source> <pub-id pub-id-type="doi">10.48550/arXiv.2301.10780</pub-id></citation>
</ref>
<ref id="B56">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>S. L.</given-names></name> <name><surname>Sun</surname> <given-names>S.</given-names></name> <name><surname>Guan</surname> <given-names>W.</given-names></name> <name><surname>Zhou</surname> <given-names>C.</given-names></name> <name><surname>Chan</surname> <given-names>J.</given-names></name> <name><surname>Cheng</surname> <given-names>C. L.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Application of quantum machine learning using the quantum kernel algorithm on high energy physics analysis at the LHC</article-title>. <source>Phys. Rev. Res</source>. <volume>3</volume>:<fpage>033221</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevResearch.3.033221</pub-id></citation>
</ref>
<ref id="B57">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zlokapa</surname> <given-names>A.</given-names></name> <name><surname>Anand</surname> <given-names>A.</given-names></name> <name><surname>Vlimant</surname> <given-names>J.-R.</given-names></name> <name><surname>Duarte</surname> <given-names>J. M.</given-names></name> <name><surname>Job</surname> <given-names>J.</given-names></name> <name><surname>Lidar</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2021a</year>). <article-title>Charged particle tracking with quantum annealing optimization</article-title>. <source>Quant. Mach. Intell</source>. <volume>3</volume>, <fpage>1</fpage>&#x02013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1007/s42484-021-00054-w</pub-id></citation>
</ref>
<ref id="B58">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zlokapa</surname> <given-names>A.</given-names></name> <name><surname>Mott</surname> <given-names>A.</given-names></name> <name><surname>Job</surname> <given-names>J.</given-names></name> <name><surname>Vlimant</surname> <given-names>J.-R.</given-names></name> <name><surname>Lidar</surname> <given-names>D.</given-names></name> <name><surname>Spiropulu</surname> <given-names>M.</given-names></name></person-group> (<year>2021b</year>). <article-title>Quantum adiabatic machine learning with zooming</article-title>. <source>Phys. Rev. A</source> <volume>102</volume>:<fpage>062405</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevA.102.062405</pub-id></citation>
</ref>
</ref-list>
</back>
</article>