<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Public Health</journal-id>
<journal-title>Frontiers in Public Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Public Health</abbrev-journal-title>
<issn pub-type="epub">2296-2565</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpubh.2024.1303319</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Public Health</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Artificial intelligence large language model ChatGPT: is it a trustworthy and reliable source of information for sarcoma patients?</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author"><name><surname>Valentini</surname> <given-names>Marisa</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2518197/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author"><name><surname>Szkandera</surname> <given-names>Joanna</given-names></name><xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1522384/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author"><name><surname>Smolle</surname> <given-names>Maria Anna</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1869631/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
<contrib contrib-type="author"><name><surname>Scheipl</surname> <given-names>Susanne</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
</contrib>
<contrib contrib-type="author"><name><surname>Leithner</surname> <given-names>Andreas</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/116257/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Andreou</surname> <given-names>Dimosthenis</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2322178/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Orthopaedics and Trauma, Medical University of Graz</institution>, <addr-line>Graz</addr-line>, <country>Austria</country></aff>
<aff id="aff2"><sup>2</sup><institution>Division of Oncology, Department of Internal Medicine, Medical University of Graz</institution>, <addr-line>Graz</addr-line>, <country>Austria</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0002">
<p>Edited by: Ulises Cort&#x00E9;s, Universitat Politecnica de Catalunya, Spain</p>
</fn>
<fn fn-type="edited-by" id="fn0003">
<p>Reviewed by: James C. L. Chow, University of Toronto, Canada</p>
<p>Shailesh Tripathi, Rajendra Institute of Medical Sciences, India</p>
<p>Sumeet Patiyal, National Cancer Institute (NIH), United States</p>
</fn>
<corresp id="c001">&#x002A;Correspondence: Dimosthenis Andreou, <email>dimosthenis.andreou@medunigraz.at</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>22</day>
<month>03</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>12</volume>
<elocation-id>1303319</elocation-id>
<history>
<date date-type="received">
<day>30</day>
<month>09</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>06</day>
<month>03</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2024 Valentini, Szkandera, Smolle, Scheipl, Leithner and Andreou.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Valentini, Szkandera, Smolle, Scheipl, Leithner and Andreou</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec id="sec1">
<title>Introduction</title>
<p>Since its introduction in November 2022, the artificial intelligence large language model ChatGPT has taken the world by storm. Among other applications it can be used by patients as a source of information on diseases and their treatments. However, little is known about the quality of the sarcoma-related information ChatGPT provides. We therefore aimed at analyzing how sarcoma experts evaluate the quality of ChatGPT&#x2019;s responses on sarcoma-related inquiries and assess the bot&#x2019;s answers in specific evaluation metrics.</p>
</sec>
<sec id="sec2">
<title>Methods</title>
<p>The ChatGPT responses to a sample of 25 sarcoma-related questions (5 definitions, 9 general questions, and 11 treatment-related inquiries) were evaluated by 3 independent sarcoma experts. Each response was compared with authoritative resources and international guidelines and graded on 5 different metrics using a 5-point Likert scale: completeness, misleadingness, accuracy, being up-to-date, and appropriateness. This resulted in a maximum of 25 and a minimum of 5 points per answer, with higher scores indicating a higher response quality. Scores &#x2265;21 points were rated as very good, between 16 and 20 as good, while scores &#x2264;15 points were classified as poor (11&#x2013;15) and very poor (&#x2264;10).</p>
</sec>
<sec id="sec3">
<title>Results</title>
<p>The median score that ChatGPT&#x2019;s answers achieved was 18.3 points (IQR, i.e., Inter-Quartile Range, 12.3&#x2013;20.3 points). Six answers were classified as very good, 9 as good, while 5 answers each were rated as poor and very poor. The best scores were documented in the evaluation of how appropriate the response was for patients (median, 3.7 points; IQR, 2.5&#x2013;4.2 points), which were significantly higher compared to the accuracy scores (median, 3.3 points; IQR, 2.0&#x2013;4.2 points; <italic>p</italic> =&#x2009;0.035). ChatGPT fared considerably worse with treatment-related questions, with only 45% of its responses classified as good or very good, compared to general questions (78% of responses good/very good) and definitions (60% of responses good/very good).</p>
</sec>
<sec id="sec4">
<title>Discussion</title>
<p>The answers ChatGPT provided on a rare disease, such as sarcoma, were found to be of very inconsistent quality, with some answers being classified as very good and others as very poor. Sarcoma physicians should be aware of the risks of misinformation that ChatGPT poses and advise their patients accordingly.</p>
</sec>
</abstract>
<kwd-group>
<kwd>artificial intelligence</kwd>
<kwd>ChatGPT</kwd>
<kwd>sarcoma</kwd>
<kwd>patient information</kwd>
<kwd>information quality</kwd>
</kwd-group>
<counts>
<fig-count count="4"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="24"/>
<page-count count="6"/>
<word-count count="4456"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Digital Public Health</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec5">
<title>Introduction</title>
<p>Sarcomas are a heterogeneous group of rare malignant tumors, accounting for merely 1% of all cancer diagnoses (<xref ref-type="bibr" rid="ref1">1</xref>). Their overall incidence is estimated at approximately 7.1&#x2013;7.4 per 100,000 patients per year (<xref ref-type="bibr" rid="ref1">1</xref>, <xref ref-type="bibr" rid="ref2">2</xref>). Due to their rarity and complexity, international guidelines recommend a multidisciplinary diagnostic and therapeutic approach at specialized sarcoma centers (<xref ref-type="bibr" rid="ref3 ref4 ref5 ref6">3&#x2013;6</xref>). Finding accurate and reliable information can be challenging for patients, caregivers, and healthcare professionals who are not specialized in sarcoma treatment (<xref ref-type="bibr" rid="ref7">7</xref>).</p>
<p>Since its introduction in November 2022, the Artificial Intelligence (AI) Chat Generative Pre-trained Transformer (ChatGPT) has taken the world by storm (<xref ref-type="bibr" rid="ref8 ref9 ref10 ref11 ref12">8&#x2013;12</xref>). ChatGPT is a 175-billion-parameter natural language processing model (GPT 3.5), able to generate conversation-style responses to user input (<xref ref-type="bibr" rid="ref10">10</xref>). As a large language model trained on a massive dataset of text and data available online, it is able to generate responses to a wide range of questions (<xref ref-type="bibr" rid="ref13">13</xref>). Due to its very nature, the artificial intelligence chatbot can address an almost limitless range of inquiries, but it is not capable of verifying the accuracy of its responses and may not provide the most up-to-date or comprehensive information. Among other applications, it has been used and will likely be increasingly used by patients as a source of information on diseases and their treatments, but its potential to generate inaccurate or false information is a major cause for concern (<xref ref-type="bibr" rid="ref14 ref15 ref16 ref17 ref18">14&#x2013;18</xref>). Previous studies have shown that the bot may be a useful source of information for common rheumatic diseases (<xref ref-type="bibr" rid="ref19">19</xref>) and provide more empathetic answers to general public questions compared to physicians (<xref ref-type="bibr" rid="ref14">14</xref>), but also demonstrated that the quality of the bot&#x2019;s responses is worse when confronted with more complex medical questions (<xref ref-type="bibr" rid="ref20">20</xref>, <xref ref-type="bibr" rid="ref21">21</xref>).</p>
<p>Very little is known about the quality of the sarcoma-related information ChatGPT provides. The authors chose to focus on this rare group of tumors, as their complexity and the lack of safe online information on this topic are well known and a cause for concern (<xref ref-type="bibr" rid="ref22 ref23 ref24">22&#x2013;24</xref>). Therefore, we aimed to evaluate how complete, misleading, accurate, up-to-date, and appropriate the OpenAI chatbot&#x2019;s answers to sarcoma-related inquiries are, thereby assessing the quality of the information it imparts. Specifically, we analyzed how sarcoma experts evaluate the quality of ChatGPT&#x2019;s responses to sarcoma-related inquiries, how the bot&#x2019;s responses perform in specific metrics of the evaluation, and whether ChatGPT fares better with a specific type of question.</p>
</sec>
<sec sec-type="materials|methods" id="sec6">
<title>Materials and methods</title>
<p>A sample of 25 representative sarcoma-related questions was posed to ChatGPT (<italic>ChatGPT 3.5 free version</italic>) (<xref ref-type="table" rid="tab1">Table 1</xref>). These included 5 definitions (e.g., what is a tenosynovial giant cell tumor?), 9 general questions (e.g., which imaging modalities are best in follow-up after treatment of soft tissue sarcoma, or what are common side effects of chemotherapy for Ewing sarcoma?), and 11 treatment-related inquiries (e.g., what is the optimal treatment of a desmoid tumor?). Three sarcoma experts (2 orthopedic oncologists and 1 medical oncologist) evaluated the artificial intelligence chatbot&#x2019;s responses, comparing them to international guidelines and authoritative resources. The experts performed their evaluations independently, without contact with one another. Each response was graded with regard to 5 different aspects/evaluation metrics, using a 5-point Likert scale: completeness, misleadingness, accuracy (i.e., whether the response contained relevant factual errors), being up-to-date, and appropriateness (i.e., whether it would be a good source of information for patients) (<xref ref-type="table" rid="tab2">Table 2</xref>). This resulted in a maximum of 25 and a minimum of 5 points per answer, with higher scores indicating higher quality of the ChatGPT response. Scores &#x2265;21 were defined as very good, between 16 and 20 points as good, while responses that scored 15 points or less were classified as poor (11&#x2013;15) and very poor (&#x2264;10).</p>
<table-wrap position="float" id="tab1"><label>Table 1</label>
<caption>
<p>The 25 sarcoma-related questions which were posed to ChatGPT.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">
<bold>N&#x00B0;</bold>
</th>
<th align="left" valign="top">
<bold>Questions</bold>
</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="bottom">1</td>
<td align="left" valign="bottom">What is the optimal treatment of a desmoid tumor?</td>
</tr>
<tr>
<td align="left" valign="bottom">2</td>
<td align="left" valign="bottom">What is the optimal treatment of Ewing sarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">3</td>
<td align="left" valign="bottom">What are the most helpful chemotherapeutic agents in the treatment of Ewing sarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">4</td>
<td align="left" valign="bottom">Is follow-up necessary after treatment of soft tissue sarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">5</td>
<td align="left" valign="bottom">Which imaging modalities are best in follow-up after treatment of soft tissue sarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">6</td>
<td align="left" valign="bottom">What is the preferred surgical treatment for clear cell chondrosarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">7</td>
<td align="left" valign="bottom">What is the optimal treatment of a retroperitoneal liposarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">8</td>
<td align="left" valign="bottom">Is preoperative radiotherapy better than postoperative radiotherapy in patients with myxoid liposarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">9</td>
<td align="left" valign="bottom">What late effects are possible after successful multidisciplinary treatment of osteosarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">10</td>
<td align="left" valign="bottom">How can I enroll in a clinical trial for Ewing sarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">11</td>
<td align="left" valign="bottom">Which clinical trials are available for Ewing sarcoma in Germany?</td>
</tr>
<tr>
<td align="left" valign="bottom">12</td>
<td align="left" valign="bottom">What is a biopsy for Ewing sarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">13</td>
<td align="left" valign="bottom">What is the difference between enchondromas and atypical cartilaginous tumors?</td>
</tr>
<tr>
<td align="left" valign="bottom">14</td>
<td align="left" valign="bottom">What are common side effects of chemotherapy for Ewing sarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">15</td>
<td align="left" valign="bottom">What is the difference between a lipoma and an atypical lipomatous tumor?</td>
</tr>
<tr>
<td align="left" valign="bottom">16</td>
<td align="left" valign="bottom">What is a tenosynovial giant cell tumor?</td>
</tr>
<tr>
<td align="left" valign="bottom">17</td>
<td align="left" valign="bottom">I have a Ewing sarcoma of the upper thigh bone. What is my prognosis?</td>
</tr>
<tr>
<td align="left" valign="bottom">18</td>
<td align="left" valign="bottom">Is an allograft-prosthetic-composite better than a megaprosthesis for an osteosarcoma of the proximal tibia?</td>
</tr>
<tr>
<td align="left" valign="bottom">19</td>
<td align="left" valign="bottom">What is a rotationplasty?</td>
</tr>
<tr>
<td align="left" valign="bottom">20</td>
<td align="left" valign="bottom">Is rotationplasty better or worse than above-knee amputation for osteosarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">21</td>
<td align="left" valign="bottom">When is postoperative radiotherapy recommended for Ewing sarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">22</td>
<td align="left" valign="bottom">When is postoperative radiotherapy recommended for osteosarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">23</td>
<td align="left" valign="bottom">What are the advantages and disadvantages of preoperative denosumab treatment for giant cell tumor of bone?</td>
</tr>
<tr>
<td align="left" valign="bottom">24</td>
<td align="left" valign="bottom">What functional outcome can be expected after proximal humerus replacement with megaprosthesis for osteosarcoma?</td>
</tr>
<tr>
<td align="left" valign="bottom">25</td>
<td align="left" valign="bottom">What is the best treatment for gastrointestinal stromal tumors?</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap position="float" id="tab2"><label>Table 2</label>
<caption>
<p>The aspects of each ChatGPT response that were evaluated using a 5-point Likert scale.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" colspan="2">Evaluated aspects</th>
<th align="center" valign="top" colspan="5">Score</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">1</td>
<td align="left" valign="top">Is the provided information complete?</td>
<td align="center" valign="top"><bold>5</bold> <italic>strongly agree</italic></td>
<td align="center" valign="top"><bold>4</bold> <italic>agree</italic></td>
<td align="center" valign="top"><bold>3</bold> <italic>neutral</italic></td>
<td align="center" valign="top"><bold>2</bold> <italic>disagree</italic></td>
<td align="center" valign="top"><bold>1</bold> <italic>strongly disagree</italic></td>
</tr>
<tr>
<td align="left" valign="top">2</td>
<td align="left" valign="top">Is the provided answer misleading?</td>
<td align="center" valign="top"><bold>1</bold> <italic>strongly agree</italic></td>
<td align="center" valign="top"><bold>2</bold> <italic>agree</italic></td>
<td align="center" valign="top"><bold>3</bold> <italic>neutral</italic></td>
<td align="center" valign="top"><bold>4</bold> <italic>disagree</italic></td>
<td align="center" valign="top"><bold>5</bold> <italic>strongly disagree</italic></td>
</tr>
<tr>
<td align="left" valign="top">3</td>
<td align="left" valign="top">Are there relevant factual errors in the provided information?</td>
<td align="center" valign="top"><bold>1</bold> <italic>strongly agree</italic></td>
<td align="center" valign="top"><bold>2</bold> <italic>agree</italic></td>
<td align="center" valign="top"><bold>3</bold> <italic>neutral</italic></td>
<td align="center" valign="top"><bold>4</bold> <italic>disagree</italic></td>
<td align="center" valign="top"><bold>5</bold> <italic>strongly disagree</italic></td>
</tr>
<tr>
<td align="left" valign="top">4</td>
<td align="left" valign="top">Is the provided information up to date?</td>
<td align="center" valign="top"><bold>5</bold> <italic>strongly agree</italic></td>
<td align="center" valign="top"><bold>4</bold> <italic>agree</italic></td>
<td align="center" valign="top"><bold>3</bold> <italic>neutral</italic></td>
<td align="center" valign="top"><bold>2</bold> <italic>disagree</italic></td>
<td align="center" valign="top"><bold>1</bold> <italic>strongly disagree</italic></td>
</tr>
<tr>
<td align="left" valign="top">5</td>
<td align="left" valign="top">Is the provided answer a good source of information for patients?</td>
<td align="center" valign="top"><bold>5</bold> <italic>strongly agree</italic></td>
<td align="center" valign="top"><bold>4</bold> <italic>agree</italic></td>
<td align="center" valign="top"><bold>3</bold> <italic>neutral</italic></td>
<td align="center" valign="top"><bold>2</bold> <italic>disagree</italic></td>
<td align="center" valign="top"><bold>1</bold> <italic>strongly disagree</italic></td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Approval from our local ethics committee was not required, as the study did not involve human subjects.</p>
<p>Statistical analyses were performed with Stata Version 16.1 for Mac (<italic>StataCorp, College Station, TX, US</italic>). Continuous variables were checked for normality with the Shapiro&#x2013;Wilk test. Median values with the respective Inter-Quartile Ranges (IQR) were reported for non-normally distributed variables. The values of different aspects of a ChatGPT response were compared with the Wilcoxon signed-rank test. The overall scores of the responses in the three pre-defined categories (definitions, general questions, and treatment-related inquiries) were compared with Kruskal&#x2013;Wallis and post-hoc Dunn tests. A <italic>p</italic>-value of &#x003C;0.05 was considered significant.</p>
</sec>
<sec sec-type="results" id="sec7">
<title>Results</title>
<p>The ChatGPT responses achieved a median score of 18.3 points (IQR, 12.3&#x2013;20.3 points). The individual scores of each of the 5 evaluated aspects amounted to a median of 3.5 points (IQR, 2.4&#x2013;4.2 points). Six of the 25 responses (24%) were classified as very good, 9/25 (36%) as good, while 5/25 answers each (20%) were defined as poor and very poor, respectively (<xref ref-type="fig" rid="fig1">Figure 1</xref>).</p>
<fig position="float" id="fig1"><label>Figure 1</label>
<caption>
<p>This figure depicts the evaluation of the quality of ChatGPT responses by sarcoma experts. The percentage value in each bar is based on the total number of questions (25).</p>
</caption>
<graphic xlink:href="fpubh-12-1303319-g001.tif"/>
</fig>
<p>Concerning the 5 evaluated aspects, the best scores (<xref ref-type="fig" rid="fig2">Figure 2</xref>) were recorded in the evaluation metric of how appropriate the response was for patients (median, 3.7 points; IQR, 2.5&#x2013;4.2 points), which were significantly higher compared to the accuracy scores (median, 3.3 points; IQR, 2.0&#x2013;4.2 points; <italic>p</italic> =&#x2009;0.035). On the other hand, with the numbers available, the difference between the accuracy and completeness scores (median, 3.5 points; IQR, 2.8&#x2013;4.0 points; <italic>p</italic> =&#x2009;0.066) did not reach statistical significance. The remaining comparisons between the evaluation metrics showed no statistically significant differences.</p>
<fig position="float" id="fig2"><label>Figure 2</label>
<caption>
<p>The graph shows the scores that the ChatGPT responses achieved in each specific metric of the evaluation. On the X axis the individual metrics are presented as A (completeness), B (misleadingness), C (accuracy), D (being up-to-date), and E (appropriateness). The Y axis shows the score per aspect on a 5-point Likert scale with the respective medians and IQRs.</p>
</caption>
<graphic xlink:href="fpubh-12-1303319-g002.tif"/>
</fig>
<p>As for the 3 categories of questions, ChatGPT fared best with general inquiries, achieving good and very good overall scores in 3/9 (33%) and 4/9 (44%) questions, respectively. Only 1/9 (11%) response each was rated as poor and very poor, respectively (<xref ref-type="fig" rid="fig3">Figure 3</xref>). On the other hand, the bot fared considerably worse on treatment-related questions, achieving good and very good overall scores in 3/11 (27%) and 2/11 (18%), respectively. 3/11 (27%) responses each were classified as poor and very poor, respectively (<xref ref-type="fig" rid="fig4">Figure 4</xref>). However, with the numbers available for this analysis, these differences did not reach statistical significance (<italic>p</italic> =&#x2009;0.063). Finally, the bot&#x2019;s responses on definitions ranked better than treatment-related replies; 60% of the ChatGPT responses were classified as good (2/5, 40%) or very good (1/5, 20%), while 1/5 (20%) response each was classified as poor and very poor, respectively. No statistical significance was detected in this case either.</p>
<fig position="float" id="fig3"><label>Figure 3</label>
<caption>
<p>This figure shows the evaluation of the quality of ChatGPT responses to general questions. The percentage value in each bar is based on the total number of questions in this category (9).</p>
</caption>
<graphic xlink:href="fpubh-12-1303319-g003.tif"/>
</fig>
<fig position="float" id="fig4"><label>Figure 4</label>
<caption>
<p>This figure shows the evaluation of the quality of ChatGPT responses to treatment related questions. The percentage value in each bar is based on the total number of questions in this category (11).</p>
</caption>
<graphic xlink:href="fpubh-12-1303319-g004.tif"/>
</fig>
</sec>
<sec sec-type="discussion" id="sec8">
<title>Discussion</title>
<p>Based on the extraordinary popularity the artificial intelligence bot ChatGPT achieved in only a few months, it is expected to quickly become an everyday health information source for patients (<xref ref-type="bibr" rid="ref8">8</xref>, <xref ref-type="bibr" rid="ref14 ref15 ref16 ref17">14&#x2013;17</xref>). However, little is known about the quality of information it can provide regarding rare diseases, such as sarcoma. Our study demonstrated that the responses provided by ChatGPT to sarcoma-related questions were very inconsistent in quality, ranging from very good to very poor ones. The responses scored better in the metric of appropriateness for patients and worse in their accuracy, while the bot generally fared better with general questions and worse with specific treatment-related inquiries.</p>
<p>We acknowledge that our study has several limitations. First of all, given the variety in presentation, prognosis, and treatment of bone and soft tissue sarcomas, our sample of 25 questions cannot be expected to cover all aspects of these rare diseases. However, we deliberately opted for a relatively small sample to avoid a bloated analysis, while the individual questions were carefully chosen based on our clinical experience to be representative of the wide range of questions patients, relatives, or caregivers might ask the OpenAI chatbot. Another possible limitation of our study is that ChatGPT 3.5 was used. This free version of the AI model was trained on a massive dataset of information before its release in November 2022 and does not undergo regular updates. A newer GPT 4 model was released in March 2023. Its enhanced capabilities include multimodality (it also accepts images as input) and the ability to interact with external interfaces. On the other hand, it needs to be considered that ChatGPT has 180.5 million active users, but only an estimated 1% subscribe to &#x201C;ChatGPT Plus&#x201D; (giving access to the GPT 4 model for $20/month).<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> This aspect of the accessibility and actual use of the paid GPT 4 version is of great importance. Most patients with sarcoma will most likely access the free version (ChatGPT 3.5) to seek information. Therefore, the authors believe that this study&#x2019;s results are relevant as they are based on the ChatGPT version that most patients and their relatives will actually use. As such, our results reflect the information most patients will receive through the free model. Furthermore, it is not guaranteed that the GPT 4 model provides more accurate information on a rare disease, such as sarcoma, taking into consideration the long-known problem of inaccurate, outdated, and misleading sarcoma information even in reputable online sources (<xref ref-type="bibr" rid="ref22 ref23 ref24">22&#x2013;24</xref>).</p>
<p>The overall quality of ChatGPT responses on sarcoma-related inquiries in our study varied from very good to very poor. This variability harbors a great risk for patients in case they use ChatGPT as an information source. If the first ChatGPT responses to patient queries happen to be similar in quality and context to those provided by the treating physicians, patients would likely deem the bot to be trustworthy, without realizing that further answers might be of inferior or even very poor quality. Chow et al. (<xref ref-type="bibr" rid="ref17">17</xref>) pointed out similar concerns regarding ChatGPT&#x2019;s use as a medical chatbot: as it draws information from the internet, this &#x201C;disruptive technology&#x201D; can result in &#x201C;questionable and uncontrollable&#x201D; accuracy and currency of medical information. Contrary to our findings on sarcoma-related responses, Uz and Umay recently evaluated the responses of ChatGPT to frequently searched keywords relating to common rheumatic diseases and found them to be a reliable and useful source of information for patients (<xref ref-type="bibr" rid="ref19">19</xref>).</p>
<p>A possible reason for this discrepancy is the rarity of sarcomas, compared to the relatively high prevalence of rheumatic disorders. Given that ChatGPT is trained on massive datasets of online available information, incomplete, erroneous, or outdated online data on a specific topic would lead to poorer bot responses. Zade et al. (<xref ref-type="bibr" rid="ref24">24</xref>) previously analyzed the quality of online resources for orthopedic oncology in 48 websites and found a general lack of quality and accuracy, an issue that has been reported by other studies as well (<xref ref-type="bibr" rid="ref23">23</xref>). As such, it appears unlikely that ChatGPT will be able to consistently provide high-quality responses to sarcoma-related queries in the foreseeable future.</p>
<p>Our evaluation of different parameters of ChatGPT&#x2019;s responses demonstrated that the bot achieved its worst scores in the accuracy metric, a finding well in line with the previously mentioned weaknesses of the artificial intelligence&#x2019;s sources on a rare disease like sarcoma.</p>
<p>On the other hand, its best scores in our study were documented in the metric &#x201C;appropriateness for patients.&#x201D; Our results are in line with the findings of Ayers et al. (<xref ref-type="bibr" rid="ref14">14</xref>), who performed a blinded study comparing physicians&#x2019; and ChatGPT&#x2019;s responses to public questions asked by patients on a social media forum. The bot&#x2019;s responses were rated significantly more empathetic than the physicians&#x2019; replies and achieved the highest empathy scores on a Likert scale approximately 10 times more often compared to the physicians&#x2019; responses (<xref ref-type="bibr" rid="ref14">14</xref>). The authors concluded that the addition of artificial intelligence assistants to patient messaging workflows appeared to be promising, stressing however the need for human review of generated content for accuracy and potential false or fabricated information (<xref ref-type="bibr" rid="ref14">14</xref>).</p>
<p>Finally, we were able to show that ChatGPT fared better with general questions and definitions, and considerably worse with treatment-related inquiries. Several other studies have also demonstrated that the quality of the bot&#x2019;s responses in a specific medical field may vary depending on the complexity of the posed inquiries. Hoch et al. (<xref ref-type="bibr" rid="ref20">20</xref>) analyzed the accuracy of ChatGPT&#x2019;s responses to practice multiple choice questions designed for otolaryngology board certification and found significant variations in the rates of correct responses between different subspecialties. The authors suggested that this finding might be explained by the varying availability and quality of training data in the different categories, with the bot performing better in the most common categories and worse in rarer subspecialties with potentially more limited literature data (<xref ref-type="bibr" rid="ref20">20</xref>). Another study by Jung et al. (<xref ref-type="bibr" rid="ref21">21</xref>) evaluated the performance of ChatGPT in answering questions from the German state examinations for medical students. While the bot was able to pass both parts of the exam, it fared better with questions on facts and definitions and worse with questions necessitating an understanding of complex relationships and multimodal diagnostics or applied knowledge (<xref ref-type="bibr" rid="ref21">21</xref>). The importance of a multidisciplinary approach at specialized centers for sarcoma patients has been well documented (<xref ref-type="bibr" rid="ref3 ref4 ref5 ref6 ref7">3&#x2013;7</xref>), and it is considered a prerequisite for optimal patient care (<xref ref-type="bibr" rid="ref3">3</xref>). We therefore believe that sarcoma patients should be discouraged from using ChatGPT as a source of information for treatment options and approaches.</p>
<p>In conclusion, the answers ChatGPT provided on a rare disease, such as sarcoma, were found to be of very inconsistent quality, with some answers being classified as very good and others as very poor, depending on the complexity and nature of the question. Given the extraordinary popularity that ChatGPT achieved in only a few months, sarcoma physicians should be aware of the risks of misinformation that ChatGPT poses and advise their patients accordingly. However, given that ChatGPT achieved higher scores in the evaluation of how appropriate its responses are for patients, future studies should evaluate whether it can be used by sarcoma physicians as a supervised tool to better communicate complex aspects of the disease to affected patients.</p>
</sec>
<sec sec-type="data-availability" id="sec9">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="ethics-statement" id="sec10">
<title>Ethics statement</title>
<p>An approval from our local ethics committee was not required, as the study did not involve human subjects.</p>
</sec>
<sec sec-type="author-contributions" id="sec11">
<title>Author contributions</title>
<p>MV: Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft, Methodology, Investigation, Data curation, Conceptualization. JS: Writing &#x2013; review &#x0026; editing, Supervision, Investigation. MS: Writing &#x2013; review &#x0026; editing, Formal analysis, Data curation. SS: Writing &#x2013; review &#x0026; editing, Supervision. AL: Writing &#x2013; review &#x0026; editing, Supervision, Methodology, Investigation, Conceptualization. DA: Writing &#x2013; review &#x0026; editing, Validation, Supervision, Methodology, Investigation, Formal analysis, Conceptualization.</p>
</sec>
</body>
<back>
<sec sec-type="funding-information" id="sec12">
<title>Funding</title>
<p>The author(s) declare that no financial support was received for the research, authorship, and/or publication of this article.</p>
</sec>
<sec sec-type="COI-statement" id="sec13">
<title>Conflict of interest</title>
<p>MV reports travel support by Alphamed, outside the submitted work. JS reports participation in advisory boards and invited speaker fees by PharmaMar, Bayer, Roche, Lilly, and Amgen, receipt of travel expenses by PharmaMar, Roche, Merck, Lilly, Amgen, and Bristol Myers Squibb, and research funding by PharmaMar, Roche, and Eisai, outside the submitted work. MS reports travel support by Alphamed, outside the submitted work. SS reports travel support and funding for conference participation from PharmaMar and Alphamed, and research funding from Roche Austria, outside the submitted work. AL reports institutional educational grants by Johnson &#x0026; Johnson, Alphamed and Medacta, outside the submitted work. DA reports receipt of honoraria to institution for an invited presentation from PharmaMar, outside the submitted work.</p>
</sec>
<sec id="sec100" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn id="fn0001">
<p><sup>1</sup><ext-link xlink:href="https://help.openai.com/en/articles/7102672-how-can-i-access-gpt-4" ext-link-type="uri">https://help.openai.com/en/articles/7102672-how-can-i-access-gpt-4</ext-link>; <ext-link xlink:href="https://nerdynav.com/gpt-4-statistics-facts/#how-to-access-gpt4-free-and-paid-methods" ext-link-type="uri">https://nerdynav.com/gpt-4-statistics-facts/#how-to-access-gpt4-free-and-paid-methods</ext-link></p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="ref1"><label>1.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stiller</surname> <given-names>CA</given-names></name> <name><surname>Trama</surname> <given-names>A</given-names></name> <name><surname>Serraino</surname> <given-names>D</given-names></name> <name><surname>Rossi</surname> <given-names>S</given-names></name> <name><surname>Navarro</surname> <given-names>C</given-names></name> <name><surname>Chirlaque</surname> <given-names>MD</given-names></name> <etal/></person-group>. <article-title>Descriptive epidemiology of sarcomas in Europe: report from the RARECARE project</article-title>. <source>Eur J Cancer</source>. (<year>2013</year>) <volume>49</volume>:<fpage>684</fpage>&#x2013;<lpage>95</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ejca.2012.09.011</pub-id>, PMID: <pub-id pub-id-type="pmid">23079473</pub-id></citation></ref>
<ref id="ref2"><label>2.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gage</surname> <given-names>MM</given-names></name> <name><surname>Nagarajan</surname> <given-names>N</given-names></name> <name><surname>Ruck</surname> <given-names>JM</given-names></name> <name><surname>Canner</surname> <given-names>JK</given-names></name> <name><surname>Khan</surname> <given-names>S</given-names></name> <name><surname>Giuliano</surname> <given-names>K</given-names></name> <etal/></person-group>. <article-title>Sarcomas in the United States: recent trends and a call for improved staging</article-title>. <source>Oncotarget</source>. (<year>2019</year>) <volume>10</volume>:<fpage>2462</fpage>&#x2013;<lpage>74</lpage>. doi: <pub-id pub-id-type="doi">10.18632/oncotarget.26809</pub-id>, PMID: <pub-id pub-id-type="pmid">31069009</pub-id></citation></ref>
<ref id="ref3"><label>3.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gronchi</surname> <given-names>A</given-names></name> <name><surname>Miah</surname> <given-names>AB</given-names></name> <name><surname>Dei Tos</surname> <given-names>AP</given-names></name> <name><surname>Abecassis</surname> <given-names>N</given-names></name> <name><surname>Bajpai</surname> <given-names>J</given-names></name> <name><surname>Bauer</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>ESMO guidelines committee, EURACAN and GENTURIS. Soft tissue and visceral sarcomas: ESMO-EURACAN-GENTURIS clinical practice guidelines for diagnosis, treatment and follow-up&#x2606;</article-title>. <source>Ann Oncol</source>. (<year>2021</year>) <volume>32</volume>:<fpage>1348</fpage>&#x2013;<lpage>65</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.annonc.2021.07.006</pub-id>, PMID: <pub-id pub-id-type="pmid">34303806</pub-id></citation></ref>
<ref id="ref4"><label>4.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nakayama</surname> <given-names>R</given-names></name> <name><surname>Mori</surname> <given-names>T</given-names></name> <name><surname>Okita</surname> <given-names>Y</given-names></name> <name><surname>Shiraishi</surname> <given-names>Y</given-names></name> <name><surname>Endo</surname> <given-names>M</given-names></name></person-group>. <article-title>A multidisciplinary approach to soft-tissue sarcoma of the extremities</article-title>. <source>Expert Rev Anticancer Ther</source>. (<year>2020</year>) <volume>20</volume>:<fpage>893</fpage>&#x2013;<lpage>900</lpage>. doi: <pub-id pub-id-type="doi">10.1080/14737140.2020.1814150</pub-id></citation></ref>
<ref id="ref5"><label>5.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pollock</surname> <given-names>RE</given-names></name> <name><surname>Payne</surname> <given-names>JE</given-names></name> <name><surname>Rogers</surname> <given-names>AD</given-names></name> <name><surname>Smith</surname> <given-names>SM</given-names></name> <name><surname>Iwenofu</surname> <given-names>OH</given-names></name> <name><surname>Valerio</surname> <given-names>IL</given-names></name> <etal/></person-group>. <article-title>Multidisciplinary sarcoma care</article-title>. <source>Curr Probl Surg</source>. (<year>2018</year>) <volume>55</volume>:<fpage>517</fpage>&#x2013;<lpage>80</lpage>. doi: <pub-id pub-id-type="doi">10.1067/j.cpsurg.2018.10.006</pub-id></citation></ref>
<ref id="ref6"><label>6.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Strauss</surname> <given-names>SJ</given-names></name> <name><surname>Frezza</surname> <given-names>AM</given-names></name> <name><surname>Abecassis</surname> <given-names>N</given-names></name> <name><surname>Bajpai</surname> <given-names>J</given-names></name> <name><surname>Bauer</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>Bone sarcomas: ESMO-EURACAN-GENTURIS-ERN PaedCan clinical practice guideline for diagnosis, treatment and follow-up</article-title>. <source>Ann Oncol</source>. (<year>2021</year>) <volume>32</volume>:<fpage>1520</fpage>&#x2013;<lpage>36</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.annonc.2021.08.1995</pub-id>, PMID: <pub-id pub-id-type="pmid">34500044</pub-id></citation></ref>
<ref id="ref7"><label>7.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Str&#x00F6;nisch</surname> <given-names>A</given-names></name> <name><surname>M&#x00E4;rdian</surname> <given-names>S</given-names></name> <name><surname>Fl&#x00F6;rcken</surname> <given-names>A</given-names></name></person-group>. <article-title>Centralized and interdisciplinary therapy Management in the Treatment of sarcomas</article-title>. <source>Life (Basel)</source>. (<year>2023</year>) <volume>13</volume>:<fpage>979</fpage>. doi: <pub-id pub-id-type="doi">10.3390/life13040979</pub-id>, PMID: <pub-id pub-id-type="pmid">37109507</pub-id></citation></ref>
<ref id="ref8"><label>8.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Biswas</surname> <given-names>SS</given-names></name></person-group>. <article-title>Role of chat GPT in public health</article-title>. <source>Ann Biomed Eng</source>. (<year>2023</year>) <volume>51</volume>:<fpage>868</fpage>&#x2013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10439-023-03172-7</pub-id>, PMID: <pub-id pub-id-type="pmid">36920578</pub-id></citation></ref>
<ref id="ref9"><label>9.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sallam</surname> <given-names>M</given-names></name></person-group>. <article-title>The utility of ChatGPT as an example of large language models in healthcare education, research and practice: systematic review on the future perspectives and potential limitations</article-title>. <source>medRxiv</source>. (<year>2023</year>) <volume>2023</volume>:<fpage>2</fpage>. doi: <pub-id pub-id-type="doi">10.1101/2023.02.19.23286155</pub-id></citation></ref>
<ref id="ref10"><label>10.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gilson</surname> <given-names>A</given-names></name> <name><surname>Safranek</surname> <given-names>CW</given-names></name> <name><surname>Huang</surname> <given-names>T</given-names></name> <name><surname>Socrates</surname> <given-names>V</given-names></name> <name><surname>Chi</surname> <given-names>L</given-names></name> <name><surname>Taylor</surname> <given-names>RA</given-names></name> <etal/></person-group>. <article-title>How does chat GPT perform on the United States medical licensing examination? The implications of large language models for medical education and knowledge assessment</article-title>. <source>JMIR Med Educ</source>. (<year>2023</year>) <volume>9</volume>:<fpage>e45312</fpage>. doi: <pub-id pub-id-type="doi">10.2196/45312</pub-id>, PMID: <pub-id pub-id-type="pmid">36753318</pub-id></citation></ref>
<ref id="ref11"><label>11.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kung</surname> <given-names>TH</given-names></name> <name><surname>Cheatham</surname> <given-names>M</given-names></name> <name><surname>Medenilla</surname> <given-names>A</given-names></name> <name><surname>Sillos</surname> <given-names>C</given-names></name> <name><surname>De Leon</surname> <given-names>L</given-names></name> <name><surname>Elepa&#x00F1;o</surname> <given-names>C</given-names></name> <etal/></person-group>. <article-title>Performance of ChatGPT on USMLE: potential for AI-assisted medical education using large language models</article-title>. <source>PLOS Digit Health</source>. (<year>2023</year>) <volume>2</volume>:<fpage>e0000198</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pdig.0000198</pub-id>, PMID: <pub-id pub-id-type="pmid">36812645</pub-id></citation></ref>
<ref id="ref12"><label>12.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hill-Yardin</surname> <given-names>EL</given-names></name> <name><surname>Hutchinson</surname> <given-names>MR</given-names></name> <name><surname>Laycock</surname> <given-names>R</given-names></name> <name><surname>Spencer</surname> <given-names>SJ</given-names></name></person-group>. <article-title>A chat (GPT) about the future of scientific publishing</article-title>. <source>Brain Behav Immun</source>. (<year>2023</year>) <volume>110</volume>:<fpage>152</fpage>&#x2013;<lpage>4</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.bbi.2023.02.022</pub-id>, PMID: <pub-id pub-id-type="pmid">36868432</pub-id></citation></ref>
<ref id="ref13"><label>13.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Orr&#x00F9;</surname> <given-names>G</given-names></name> <name><surname>Piarulli</surname> <given-names>A</given-names></name> <name><surname>Conversano</surname> <given-names>C</given-names></name> <name><surname>Gemignani</surname> <given-names>A</given-names></name></person-group>. <article-title>Human-like problem-solving abilities in large language models using chat GPT</article-title>. <source>Front Artif Intell</source>. (<year>2023</year>) <volume>6</volume>:<fpage>1199350</fpage>. doi: <pub-id pub-id-type="doi">10.3389/frai.2023.1199350</pub-id>, PMID: <pub-id pub-id-type="pmid">37293238</pub-id></citation></ref>
<ref id="ref14"><label>14.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ayers</surname> <given-names>JW</given-names></name> <name><surname>Poliak</surname> <given-names>A</given-names></name> <name><surname>Dredze</surname> <given-names>M</given-names></name> <name><surname>Leas</surname> <given-names>EC</given-names></name> <name><surname>Zhu</surname> <given-names>Z</given-names></name> <name><surname>Kelley</surname> <given-names>JB</given-names></name> <etal/></person-group>. <article-title>Comparing physician and artificial intelligence Chatbot responses to patient questions posted to a public social media forum</article-title>. <source>JAMA Intern Med</source>. (<year>2023</year>) <volume>183</volume>:<fpage>589</fpage>&#x2013;<lpage>96</lpage>. doi: <pub-id pub-id-type="doi">10.1001/jamainternmed.2023.1838</pub-id>, PMID: <pub-id pub-id-type="pmid">37115527</pub-id></citation></ref>
<ref id="ref15"><label>15.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>D</given-names></name> <name><surname>Goodman</surname> <given-names>R</given-names></name> <name><surname>Patrinely</surname> <given-names>J</given-names></name> <name><surname>Stone</surname> <given-names>C</given-names></name> <name><surname>Zimmerman</surname> <given-names>E</given-names></name> <name><surname>Donald</surname> <given-names>R</given-names></name> <etal/></person-group>. <article-title>Assessing the accuracy and reliability of AI-generated medical responses: an evaluation of the chat-GPT model</article-title>. <source>Res Sq [Preprint]</source>. (<year>2023</year>). doi: <pub-id pub-id-type="doi">10.21203/rs.3.rs-2566942/v1</pub-id></citation></ref>
<ref id="ref16"><label>16.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Karako</surname> <given-names>K</given-names></name> <name><surname>Song</surname> <given-names>P</given-names></name> <name><surname>Chen</surname> <given-names>Y</given-names></name> <name><surname>Tang</surname> <given-names>W</given-names></name></person-group>. <article-title>New possibilities for medical support systems utilizing artificial intelligence (AI) and data platforms</article-title>. <source>Biosci Trends</source>. (<year>2023</year>) <volume>17</volume>:<fpage>186</fpage>&#x2013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.5582/bst.2023.01138</pub-id></citation></ref>
<ref id="ref17"><label>17.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chow</surname> <given-names>JCL</given-names></name> <name><surname>Sanders</surname> <given-names>L</given-names></name> <name><surname>Li</surname> <given-names>K</given-names></name></person-group>. <article-title>Impact of chat GPT on medical chatbots as a disruptive technology</article-title>. <source>Front Artif Intell</source>. (<year>2023</year>) <volume>6</volume>:<fpage>1166014</fpage>. doi: <pub-id pub-id-type="doi">10.3389/frai.2023.1166014</pub-id>, PMID: <pub-id pub-id-type="pmid">37091303</pub-id></citation></ref>
<ref id="ref18"><label>18.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Semrl</surname> <given-names>N</given-names></name> <name><surname>Feigl</surname> <given-names>S</given-names></name> <name><surname>Taumberger</surname> <given-names>N</given-names></name> <name><surname>Bracic</surname> <given-names>T</given-names></name> <name><surname>Fluhr</surname> <given-names>H</given-names></name> <name><surname>Blockeel</surname> <given-names>C</given-names></name> <etal/></person-group>. <article-title>AI language models in human reproduction research: exploring chat GPT's potential to assist academic writing</article-title>. <source>Hum Reprod</source>. (<year>2023</year>) <volume>38</volume>:<fpage>2281</fpage>&#x2013;<lpage>8</lpage>. doi: <pub-id pub-id-type="doi">10.1093/humrep/dead207</pub-id>, PMID: <pub-id pub-id-type="pmid">37833847</pub-id></citation></ref>
<ref id="ref19"><label>19.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Uz</surname> <given-names>C</given-names></name> <name><surname>Umay</surname> <given-names>E</given-names></name></person-group>. <article-title>"Dr chat GPT": is it a reliable and useful source for common rheumatic diseases?</article-title> <source>Int J Rheum Dis</source>. (<year>2023</year>) <volume>26</volume>:<fpage>1343</fpage>&#x2013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.1111/1756-185X.14749</pub-id>, PMID: <pub-id pub-id-type="pmid">37218530</pub-id></citation></ref>
<ref id="ref20"><label>20.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hoch</surname> <given-names>CC</given-names></name> <name><surname>Wollenberg</surname> <given-names>B</given-names></name> <name><surname>L&#x00FC;ers</surname> <given-names>JC</given-names></name> <name><surname>Knoedler</surname> <given-names>S</given-names></name> <name><surname>Knoedler</surname> <given-names>L</given-names></name> <name><surname>Frank</surname> <given-names>K</given-names></name> <etal/></person-group>. <article-title>ChatGPT's quiz skills in different otolaryngology subspecialties: an analysis of 2576 single-choice and multiple-choice board certification preparation questions</article-title>. <source>Eur Arch Otorhinolaryngol</source>. (<year>2023</year>) <volume>280</volume>:<fpage>4271</fpage>&#x2013;<lpage>8</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00405-023-08051-4</pub-id>, PMID: <pub-id pub-id-type="pmid">37285018</pub-id></citation></ref>
<ref id="ref21"><label>21.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jung</surname> <given-names>LB</given-names></name> <name><surname>Gudera</surname> <given-names>JA</given-names></name> <name><surname>Wiegand</surname> <given-names>TLT</given-names></name> <name><surname>Allmendinger</surname> <given-names>S</given-names></name> <name><surname>Dimitriadis</surname> <given-names>K</given-names></name> <name><surname>Koerte</surname> <given-names>IK</given-names></name></person-group>. <article-title>Chat GPT passes German state examination in medicine with picture questions omitted</article-title>. <source>Dtsch Arztebl Int</source>. (<year>2023</year>) <volume>120</volume>:<fpage>373</fpage>&#x2013;<lpage>4</lpage>. doi: <pub-id pub-id-type="doi">10.3238/arztebl.m2023.0113</pub-id>, PMID: <pub-id pub-id-type="pmid">37530052</pub-id></citation></ref>
<ref id="ref22"><label>22.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Leithner</surname> <given-names>A</given-names></name> <name><surname>Maurer-Ertl</surname> <given-names>W</given-names></name> <name><surname>Glehr</surname> <given-names>M</given-names></name> <name><surname>Friesenbichler</surname> <given-names>J</given-names></name> <name><surname>Leithner</surname> <given-names>K</given-names></name> <name><surname>Windhager</surname> <given-names>R</given-names></name></person-group>. <article-title>Wikipedia and osteosarcoma: a trustworthy patients' information?</article-title> <source>J Am Med Inform Assoc</source>. (<year>2010</year>) <volume>17</volume>:<fpage>373</fpage>&#x2013;<lpage>4</lpage>. doi: <pub-id pub-id-type="doi">10.1136/jamia.2010.004507</pub-id>, PMID: <pub-id pub-id-type="pmid">20595302</pub-id></citation></ref>
<ref id="ref23"><label>23.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schippinger</surname> <given-names>M</given-names></name> <name><surname>Ruckenstuhl</surname> <given-names>P</given-names></name> <name><surname>Friesenbichler</surname> <given-names>J</given-names></name> <name><surname>Leithner</surname> <given-names>A</given-names></name></person-group>. <article-title>Osteosarcoma: reliability and quality of the information in the internet</article-title>. <source>Wien Med Wochenschr</source>. (<year>2014</year>) <volume>164</volume>:<fpage>353</fpage>&#x2013;<lpage>7</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10354-014-0304-y</pub-id>, PMID: <pub-id pub-id-type="pmid">25205186</pub-id></citation></ref>
<ref id="ref24"><label>24.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zade</surname> <given-names>RT</given-names></name> <name><surname>Tartaglione</surname> <given-names>JP</given-names></name> <name><surname>Chisena</surname> <given-names>E</given-names></name> <name><surname>Adams</surname> <given-names>CT</given-names></name> <name><surname>DiCaprio</surname> <given-names>MR</given-names></name></person-group>. <article-title>The quality of online Orthopaedic oncology information</article-title>. <source>J Am Acad Orthop Surg Glob Res Rev</source>. (<year>2020</year>) <volume>4</volume>:<fpage>e19.00181</fpage>. doi: <pub-id pub-id-type="doi">10.5435/JAAOSGlobal-D-19-00181</pub-id>, PMID: <pub-id pub-id-type="pmid">32440631</pub-id></citation></ref>
</ref-list>
</back>
</article>