<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Surg.</journal-id>
<journal-title>Frontiers in Surgery</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Surg.</abbrev-journal-title>
<issn pub-type="epub">2296-875X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fsurg.2022.939079</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Surgery</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Developing and validating a multivariable prediction model for predicting the cost of colon surgery</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes"><name><surname>Taha</surname><given-names>Anas</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref>
<xref ref-type="author-notes" rid="an1"><sup>&#x2020;</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/1622820/overview"/></contrib>
<contrib contrib-type="author"><name><surname>Taha-Mehlitz</surname><given-names>Stephanie</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="author-notes" rid="an1"><sup>&#x2020;</sup></xref></contrib>
<contrib contrib-type="author"><name><surname>Ochs</surname><given-names>Vincent</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2048952/overview" /></contrib>
<contrib contrib-type="author"><name><surname>Enodien</surname><given-names>Bassey</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/1636349/overview" /></contrib>
<contrib contrib-type="author"><name><surname>Honaker</surname><given-names>Michael D.</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref></contrib>
<contrib contrib-type="author"><name><surname>Frey</surname><given-names>Daniel M.</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="an2"><sup>&#x2021;</sup></xref></contrib>
<contrib contrib-type="author"><name><surname>Cattin</surname><given-names>Philippe C.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="an2"><sup>&#x2021;</sup></xref></contrib>
</contrib-group>
<aff id="aff1"><label><sup>1</sup></label><addr-line>Department of Biomedical Engineering, Faculty of Medicine</addr-line>, <institution>University of Basel</institution>, <addr-line>Allschwil</addr-line>, <country>Switzerland</country></aff>
<aff id="aff2"><label><sup>2</sup></label><addr-line>Department of Surgery</addr-line>, <institution>GZO Hospital</institution>, <addr-line>Wetzikon</addr-line>, <country>Switzerland</country></aff>
<aff id="aff3"><label><sup>3</sup></label><addr-line>Clarunis, University Centre for Gastrointestinal and Liver Diseases</addr-line>, <institution>St. Clara Hospital and University Hospital Basel</institution>, <addr-line>Basel</addr-line>, <country>Switzerland</country></aff>
<aff id="aff4"><label><sup>4</sup></label><addr-line>Department of Surgical Oncology and Colorectal Surgery</addr-line>, <institution>East Carolina University, Brody School of Medicine</institution>, <addr-line>Greenville, NC</addr-line>, <country>United States</country></aff>
<author-notes>
<fn fn-type="edited-by"><p><bold>Edited by:</bold> Falk Rauchfu&#x00DF;, Friedrich Schiller University Jena, Germany</p></fn>
<fn fn-type="edited-by"><p><bold>Reviewed by:</bold> Dasharathraj K. Shetty, Manipal Institute of Technology, India; Daniel Hartmann, University Hospital Rechts der Isar, Technical University of Munich, Germany</p></fn>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Anas Taha <email>anas.taha@unibas.ch</email></corresp>
<fn id="an1"><label><sup>&#x2020;</sup></label><p>These authors have contributed equally to this work and share first authorship</p></fn>
<fn id="an2"><label><sup>&#x2021;</sup></label><p>These authors have contributed equally to this work and share last authorship</p></fn>
<fn fn-type="other" id="fn001"><p><bold>Specialty Section:</bold> This article was submitted to Visceral Surgery, a section of the journal Frontiers in Surgery</p></fn>
</author-notes>
<pub-date pub-type="epub"><day>07</day><month>11</month><year>2022</year></pub-date>
<pub-date pub-type="collection"><year>2022</year></pub-date>
<volume>9</volume><elocation-id>939079</elocation-id>
<history>
<date date-type="received"><day>08</day><month>05</month><year>2022</year></date>
<date date-type="accepted"><day>11</day><month>10</month><year>2022</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2022 Taha, Taha-Mehlitz, Ochs, Enodien, Honaker, Frey and Cattin.</copyright-statement>
<copyright-year>2022</copyright-year><copyright-holder>Taha, Taha-Mehlitz, Ochs, Enodien, Honaker, Frey and Cattin</copyright-holder><license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Hospitals are burdened with predicting, calculating, and managing various cost-affecting parameters regarding patients and their treatments. Accuracy in cost prediction is further affected when a patient suffers from other health issues that hinder the traditional prognosis. This can lead to an unavoidable deficit in the final revenue of medical centers. This study aims to determine whether machine learning (ML) algorithms can predict cost factors based on patients undergoing colon surgery. For the forecasting, multiple predictors will be taken into the model to provide a tool that can be helpful for hospitals to manage their costs, ultimately leading to operating more cost-efficiently. This proof of principle will lay the groundwork for an efficient ML-based prediction tool based on multicenter data from a range of international centers in the subsequent phases of the study. With a mean absolute percentage error result of 18&#x0025;&#x2013;25.6&#x0025;, our model&#x0027;s prediction showed decent results in forecasting the costs regarding various diagnosed factors and surgical approaches. There is an urgent need for further studies on predicting cost factors, especially for cases with anastomotic leakage, to minimize unnecessary hospital costs.</p>
</abstract>
<kwd-group>
<kwd>cost prediction</kwd>
<kwd>colon surgery</kwd>
<kwd>machine learning</kwd>
<kwd>colon surgery cost</kwd>
<kwd>anastomotic insufficiency</kwd>
</kwd-group><counts>
<fig-count count="3"/>
<table-count count="4"/><equation-count count="0"/><ref-count count="21"/><page-count count="0"/><word-count count="0"/></counts>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><title>Introduction</title>
<sec id="s1a"><title>Background</title>
<p>Colorectal cancer (CRC) is one of the most prevalent cancers in the world today based on diagnoses, with about 1.8 million cases being diagnosed and about 0.7 million related deaths occurring annually. In addition, CRC accounts for 10&#x0025; of all newly diagnosed cancers, a considerable social and economic burden for many nations worldwide (<xref ref-type="bibr" rid="B1">1</xref>). One of the treatment modalities for colorectal cancer is surgery. Surgery is aimed at obtaining an adequate oncologic resection while re-establishing intestinal continuity. Over time, there have been improvements in the way the disease is treated. However, existing patient comorbidities can limit surgical procedures. The time required to prepare patients for surgery and address their comorbidities contributes to increased surgical costs. However, despite many improvements, significant other complications still occur during, and especially after, a surgical procedure. To avoid this, the patient is placed in necessary postoperative care for 5 to 7 days after a surgical operation. Other postoperative risk factors will further add to the surgical cost, but their prediction is very vague due to the absence of sufficient datasets. These involve performing a colorectal anastomosis, anastomotic leak (<xref ref-type="bibr" rid="B2">2</xref>), delirium or prolonged ileus (<xref ref-type="bibr" rid="B3">3</xref>), other emergency surgeries, longer intraoperative time, and peritoneal contamination.</p>
<p>The comorbidities and longer stays result in a cost burden for patients and hospitals. This is why prediction models are now being updated to determine the costs for anastomotic insufficiency. Prediction models are normally used to estimate the probability of achieving a particular outcome (<xref ref-type="bibr" rid="B4">4</xref>). Many prediction models have been developed, but only a small number are used because not all models accurately predict the desired outcome (<xref ref-type="bibr" rid="B5">5</xref>). This study focuses on developing and validating a multivariable prediction model to predict costs for patients undergoing colon surgery while considering their stay in the hospital. This will help determine the cost burden due to variable hospital length of stay (LOS) and days spent in intensive care units (ICUs). The medical context is prognostic in that it is focused on predicting the cost of overall expenditure involved in colon surgery for the clinical center and the patient.</p>
</sec>
<sec id="s1b"><title>Rationale</title>
<p>The rationale for developing and validating the multivariable model is that it will help accurately predict the costs associated with colon surgery. The accurate prediction will help patients and practices employed by the hospital make more informed decisions, as well as aid in policies enacted by the government. The results that come with the use of the model will also aid in surgical planning. In short, developing and validating the multivariable model will provide insight into the costs of colon surgery. In turn, it will allow revisions in care and help develop strategies for improved management. Similar studies for prediction purposes have been conducted in the field of medicine. For example, Musunuri et al. have used machine learning in the form of artificial intelligence to predict 90-day liver disease mortality. Focused on acute-on-chronic liver failure, they achieved a model with an accuracy of 94.12&#x0025; and an area under the curve of 0.915 (<xref ref-type="bibr" rid="B6">6</xref>). Hameed et al. wrote about the impact of artificial intelligence on urological diseases. In their literature review, they have pointed to multiple publications using various models like support vector machine, nearest neighbor, random forest, convolutional neural network, or artificial neural networks to predict and classify diseases like prostate cancer, urothelial cancer, renal cancer, or urolithiasis. What distinguishes those publications from our work is that they use a classification model instead of a regression model. The most important benefit of using a regression model compared to a classification model is that it helps predict continuous values, whereas classification models try to predict discrete class labels. To predict the costs associated with colon surgery in an accurate way, a machine learning regression model is used. Using this approach, we aim to help close an existing gap in this field (<xref ref-type="bibr" rid="B7">7</xref>).</p>
</sec>
<sec id="s1c"><title>Objectives</title>
<list list-type="simple">
<list-item><label>&#x2022;</label><p>To develop prediction models for the final costs in patients based on multiple predictors.</p></list-item>
<list-item><label>&#x2022;</label><p>To test the models in terms of their ability to accurately predict the final costs associated with colon surgery in patients.</p></list-item>
</list>
</sec>
</sec>
<sec id="s2" sec-type="methods"><title>Methods</title>
<sec id="s2a"><title>Overview and data collection</title>
<p>Data were extracted from a registry of patients who underwent colonic anastomosis for various reasons such as tumors, diverticulitis, mesenteric ischemia, iatrogenic or traumatic perforation, or inflammatory bowel disease (aggregated as &#x201C;nontumor&#x201D;) at the Hospital of Wetzikon from January 1, 2013, to December 31, 2019. No patients were excluded from the initial data collection. Furthermore, this study was completed based on the transparent reporting of a multivariable prediction model for individual prognosis or diagnosis (TRIPOD) statement checklist for prediction model development (<xref ref-type="bibr" rid="B8">8</xref>).</p>
<p>Utilizing these data, we developed a machine learning model to predict the costs of colon surgery.</p>
</sec>
<sec id="s2b"><title>Ethical considerations</title>
<p>The use of the registry data was approved by an institutional review board, and the requirement for patients&#x2019; informed consent was waived. The study was registered at [Req 2021&#x2013;01107].</p>
</sec>
<sec id="s2c"><title>Predictors and outcome measures</title>
<p>Recorded variables include insurance (general/semiprivate/private), age, surgical procedure (Hartmann/left-sided hemicolectomy and extended left-sided hemicolectomy/right-sided hemicolectomy and extended right-sided hemicolectomy/sigmoid resection), surgical approach (open/laparoscopic), diagnosis (tumor/nontumor), final cost (the sum of all cost factors), length of stay (in days), intensive care unit stay (in days), operation time (in minutes), anesthesia time (in minutes), ASA score (I, II, III, IV), gender (male/female), CCI (Charlson comorbidity index), anastomotic insufficiency, and emergent/nonemergent. The data on the final cost, which is the sum of all the costs incurred during the stay in the hospital for surgery, were collected in CHF (Swiss Francs). Other cost factors, including administrative costs, costs of hospitality, nurse costs, costs of infrastructure, doctor costs, medical costs, operational costs, anesthesia costs, and care costs, are not incorporated since they add up to the final costs.</p>
</sec>
<sec id="s2d"><title>Model development</title>
<p>Data were randomly split into two sets; 80&#x0025; of the data was put into a training set to build the models, and 20&#x0025; was utilized for a test set to validate the models and assess their performance internally. The two sets had approximately the same class distribution (Gaussian). The following 14 predictors were chosen to predict the final costs based on regression and clinical insights: age, gender, insurance, diagnosis, operation, surgical approach, hospitalization, intensive care unit stay, surgical procedure, anesthesia time, CCI, ASA score, anastomotic insufficiency, and emergency surgery (<xref ref-type="bibr" rid="B9">9</xref>).</p>
<p>By including variables such as the CCI and the ASA score, we can cover a large number of diseases that are included in the comorbidity index.</p>
<p>A variety of machine learning models were developed, including generalized boosted regression, random forest, and decision trees. An interaction depth of 3 and a total number of 500 trees were chosen, as were the type of random forest and the regression model. The classification/predictive performance was measured using the mean absolute percentage error (MAPE), where a result of &#x003C;10&#x0025; was classified as highly accurate, &#x003C;20&#x0025; denoted a good forecast, 20&#x0025;&#x2013;50&#x0025; denoted a reasonable forecast, and everything &#x003E;50&#x0025; denoted an inaccurate forecast (<xref ref-type="bibr" rid="B10">10</xref>). The MAPE factor, also known as mean absolute percentage deviation, was used to measure the accuracy of the forecast. Continuous data were reported as mean&#x2009;&#x00B1;&#x2009;standard deviation (SD) or median [interquartile range (IQR)] and categorical data were reported as numbers (percentages). Hyperparameters were tuned, and the final model was selected based on the MAPE. The final model chosen was the random forest model based on its superior performance.</p>
<p>The analysis was carried out using R version 4.0.4. The random forest library was used for the random forest models, the metrics library was used for the calculation of the performance measurements, the gbm library was used for the generalized boosted regression models, and the rpart library was used for the other models.</p>
</sec>
<sec id="s2e"><title>Deployment</title>
<p>The best-performing model will be deployed as a web-based, user-friendly application using RShiny to predict the final cost that considers the different cost factors. (Accessed at: <ext-link ext-link-type="uri" xlink:href="https://colonsurgerycost.shinyapps.io/Final_Cost/">https://colonsurgerycost.shinyapps.io/Final_Cost/</ext-link>).</p>
</sec>
</sec>
<sec id="s3" sec-type="results"><title>Results</title>
<sec id="s3a"><title>Cohort</title>
<p>A total of 347 patients were included in our study. This number consists of all patients from the center who suffered from the diagnosed factors in this section and had to undergo the type of operations mentioned. The mean age was 67&#x2009;&#x00B1;&#x2009;14 years (range 28&#x2013;94). A total of 162 (47&#x0025;) patients were men, and 185 (53&#x0025;) were women. <xref ref-type="table" rid="T1">Tables&#x00A0;1</xref> and <xref ref-type="table" rid="T2">2</xref> provide all baseline variables and their descriptive statistics. Continuous variables were recorded as mean&#x2009;&#x00B1;&#x2009;SD (range) in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>.</p>
<table-wrap id="T1" position="float"><label>Table 1</label>
<caption><p>Variable characteristics for continuous values.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Variable</th>
<th valign="top" align="center">Overall (<italic>n</italic>)</th>
<th valign="top" align="center">Mean (SD)</th>
<th valign="top" align="center">Min</th>
<th valign="top" align="center">Max</th>
<th valign="top" align="center">Range</th>
<th valign="top" align="center">Median (IQR)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top">Age</td>
<td valign="top" align="center">347</td>
<td valign="top" align="center">67 (14)</td>
<td valign="top" align="center">28</td>
<td valign="top" align="center">94</td>
<td valign="top" align="center">66</td>
<td valign="top" align="center">68</td>
</tr>
<tr>
<td valign="top">Hospital days</td>
<td valign="top" align="center">347</td>
<td valign="top" align="center">9 (10)</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">84</td>
<td valign="top" align="center">83</td>
<td valign="top" align="center">5</td>
</tr>
<tr>
<td valign="top">ICU days</td>
<td valign="top" align="center">347</td>
<td valign="top" align="center">1 (5)</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">70</td>
<td valign="top" align="center">70</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top">Operation time</td>
<td valign="top" align="center">347</td>
<td valign="top" align="center">175 (102)</td>
<td valign="top" align="center">23</td>
<td valign="top" align="center">1.280</td>
<td valign="top" align="center">1.257</td>
<td valign="top" align="center">154</td>
</tr>
<tr>
<td valign="top">Anesthesia time</td>
<td valign="top" align="center">347</td>
<td valign="top" align="center">119 (90)</td>
<td valign="top" align="center">45</td>
<td valign="top" align="center">1.020</td>
<td valign="top" align="center">997</td>
<td valign="top" align="center">95</td>
</tr>
<tr>
<td valign="top">Final cost</td>
<td valign="top" align="center">347</td>
<td valign="top" align="center">&#x2212;32.502 (45.650)</td>
<td valign="top" align="center">&#x2212;520.591</td>
<td valign="top" align="center">&#x2212;7.485</td>
<td valign="top" align="center">528.076</td>
<td valign="top" align="center">&#x2212;20.011</td>
</tr>
<tr>
<td valign="top">CCI</td>
<td valign="top" align="center">347</td>
<td valign="top" align="center">4 (3)</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">16</td>
<td valign="top" align="center">16</td>
<td valign="top" align="center">3</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="table-fn1"><p>SD, standard deviation; IQR, interquartile range; ICU, intensive care unit; CCI, Charlson comorbidity index.</p></fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T2" position="float"><label>Table 2</label>
<caption><p>Variable characteristics for categorical values.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Variable</th>
<th valign="top" align="center"><italic>n</italic> (&#x0025;)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="2">Gender</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Male</td>
<td valign="top" align="center">162 (47&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Female</td>
<td valign="top" align="center">185 (53&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" colspan="2">Insurance</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;General</td>
<td valign="top" align="center">283 (82&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Semiprivate</td>
<td valign="top" align="center">49 (14&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Private</td>
<td valign="top" align="center">15 (4&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" colspan="2">Diagnosis</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Tumor</td>
<td valign="top" align="center">162 (47&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Nontumor</td>
<td valign="top" align="center">185 (53&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" colspan="2">Emergency surgery</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;No</td>
<td valign="top" align="center">331 (95&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Yes</td>
<td valign="top" align="center">16 (5&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" colspan="2">Operation</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Hartmann&#x0027;s procedure</td>
<td valign="top" align="center">19 (5&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Hemicolectomy left</td>
<td valign="top" align="center">16 (4&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Extended hemicolectomy left</td>
<td valign="top" align="center">6 (2&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Hemicolectomy right</td>
<td valign="top" align="center">82 (24&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Extended hemicolectomy right</td>
<td valign="top" align="center">6 (2&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Sigmoid resection</td>
<td valign="top" align="center">218 (63&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" colspan="2">Surgery approach</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Open</td>
<td valign="top" align="center">153 (44&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Laparoscopic</td>
<td valign="top" align="center">194 (56&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" colspan="2">Anastomotic insufficiency</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;No</td>
<td valign="top" align="center">331 (95&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Yes</td>
<td valign="top" align="center">16 (5&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" colspan="2">ASA score</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;I</td>
<td valign="top" align="center">12 (4&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;II</td>
<td valign="top" align="center">184 (53&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;III</td>
<td valign="top" align="center">137 (39&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;IV</td>
<td valign="top" align="center">14 (4&#x0025;)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Categorical variables were recorded as numbers (&#x0025;) in <xref ref-type="table" rid="T2">Table&#x00A0;2</xref>. No missing values were detected. <xref ref-type="table" rid="T3">Table&#x00A0;3</xref> provides the variables&#x0027; characteristics and descriptive statistics that are not mentioned in <xref ref-type="table" rid="T1">Tables&#x00A0;1</xref> and <xref ref-type="table" rid="T2">2</xref> and are based on their impact on the final costs.</p>
<table-wrap id="T3" position="float"><label>Table 3</label>
<caption><p>Descriptive statistics based on final costs.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Variable</th>
<th valign="top" align="center">Overall (<italic>n</italic>)</th>
<th valign="top" align="center">Mean (SD)</th>
<th valign="top" align="center">Min</th>
<th valign="top" align="center">Max</th>
<th valign="top" align="center">Median (Q1, Q3)</th>
<th valign="top" align="center"><italic>P</italic>-value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Insurance</td>
<td valign="top" align="center">347</td>
<td valign="top" align="center">&#x2212;32.502 (45.650)</td>
<td valign="top" align="center">&#x2212;520.591</td>
<td valign="top" align="center">&#x2212;7.485</td>
<td valign="top" align="center">&#x2212;20.011 (&#x2212;28.828, &#x2212;15.332)</td>
<td valign="top" align="center">0.643</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;General</td>
<td valign="top" align="center">283</td>
<td valign="top" align="center">&#x2212;31.773 (47.490)</td>
<td valign="top" align="center">&#x2212;520.591</td>
<td valign="top" align="center">&#x2212;7.485</td>
<td valign="top" align="center">&#x2212;18.433 (&#x2212;27.464, &#x2212;14.823)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Semiprivate</td>
<td valign="top" align="center">49</td>
<td valign="top" align="center">&#x2212;33.495 (27.892)</td>
<td valign="top" align="center">&#x2212;192.811</td>
<td valign="top" align="center">&#x2212;10.919</td>
<td valign="top" align="center">&#x2212;22.645 (&#x2212;40.043, &#x2212;19.795)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Private</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">&#x2212;42.996 (57.232)</td>
<td valign="top" align="center">&#x2212;241.331</td>
<td valign="top" align="center">&#x2212;13.915</td>
<td valign="top" align="center">&#x2212;23.979 (&#x2212;39.017, &#x2212;18.426)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">Diagnosis</td>
<td valign="top" align="center">347</td>
<td valign="top" align="center">&#x2212;32.502 (45.650)</td>
<td valign="top" align="center">&#x2212;520.591</td>
<td valign="top" align="center">&#x2212;7.485</td>
<td valign="top" align="center">&#x2212;20.011 (&#x2212;28.828, &#x2212;15.332)</td>
<td valign="top" align="center">0.842</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Tumor</td>
<td valign="top" align="center">162</td>
<td valign="top" align="center">&#x2212;33.025 (40.120)</td>
<td valign="top" align="center">&#x2212;298.957</td>
<td valign="top" align="center">&#x2212;7.485</td>
<td valign="top" align="center">&#x2212;21.129 (&#x2212;28.294, &#x2212;15.790)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Nontumor</td>
<td valign="top" align="center">185</td>
<td valign="top" align="center">&#x2212;32.043 (50.098)</td>
<td valign="top" align="center">&#x2212;520.591</td>
<td valign="top" align="center">&#x2212;9.929</td>
<td valign="top" align="center">&#x2212;19.155 (&#x2212;29.688, &#x2212;14.859)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">Operation</td>
<td valign="top" align="center">347</td>
<td valign="top" align="center">&#x2212;32.502 (45.650)</td>
<td valign="top" align="center">&#x2212;520.591</td>
<td valign="top" align="center">&#x2212;7.485</td>
<td valign="top" align="center">&#x2212;20.011 (&#x2212;28.828, &#x2212;15.332)</td>
<td valign="top" align="center">&#x003C;0.001</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Hartmann</td>
<td valign="top" align="center">19</td>
<td valign="top" align="center">&#x2212;25.479 (20.230)</td>
<td valign="top" align="center">&#x2212;75.676</td>
<td valign="top" align="center">&#x2212;7.485</td>
<td valign="top" align="center">&#x2212;18.433 (&#x2212;24.551, &#x2212;14.053)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Hemicolectomy left</td>
<td valign="top" align="center">16</td>
<td valign="top" align="center">&#x2212;65.777 (91.964)</td>
<td valign="top" align="center">&#x2212;38.419</td>
<td valign="top" align="center">&#x2212;15.045</td>
<td valign="top" align="center">&#x2212;32.297 (&#x2212;70.713, &#x2212;18.876)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Extended hemicolectomy left</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">&#x2212;110.698 (20,122)</td>
<td valign="top" align="center">&#x2212;520.591</td>
<td valign="top" align="center">&#x2212;13.915</td>
<td valign="top" align="center">&#x2212;28.751 (&#x2212;46.173, &#x2212;22.338)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Hemicolectomy right</td>
<td valign="top" align="center">82</td>
<td valign="top" align="center">&#x2212;35.135 (39.474)</td>
<td valign="top" align="center">&#x2212;241.331</td>
<td valign="top" align="center">&#x2212;10.665</td>
<td valign="top" align="center">&#x2212;22.469 (&#x2212;35.663, &#x2212;16.468)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Extended hemicolectomy right</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">&#x2212;65.768 (114.464)</td>
<td valign="top" align="center">&#x2212;298.957</td>
<td valign="top" align="center">&#x2212;13.086</td>
<td valign="top" align="center">&#x2212;17.726 (&#x2212;29.401, &#x2212;14.799)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Sigmoid resection</td>
<td valign="top" align="center">218</td>
<td valign="top" align="center">&#x2212;26.613 (23.764)</td>
<td valign="top" align="center">&#x2212;192.811</td>
<td valign="top" align="center">&#x2212;9.379</td>
<td valign="top" align="center">&#x2212;18.684 (&#x2212;25.538, &#x2212;15.180)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">Surgery approach</td>
<td valign="top" align="center">347</td>
<td valign="top" align="center">&#x2212;32.502 (45.650)</td>
<td valign="top" align="center">&#x2212;520.591</td>
<td valign="top" align="center">&#x2212;7.485</td>
<td valign="top" align="center">&#x2212;20.011 (&#x2212;28.828, &#x2212;15.332)</td>
<td valign="top" align="center">&#x003C;0.001</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Open</td>
<td valign="top" align="center">153</td>
<td valign="top" align="center">&#x2212;46.531 (64.486)</td>
<td valign="top" align="center">&#x2212;520.591</td>
<td valign="top" align="center">&#x2212;7.485</td>
<td valign="top" align="center">&#x2212;25.989 (&#x2212;45.758, &#x2212;18.708)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Laparoscopic</td>
<td valign="top" align="center">194</td>
<td valign="top" align="center">&#x2212;21.437 (13.486)</td>
<td valign="top" align="center">&#x2212;91.098</td>
<td valign="top" align="center">&#x2212;9.379</td>
<td valign="top" align="center">&#x2212;17.275 (&#x2212;21.765, &#x2212;14.685)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">Anastomotic insufficiency</td>
<td valign="top" align="center">347</td>
<td valign="top" align="center">&#x2212;32.502 (45.650)</td>
<td valign="top" align="center">&#x2212;520.591</td>
<td valign="top" align="center">&#x2212;7.485</td>
<td valign="top" align="center">&#x2212;20.011 (&#x2212;28.828, &#x2212;15.332)</td>
<td valign="top" align="center">&#x003C;0.001</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;No</td>
<td valign="top" align="center">331</td>
<td valign="top" align="center">&#x2212;26.051 (20.763)</td>
<td valign="top" align="center">&#x2212;192.811</td>
<td valign="top" align="center">&#x2212;7.485</td>
<td valign="top" align="center">&#x2212;19.472 (&#x2212;27.204, &#x2212;15.121)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Yes</td>
<td valign="top" align="center">16</td>
<td valign="top" align="center">&#x2212;165.941 (136.653)</td>
<td valign="top" align="center">&#x2212;520.591</td>
<td valign="top" align="center">&#x2212;27.444</td>
<td valign="top" align="center">&#x2212;114.158 (&#x2212;225.666, &#x2212;78.015)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">ASA score</td>
<td valign="top" align="center">347</td>
<td valign="top" align="center">&#x2212;32.502 (45.650)</td>
<td valign="top" align="center">&#x2212;520.591</td>
<td valign="top" align="center">&#x2212;7.485</td>
<td valign="top" align="center">&#x2212;20.011 (&#x2212;28.828, &#x2212;15.332)</td>
<td valign="top" align="center">&#x003C;0.001</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;I</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">&#x2212;20.626 (5.177)</td>
<td valign="top" align="center">&#x2212;30.208</td>
<td valign="top" align="center">&#x2212;14.035</td>
<td valign="top" align="center">&#x2212;20.982 (&#x2212;23.627, &#x2212;16.212)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">&#x2003;II</td>
<td valign="top" align="center">184</td>
<td valign="top" align="center">&#x2212;23.129 (21.680)</td>
<td valign="top" align="center">&#x2212;241.331</td>
<td valign="top" align="center">&#x2212;7.485</td>
<td valign="top" align="center">&#x2212;17.938 (&#x2212;22.591, &#x2212;14.515)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">&#x2003;III</td>
<td valign="top" align="center">137</td>
<td valign="top" align="center">&#x2212;38.328 (44.857)</td>
<td valign="top" align="center">&#x2212;384.159</td>
<td valign="top" align="center">&#x2212;10.665</td>
<td valign="top" align="center">&#x2212;22.645 (&#x2212;43.274, &#x2212;16.997)</td>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">&#x2003;IV</td>
<td valign="top" align="center">14</td>
<td valign="top" align="center">&#x2212;108.844 (140.598)</td>
<td valign="top" align="center">&#x2212;520.591</td>
<td valign="top" align="center">&#x2212;20.280</td>
<td valign="top" align="center">&#x2212;53.515 (&#x2212;79.035, &#x2212;33.144)</td>
<td valign="top" align="center"/>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="table-fn2"><p>SD, standard deviation.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3b"><title>Model performance</title>
<p>During internal validation, the performance of all three models was tested and stated with their mean values and 95&#x0025; confidence intervals (<xref ref-type="table" rid="T3">Table&#x00A0;3</xref>). The random forest classifier provided the lowest MAPE for predicting the final cost (21.4). Thus, it was the model with the best internal validation performance and was subsequently used for predicting costs (<xref ref-type="bibr" rid="B11">11</xref>). In comparison, the decision tree and generalized boosted regression models displayed higher MAPEs of 25.5 and 29.7, respectively. Therefore, the average MAPE for the final cost is around 21.4, which means that, on average, the forecast of this prediction model regarding the final costs is off by 21.4&#x0025;. Since a MAPE value of &#x003C;20&#x0025; is considered &#x201C;good,&#x201D; our result, lying just above this threshold, can be regarded as decent. The percentage of variance explained by the random forest models varied from 73.81&#x0025; to 81.05&#x0025;. Specific feature importance according to the random forest classifier is displayed as Gini index in <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>, while <xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref> shows the prediction of the random forest classifier compared to the actual observed values from the test data set for the final cost factor.</p>
<fig id="F1" position="float"><label>Figure 1</label>
<caption><p>Total decrease in node impurities, measured by the Gini index from splitting on the variable, averaged over all trees.</p></caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fsurg-09-939079-g001.tif"/>
</fig>
<fig id="F2" position="float"><label>Figure 2</label>
<caption><p>Predicted vs. real observations of the model (ground truth).</p></caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fsurg-09-939079-g002.tif"/>
</fig>
<p>In <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>, one can see that factors such as LOS, anastomotic insufficiency, and intensive care unit stay are the best predictors in our model, which could be explained as being variables that are often correlated with postoperative complications and thus being more costly. The hospitalization factor can be explained as a good predictor of cost because the overall costs for a hospital will increase if the patient is not progressing after surgery. The same can be said about the intensive care unit. For the anastomotic insufficiency cases, it is evident that these complications place a higher burden on the final costs. The mean decrease in the Gini index is the mean of a variable&#x0027;s total decrease in node impurity, weighted by the proportion of samples reaching that node in each individual decision tree in the random forest. A higher mean decrease in the Gini index indicates higher variable importance. In other words, node impurity measures how much the model error increases when a particular variable is randomly permuted or shuffled.</p>
<p><xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref> indicates that the predicted values are not far off the actual observed values based on our data set. For most of the observations, our model was able to perform decently in predicting the final costs.</p>
<p><xref ref-type="fig" rid="F3">Figure&#x00A0;3</xref> displays the Bland&#x2013;Altman plot. The following information can be derived visually from the diagram: (1) an estimate of the true value on the <italic>x</italic>-axis (mean), (2) standard deviation, (3) whether and to what extent systematic measurement errors (bias) lead to the deviations (variability was eliminated by difference formation on the <italic>y</italic>-axis), (4) whether the deviation of the methods or the dispersion of the deviation depends on the level of the measured values, and (5) whether outliers are present. Based on the plot, one can infer that the values are mostly well distributed and not many outliers occur.</p>
<fig id="F3" position="float"><label>Figure 3</label>
<caption><p>Bland&#x2013;Altman plot, with the difference of both measured values (<italic>S</italic>1&#x2009;&#x2212;&#x2009;<italic>S</italic>2) plotted on the <italic>y</italic>-axis against their mean value [(<italic>S</italic>1&#x2009;&#x002B;&#x2009;<italic>S</italic>2)/2] on the <italic>x</italic>-axis.</p></caption>
<graphic xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fsurg-09-939079-g003.tif"/>
</fig>
</sec>
</sec>
<sec id="s4" sec-type="discussion"><title>Discussion</title>
<p>Cost and finance play an increasingly important role in today&#x0027;s healthcare system. It is imperative that hospitals control their costs more accurately beforehand and estimate the expenditure so that they do not get into financial difficulties.</p>
<p>Especially in surgery, and specifically colon surgery, this predictive model allows us to better manage and optimize the process for the surgeon and the hospital.</p>
<sec id="s4a"><title>Interpretation of results</title>
<p>As indicated, in this study, three models were developed and tested. The results show that random forest has the lowest MAPE for all the costs examined.</p>
<p>The lowest MAPE percentage for the random forest model indicates that this model is the most accurate at predicting costs associated with surgeries compared to the other two models examined. Typically, MAPE is a measure of error. It is used to measure the accuracy of a forecast (<xref ref-type="bibr" rid="B12">12</xref>). In calculating MAPE, the difference between the actual value and the forecast value is determined and expressed as a percentage. This means that if the difference between the actual value and the forecast value is small, the percentage is small (<xref ref-type="bibr" rid="B13">13</xref>). On the other hand, if the difference between the actual value and the forecast value is large, the MAPE percentage is large. This implies that a small MAPE percent indicates that the forecast value is near the actual value. In other words, the forecast value is more accurate (<xref ref-type="bibr" rid="B14">14</xref>). In the case of the three models, since the random forest model had the lowest MAPE percent value for all the costs compared to the other models considered, it is the most effective model in predicting the cost.</p>
<p>Why is random forest the most effective predictive model compared to the decision tree and generalized boosted regression models? This question can be answered by examining the model. The random forest model is a machine learning technique that is used to solve classification and regression problems (<xref ref-type="bibr" rid="B15">15</xref>). This model uses ensemble learning, a technique that combines many classifiers to obtain solutions to complex problems. A random forest algorithm comprises multiple decision trees. The forest that is generated by the algorithm is trained through bootstrap aggregating or bagging (<xref ref-type="bibr" rid="B16">16</xref>). Bagging is a meta-algorithm that improves the machine learning algorithms&#x2019; accuracy.</p>
<p>The random forest algorithm establishes the result from the predictions of decision trees. It predicts by taking the mean of the prediction output of the various trees (<xref ref-type="bibr" rid="B17">17</xref>). This implies that the predicted outcome by the algorithm becomes more accurate when the number of decision trees is increased.</p>
<p>One of the features of the random forest model that makes it more accurate in predicting cost outcomes is it reduces the overfitting problem normally experienced when using the decision tree model. As indicated, the model uses an ensemble learning method based on bagging (<xref ref-type="bibr" rid="B15">15</xref>, <xref ref-type="bibr" rid="B18">18</xref>). In other words, the model creates many decision trees and then considers the outcomes of all the trees in its final prediction, enhancing the prediction accuracy by the model.</p>
<p>However, despite the higher accuracy of the random forest model when compared to the decision tree and generalized boosted regression models, the model does not have the highest possible accuracy when considered alone. Normally, when examining the accuracy of a prediction using MAPE, a result of less than 10&#x0025; is considered highly accurate. A MAPE score of less than 20&#x0025; denotes a good forecast, while that between 20&#x0025; and 50&#x0025; is considered a reasonable forecast (<xref ref-type="bibr" rid="B12">12</xref>). The results show that the random forest model gives mostly reasonable forecasts rather than accurate forecasts. The model gave an outcome of over 20&#x0025; when analyzed using the MAPE. This means that while it is the most accurate model compared to the other models, when considered alone, it has only reasonable accuracy and does not accurately predict the cost incurred.</p>
<p>A number of similar studies have been carried out on the random forest model in terms of its accuracy in predicting outcomes. For example, Mei et al. examined the prediction accuracy of the random forest model when applying real-time forecasting of the New York electricity market (<xref ref-type="bibr" rid="B18">18</xref>). In reviewing the model&#x0027;s prediction accuracy, its results were compared to those of the auto-regressive-moving-average model and an artificial neural network model. It was established that the random forest model exhibited a lower MAPE value. The results of the study by Mei et al. are similar to those of this study, which also show that the random forest model makes fewer prediction errors than the other models it was compared with (<xref ref-type="bibr" rid="B18">18</xref>). However, a shortcoming of the study by Mei et al. is that it compared the random forest model to only two other models. This does not provide adequate insight into the model&#x0027;s prediction accuracy (<xref ref-type="bibr" rid="B18">18</xref>). A comparison with additional models would have helped determine whether the random forest model was the most accurate prediction model or if others were more accurate.</p>
<p>Another similar approach to comparing algorithms was made by Xu et al., who developed and tested an accurate prediction model based on the random forest classification algorithm (<xref ref-type="bibr" rid="B19">19</xref>). They evaluated the prediction for inland water quality. To evaluate the performance of the model, the researchers compared it to other models, namely, multilayer perceptron, SVR (support vector regression), KNN (K-nearest neighbor), ridge regression, gradient boosting regression, bagging, and decision tree. It was established that the random forest-based prediction model had the highest level of accuracy when compared to all the other prediction models examined. This implies that random forest provides the most accurate outcomes when used for prediction. The results in the study by Xu et al. align with those of this study since it was also established that the random forest model is the most accurate compared to other models. The study by Xu et al. provides better insight into the accuracy of the random forest model because it compared it to multiple models (<xref ref-type="bibr" rid="B19">19</xref>). It indicates that the random forest model is one of the most accurate prediction models that can be used to predict costs for surgery.</p>
<p>Finally, the results are in line with those of Toqu&#x00E9; et al., who also established that the random forest model has higher accuracy than other models (<xref ref-type="bibr" rid="B20">20</xref>). In the study, Toqu&#x00E9; et al. built and tested machine learning models for forecasting the Montreal subway smart card entry logs using event data to find an optimal model that accurately predicts the number of incoming passengers at each station of a transportation network (<xref ref-type="bibr" rid="B20">20</xref>). The prediction models were random forest, gradient boosting decision trees, artificial neural networks, and kernel-based models, including a support vector regressor and a Gaussian process (<xref ref-type="bibr" rid="B20">20</xref>). The results showed that all random forest models performed best using root mean squared error for the evaluation and performed decently using MAPE and mean absolute error.</p>
<p>The results in this study show that all models have reasonable accuracy as the MAPE for each cost highlighted is below 50&#x0025;. This means that all models can be used to predict the costs to some level of accuracy. However, when compared, it can be seen that the random forest model is a more accurate predictor. These results are evident in similar studies showing that the random forest model is a more accurate prediction model.</p>
</sec>
<sec id="s4b"><title>Implications</title>
<p>One of the implications of the results is that hospitals and other concerned parties can employ the random forest model to forecast costs not only for colon surgery but also the costs of other risks and conditions mentioned previously. This work lays the foundation for further work and research in this area. This will allow for better financial calculations for hospitals. Through such a predictive model, it is possible to better estimate medical costs, which is especially important when factors such as LOS in the hospital and ICU, as well as complications such as anastomotic insufficiency, can have a large financial impact on the high cost. The results show that the random forest model provides more accurate predictions compared to other models like generalized boosted regression and decision tree models. For concerned parties to achieve more accurate results when predicting the costs of conditions or any other outcome, the random forest model should be employed.</p>
<p>Another implication is that there is a need for further research about the model in terms of enhancing the accuracy of the random forest model. The results show that for the final costs examined, the MAPE is more than 20&#x0025;. This is only reasonable accuracy. However, it falls short of the desired value. As indicated, the MAPE value of less than 20&#x0025; indicates a good forecast, while that of less than 10&#x0025; shows that the forecast is highly accurate. While achieving a highly accurate forecast is unlikely, any good prediction model should give a good forecast. With the random forest model being the most accurate model, this implies that it should be developed further to improve accuracy so as to give more credible results when used to predict outcomes, meaning further research is needed on the model.</p>
<p>Despite the good implications and the wide range of applications, the ethical aspect should not be ignored. Naik et al. have shown in their work that there are currently no well-defined guidelines when treating people with an application such as this. They mention that transparency must be created when working with such algorithms. Furthermore, weaknesses such as cyber attacks and privacy invasions should not be ignored if you want to advance this field and research (<xref ref-type="bibr" rid="B21">21</xref>).</p>
</sec>
<sec id="s4c"><title>Limitations of the study</title>
<p>The main limitation of the study is a lack of a representative sample. In this case, the focus was on patients undergoing colon surgery. However, in the sample dataset, only 347 individuals met this criterion. This implies that the sample was not selected in a manner that made it representative of patients undergoing colon surgery. The larger the dataset, the more accurate the results are. However, the limited number of individuals with common reasons for higher costs implies that it was impossible to effectively test the developed models in terms of their ability to predict costs associated with the disease. For such models, there is a need for adequate and detailed data to ensure they are tested thoroughly. Additionally, judging by the values in <xref ref-type="table" rid="T4">Table&#x00A0;4</xref>, an overall increase in the sample size could result in more precise models. In particular, the number of events per predictor should be larger.</p>
<table-wrap id="T4" position="float"><label>Table 4</label>
<caption><p>Internal validation performance for the three developed models.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Classifier</th>
<th valign="top" align="center">MAPE (&#x0025;) final costs</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Random forest</td>
<td valign="top" align="center">21.4 (17.2&#x2013;26.8)</td>
</tr>
<tr>
<td valign="top" align="left">Decision tree</td>
<td valign="top" align="center">25.2 (21.4&#x2013;26.3)</td>
</tr>
<tr>
<td valign="top" align="left">Generalized boosted regression</td>
<td valign="top" align="center">29.7 (25.2&#x2013;34.2)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="table-fn3"><p>MAPE, mean absolute percentage error.</p></fn>
<fn id="table-fn4"><p>Scores are reported as means (95&#x0025; confidence intervals).</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec id="s5" sec-type="conclusions"><title>Conclusion</title>
<p>Postoperative complications such as anastomotic insufficiency and ICU or hospital LOS increase the cost burden for patients and hospitals. Also, preoperative conditions like CCI increase the cost. However, there is no way of predicting these costs so that a patient or healthcare system can prepare adequately to handle the condition. This study thereby aimed to develop and validate a prediction model to accurately predict costs and develop strategies to eliminate or cover them. Out of the three tested models, the results obtained based on MAPE analysis showed that the random forest model is the most accurate. Therefore, the results imply this model should be adopted for prediction. However, the fact that MAPE results were mostly above 20&#x0025; means that further research should be undertaken to improve its accuracy.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability"><title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors upon reasonable request.</p>
</sec>
<sec id="s7"><title>Author contributions</title>
<p>Conceptualization, AT and BE; data collection, BE; analysis, VO and AT; visualization, AT; writing&#x2014;original draft preparation, AT, STM and VO; writing&#x2014;review and editing, DMF, MDH, and PCC. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="s8" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xi</surname><given-names>Y</given-names></name><name><surname>Xu</surname><given-names>P</given-names></name></person-group>. <article-title>Global colorectal cancer burden in 2020 and projections to 2040</article-title>. <source>Transl Oncol</source>. (<year>2021</year>) <volume>14</volume>(<issue>10</issue>):<fpage>101174</fpage>. <pub-id pub-id-type="doi">10.1016/j.tranon.2021.101174</pub-id><pub-id pub-id-type="pmid">34243011</pub-id></citation></ref>
<ref id="B2"><label>2.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Soeters</surname><given-names>PB</given-names></name><name><surname>de Zoete</surname><given-names>JPJGM</given-names></name><name><surname>Dejong</surname><given-names>CHC</given-names></name><name><surname>Williams</surname><given-names>NS</given-names></name><name><surname>Baeten</surname><given-names>CGMI</given-names></name></person-group>. <article-title>Colorectal surgery and anastomotic leakage</article-title>. <source>Dig Surg</source>. (<year>2002</year>) <volume>19</volume>(<issue>2</issue>):<fpage>150</fpage>. <pub-id pub-id-type="doi">10.1159/000052031</pub-id><pub-id pub-id-type="pmid">11979005</pub-id></citation></ref>
<ref id="B3"><label>3.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Karliczek</surname><given-names>A</given-names></name><name><surname>Harlaar</surname><given-names>NJ</given-names></name><name><surname>Zeebregts</surname><given-names>CJ</given-names></name><name><surname>Wiggers</surname><given-names>T</given-names></name><name><surname>Baas</surname><given-names>PC</given-names></name><name><surname>Van Dam</surname><given-names>GM</given-names></name></person-group>. <article-title>Surgeons lack predictive accuracy for anastomotic leakage in gastrointestinal surgery</article-title>. <source>Int J Colorectal Dis</source>. (<year>2009</year>) <volume>24</volume>(<issue>5</issue>):<fpage>569</fpage>&#x2013;<lpage>76</lpage>. <pub-id pub-id-type="doi">10.1007/s00384-009-0658-6</pub-id><pub-id pub-id-type="pmid">19221768</pub-id></citation></ref>
<ref id="B4"><label>4.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kourou</surname><given-names>K</given-names></name><name><surname>Exarchos</surname><given-names>TP</given-names></name><name><surname>Exarchos</surname><given-names>KP</given-names></name><name><surname>Karamouzis</surname><given-names>MV</given-names></name><name><surname>Fotiadis</surname><given-names>DI</given-names></name></person-group>. <article-title>Machine learning applications in cancer prognosis and prediction</article-title>. <source>Comput Struct Biotechnol J</source>. (<year>2015</year>) <volume>13</volume>:<fpage>8</fpage>&#x2013;<lpage>17</lpage>. <pub-id pub-id-type="doi">10.1016/j.csbj.2014.11.005</pub-id><pub-id pub-id-type="pmid">25750696</pub-id></citation></ref>
<ref id="B5"><label>5.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mosavi</surname><given-names>A</given-names></name><name><surname>Ozturk</surname><given-names>P</given-names></name><name><surname>Chau</surname><given-names>KW</given-names></name></person-group>. <article-title>Flood prediction using machine learning models: literature review</article-title>. <source>Water (Basel)</source>. (<year>2018</year>) <volume>10</volume>(<issue>11</issue>):<fpage>1536</fpage>. <pub-id pub-id-type="doi">10.3390/w10111536</pub-id></citation></ref>
<ref id="B6"><label>6.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Musunuri</surname><given-names>B</given-names></name><name><surname>Shetty</surname><given-names>S</given-names></name><name><surname>Shetty</surname><given-names>D</given-names></name><name><surname>Vanahalli</surname><given-names>M</given-names></name><name><surname>Pradhan</surname><given-names>A</given-names></name><name><surname>Naik</surname><given-names>N</given-names></name><etal/></person-group> <article-title>Acute-on-chronic liver failure mortality prediction using an artificial neural network</article-title>. <source>Eng Sci</source>. (<year>2021</year>) <volume>15</volume>:<fpage>187</fpage>&#x2013;<lpage>96</lpage>. <pub-id pub-id-type="doi">10.30919/es8d515</pub-id></citation></ref>
<ref id="B7"><label>7.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hameed</surname><given-names>BMZ</given-names></name><name><surname>Dhavileswarapu</surname><given-names>AVLS</given-names></name><name><surname>Raza</surname><given-names>SZ</given-names></name><name><surname>Karimi</surname><given-names>H</given-names></name><name><surname>Khanuja</surname><given-names>HS</given-names></name><name><surname>Shetty</surname><given-names>DK</given-names></name><etal/></person-group> <article-title>Artificial intelligence and its impact on urological diseases and management: a comprehensive review of the literature</article-title>. <source>J Clin Med</source>. (<year>2021</year>) <volume>10</volume>:<fpage>1864</fpage>. <pub-id pub-id-type="doi">10.3390/jcm10091864</pub-id><pub-id pub-id-type="pmid">33925767</pub-id></citation></ref>
<ref id="B8"><label>8.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Collins</surname><given-names>GS</given-names></name><name><surname>Reitsma</surname><given-names>JB</given-names></name><name><surname>Altman</surname><given-names>DG</given-names></name><name><surname>Moons</surname><given-names>KG</given-names></name></person-group>. <article-title>Transparent reporting of a multivariable prediction model for individual prognosis or diagnosis (TRIPOD): the TRIPOD statement</article-title>. <source>Br J Surg</source>. (<year>2015</year>) <volume>102</volume>(<issue>3</issue>):<fpage>148</fpage>&#x2013;<lpage>58</lpage>. <pub-id pub-id-type="doi">10.1002/bjs.9736</pub-id><pub-id pub-id-type="pmid">25627261</pub-id></citation></ref>
<ref id="B9"><label>9.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bolenz</surname><given-names>G</given-names></name><name><surname>Roehrborn</surname><given-names>L</given-names></name></person-group>. <article-title>Predictors of costs for robotic assisted laparoscopic radical prostatectomy, urologic oncology: seminars and original investigations</article-title>. <source>Urol Oncol</source>. (<year>2011</year>) <volume>29</volume>(<issue>3</issue>):<fpage>325</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1016/j.urolonc.2011.01.016</pub-id><pub-id pub-id-type="pmid">21555102</pub-id></citation></ref>
<ref id="B10"><label>10.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Abidin</surname><given-names>S</given-names></name><name><surname>Jaffar</surname><given-names>MM</given-names></name></person-group>. <article-title>Forecasting share prices of small size companies in Bursa Malaysia</article-title>. <source>Appl Math Inf Sci</source>. (<year>2014</year>) <volume>8</volume>:<fpage>107</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.12785/amis/080112</pub-id></citation></ref>
<ref id="B11"><label>11.</label><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Sushmita</surname><given-names>N</given-names></name><name><surname>Marquardt</surname><given-names>R</given-names></name><name><surname>Prasad</surname><given-names>DC</given-names></name><name><surname>Teredesai</surname><given-names>A</given-names></name></person-group>. <article-title>Population cost prediction on public healthcare datasets</article-title>. <conf-name>Proceedings of the 5th International Conference on Digital Health 2015</conf-name>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name> (<year>2015</year>). p. <fpage>87</fpage>&#x2013;<lpage>94</lpage>.</citation></ref>
<ref id="B12"><label>12.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Coleman</surname><given-names>CD</given-names></name><name><surname>Swanson</surname><given-names>DA</given-names></name></person-group>. <article-title>On MAPE-R as a measure of cross-sectional estimation and forecast accuracy</article-title>. <source>J Econ Soc Meas</source>. (<year>2007</year>) <volume>32</volume>(<issue>4</issue>):<fpage>219</fpage>&#x2013;<lpage>33</lpage>. <pub-id pub-id-type="doi">10.3233/JEM-2007-0290</pub-id></citation></ref>
<ref id="B13"><label>13.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hyndman</surname><given-names>RJ</given-names></name><name><surname>Koehler</surname><given-names>AB</given-names></name></person-group>. <article-title>Another look at measures of forecast accuracy</article-title>. <source>Int J Forecast</source>. (<year>2006</year>) <volume>22</volume>(<issue>4</issue>):<fpage>679</fpage>&#x2013;<lpage>88</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijforecast.2006.03.001</pub-id></citation></ref>
<ref id="B14"><label>14.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rayer</surname><given-names>S</given-names></name></person-group>. <article-title>Population forecast accuracy: does the choice of summary measure of error matter?</article-title> <source>Popul Res Policy Rev</source>. (<year>2007</year>) <volume>26</volume>(<issue>2</issue>):<fpage>163</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1007/s11113-007-9030-0</pub-id></citation></ref>
<ref id="B15"><label>15.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sinha</surname><given-names>P</given-names></name><name><surname>Gaughan</surname><given-names>AE</given-names></name><name><surname>Stevens</surname><given-names>FR</given-names></name><name><surname>Nieves</surname><given-names>JJ</given-names></name><name><surname>Sorichetta</surname><given-names>A</given-names></name><name><surname>Tatem</surname><given-names>AJ</given-names></name></person-group>. <article-title>Assessing the spatial sensitivity of a random forest model: application in gridded population modeling</article-title>. <source>Comput Environ Urban Syst</source>. (<year>2019</year>) <volume>75</volume>:<fpage>132</fpage>&#x2013;<lpage>45</lpage>. <pub-id pub-id-type="doi">10.1016/j.compenvurbsys.2019.01.006</pub-id></citation></ref>
<ref id="B16"><label>16.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bharathidason</surname><given-names>S</given-names></name><name><surname>Venkataeswaran</surname><given-names>CJ</given-names></name></person-group>. <article-title>Improving classification accuracy based on random forest model with uncorrelated high performing trees</article-title>. <source>Int J Comput Appl</source>. (<year>2014</year>) <volume>101</volume>(<issue>13</issue>):<fpage>26</fpage>&#x2013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.5120/17749-8829</pub-id></citation></ref>
<ref id="B17"><label>17.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>L</given-names></name><name><surname>Liu</surname><given-names>ZP</given-names></name><name><surname>Zhang</surname><given-names>XS</given-names></name><name><surname>Chen</surname><given-names>L</given-names></name></person-group>. <article-title>Prediction of hot spots in protein interfaces using a random forest model with hybrid features</article-title>. <source>Protein Eng Des Sel</source>. (<year>2012</year>) <volume>25</volume>(<issue>3</issue>):<fpage>119</fpage>&#x2013;<lpage>26</lpage>. <pub-id pub-id-type="doi">10.1093/protein/gzr066</pub-id></citation></ref>
<ref id="B18"><label>18.</label><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Mei</surname><given-names>J</given-names></name><name><surname>He</surname><given-names>D</given-names></name><name><surname>Harley</surname><given-names>R</given-names></name><name><surname>Habetler</surname><given-names>T</given-names></name><name><surname>Qu</surname><given-names>G</given-names></name></person-group>. <article-title>A random forest method for real-time price forecasting in New York electricity market</article-title>. <conf-name>2014 IEEE PES General Meeting &#x007C; Conference &#x0026; Exposition</conf-name>. <publisher-loc>National Harbor, MD, USA</publisher-loc>: <publisher-name>IEEE</publisher-name> (<year>2014</year>). p. <fpage>1</fpage>&#x2013;<lpage>5</lpage>.</citation></ref>
<ref id="B19"><label>19.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname><given-names>J</given-names></name><name><surname>Xu</surname><given-names>Z</given-names></name><name><surname>Kuang</surname><given-names>J</given-names></name><name><surname>Lin</surname><given-names>C</given-names></name><name><surname>Xiao</surname><given-names>L</given-names></name><name><surname>Huang</surname><given-names>X</given-names></name><etal/></person-group> <article-title>An alternative to laboratory testing: random forest-based water quality prediction framework for inland and nearshore water bodies</article-title>. <source>Water (Basel)</source>. (<year>2021</year>) <volume>13</volume>:<fpage>3262</fpage>. <pub-id pub-id-type="doi">10.3390/w13223262</pub-id></citation></ref>
<ref id="B20"><label>20.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Toqu&#x00E9;</surname><given-names>F</given-names></name><name><surname>C&#x00F4;me</surname><given-names>E</given-names></name><name><surname>Tr&#x00E9;panier</surname><given-names>M</given-names></name><name><surname>Oukellou</surname><given-names>F</given-names></name></person-group>. <comment>Forecasting of the Montreal subway smart card entry logs with event data. TRB, CIRRELT-2020-33</comment> (<year>2020</year>).</citation></ref>
<ref id="B21"><label>21.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Naik</surname><given-names>N</given-names></name><name><surname>Hameed</surname><given-names>BMZ</given-names></name><name><surname>Shetty</surname><given-names>DK</given-names></name><name><surname>Swain</surname><given-names>D</given-names></name><name><surname>Shah</surname><given-names>M</given-names></name><name><surname>Paul</surname><given-names>R</given-names></name><etal/></person-group> <article-title>Legal and ethical consideration in artificial intelligence in healthcare: who takes responsibility?</article-title> <source>Front Surg</source>. (<year>2022</year>) <volume>9</volume>:<fpage>862322</fpage>. <pub-id pub-id-type="doi">10.3389/fsurg.2022.862322</pub-id><pub-id pub-id-type="pmid">35360424</pub-id></citation></ref></ref-list>
</back>
</article>