training data without x features

This commit is contained in:
2025-12-06 21:52:40 +01:00
parent 80ea363123
commit 672b4524a9
5 changed files with 28 additions and 8 deletions

View File

@@ -11,7 +11,15 @@ from tqdm import tqdm
class CAT_BOOST:
def __init__(self, data_frame, params={}, n_split_kfold=5, test_size=0.15, seed=42):
def __init__(
self,
data_frame,
params={},
n_split_kfold=5,
test_size=0.15,
seed=42,
output_file_tuning="cat_boost_tuning_results.csv",
):
self.data_frame = data_frame
self.params = params
self.n_split_kfold = n_split_kfold
@@ -35,6 +43,8 @@ class CAT_BOOST:
self.kmeans_estimator = self.params.get("kmeans_estimator", 5)
self.tuning_results = None
self.output_file_tuning = output_file_tuning
def preprocess(self):
self.scaling_method = self.params.get("scaling_method", None)
@@ -208,6 +218,6 @@ class CAT_BOOST:
df_tuning = pandas.concat(
[df_tuning.drop(columns=["metrics"]), metrics_df], axis=1
)
df_tuning.to_csv("cat_boost_tuning_results.csv", index=False)
df_tuning.to_csv(self.output_file_tuning, index=False)
return

View File

@@ -2,9 +2,9 @@ import pandas
from catboost_model import CAT_BOOST
from lightgbm_model import LIGHT_GBM
data_frame = pandas.read_csv("./data/Ketamine_icp_no_missing.csv")
data_frame = pandas.read_csv("../data/Ketamine_icp_no_missing.csv")
cat_boost_results = pandas.read_csv("./cat_boost_tuning_results.csv")
cat_boost_results = pandas.read_csv("./cat_boost_tuning_results_no_x.csv")
lgbm_results = pandas.read_csv("./lightgbm_tuning_results.csv")
@@ -44,12 +44,12 @@ lgbm_test_metrics_clean = clean_metrics(lgbm_test_metrics)
comparison_df = pd.DataFrame(
[
{"model": "catboost", **cat_test_metrics_clean},
{"model": "lightgbm_no_x", **cat_test_metrics_clean},
{"model": "lightgbm", **lgbm_test_metrics_clean},
]
)
comparison_filename = "comparison_catboost_lightgbm.csv"
comparison_filename = "comparison_lightgbm_no_x_vs_lightgbm.csv"
comparison_df.to_csv(comparison_filename, index=False)
print(f"Comparison saved to: {comparison_filename}")

View File

@@ -11,7 +11,15 @@ from tqdm import tqdm
class LIGHT_GBM:
def __init__(self, data_frame, params={}, n_split_kfold=5, test_size=0.15, seed=42):
def __init__(
self,
data_frame,
params={},
n_split_kfold=5,
test_size=0.15,
seed=42,
output_file_tuning="lightgbm_tuning_results.csv",
):
self.data_frame = data_frame
self.params = params
self.n_split_kfold = n_split_kfold
@@ -37,6 +45,8 @@ class LIGHT_GBM:
self.kmeans_estimator = self.params.get("kmeans_estimator", 5)
self.tuning_results = None
self.output_file_tuning = output_file_tuning
def preprocess(self):
self.scaling_method = self.params.get("scaling_method", None)
if self.scaling_method:
@@ -232,6 +242,6 @@ class LIGHT_GBM:
df_tuning = pandas.concat(
[df_tuning.drop(columns=["metrics"]), metrics_df], axis=1
)
df_tuning.to_csv("lightgbm_tuning_results.csv", index=False)
df_tuning.to_csv(self.output_file_tuning, index=False)
return