diff --git a/models/__pycache__/catboost_model.cpython-312.pyc b/models/__pycache__/catboost_model.cpython-312.pyc index f0c02c8..5823aaa 100644 Binary files a/models/__pycache__/catboost_model.cpython-312.pyc and b/models/__pycache__/catboost_model.cpython-312.pyc differ diff --git a/models/__pycache__/lightgbm_model.cpython-312.pyc b/models/__pycache__/lightgbm_model.cpython-312.pyc index 3e9e062..bddacf0 100644 Binary files a/models/__pycache__/lightgbm_model.cpython-312.pyc and b/models/__pycache__/lightgbm_model.cpython-312.pyc differ diff --git a/models/catboost_model.py b/models/catboost_model.py index c312428..a14ce71 100644 --- a/models/catboost_model.py +++ b/models/catboost_model.py @@ -11,7 +11,15 @@ from tqdm import tqdm class CAT_BOOST: - def __init__(self, data_frame, params={}, n_split_kfold=5, test_size=0.15, seed=42): + def __init__( + self, + data_frame, + params={}, + n_split_kfold=5, + test_size=0.15, + seed=42, + output_file_tuning="cat_boost_tuning_results.csv", + ): self.data_frame = data_frame self.params = params self.n_split_kfold = n_split_kfold @@ -35,6 +43,8 @@ class CAT_BOOST: self.kmeans_estimator = self.params.get("kmeans_estimator", 5) self.tuning_results = None + self.output_file_tuning = output_file_tuning + def preprocess(self): self.scaling_method = self.params.get("scaling_method", None) @@ -208,6 +218,6 @@ class CAT_BOOST: df_tuning = pandas.concat( [df_tuning.drop(columns=["metrics"]), metrics_df], axis=1 ) - df_tuning.to_csv("cat_boost_tuning_results.csv", index=False) + df_tuning.to_csv(self.output_file_tuning, index=False) return diff --git a/models/compare_models.py b/models/compare_models.py index dc5e157..5d1142a 100644 --- a/models/compare_models.py +++ b/models/compare_models.py @@ -2,9 +2,9 @@ import pandas from catboost_model import CAT_BOOST from lightgbm_model import LIGHT_GBM -data_frame = pandas.read_csv("./data/Ketamine_icp_no_missing.csv") +data_frame = pandas.read_csv("../data/Ketamine_icp_no_missing.csv") -cat_boost_results = pandas.read_csv("./cat_boost_tuning_results.csv") +cat_boost_results = pandas.read_csv("./cat_boost_tuning_results_no_x.csv") lgbm_results = pandas.read_csv("./lightgbm_tuning_results.csv") @@ -44,12 +44,12 @@ lgbm_test_metrics_clean = clean_metrics(lgbm_test_metrics) comparison_df = pd.DataFrame( [ - {"model": "catboost", **cat_test_metrics_clean}, + {"model": "lightgbm_no_x", **cat_test_metrics_clean}, {"model": "lightgbm", **lgbm_test_metrics_clean}, ] ) -comparison_filename = "comparison_catboost_lightgbm.csv" +comparison_filename = "comparison_lightgbm_no_x_vs_lightgbm.csv" comparison_df.to_csv(comparison_filename, index=False) print(f"Comparison saved to: {comparison_filename}") diff --git a/models/lightgbm_model.py b/models/lightgbm_model.py index 5b3dd59..0d27a8a 100644 --- a/models/lightgbm_model.py +++ b/models/lightgbm_model.py @@ -11,7 +11,15 @@ from tqdm import tqdm class LIGHT_GBM: - def __init__(self, data_frame, params={}, n_split_kfold=5, test_size=0.15, seed=42): + def __init__( + self, + data_frame, + params={}, + n_split_kfold=5, + test_size=0.15, + seed=42, + output_file_tuning="lightgbm_tuning_results.csv", + ): self.data_frame = data_frame self.params = params self.n_split_kfold = n_split_kfold @@ -37,6 +45,8 @@ class LIGHT_GBM: self.kmeans_estimator = self.params.get("kmeans_estimator", 5) self.tuning_results = None + self.output_file_tuning = output_file_tuning + def preprocess(self): self.scaling_method = self.params.get("scaling_method", None) if self.scaling_method: @@ -232,6 +242,6 @@ class LIGHT_GBM: df_tuning = pandas.concat( [df_tuning.drop(columns=["metrics"]), metrics_df], axis=1 ) - df_tuning.to_csv("lightgbm_tuning_results.csv", index=False) + df_tuning.to_csv(self.output_file_tuning, index=False) return