Make tuning-results output path configurable; compare models trained on data without x features

2025-12-06 21:52:40 +01:00
parent 80ea363123
commit 672b4524a9
5 changed files with 28 additions and 8 deletions
--- a/models/pycache/catboost_model.cpython-312.pyc
+++ b/models/pycache/catboost_model.cpython-312.pyc
--- a/models/pycache/lightgbm_model.cpython-312.pyc
+++ b/models/pycache/lightgbm_model.cpython-312.pyc
--- a/models/catboost_model.py
+++ b/models/catboost_model.py
@@ -11,7 +11,15 @@ from tqdm import tqdm


 class CAT_BOOST:
-    def __init__(self, data_frame, params={}, n_split_kfold=5, test_size=0.15, seed=42):
+    def __init__(
+        self,
+        data_frame,
+        params={},
+        n_split_kfold=5,
+        test_size=0.15,
+        seed=42,
+        output_file_tuning="cat_boost_tuning_results.csv",
+    ):
        self.data_frame = data_frame
        self.params = params
        self.n_split_kfold = n_split_kfold
@@ -35,6 +43,8 @@ class CAT_BOOST:
        self.kmeans_estimator = self.params.get("kmeans_estimator", 5)
        self.tuning_results = None

+        self.output_file_tuning = output_file_tuning
+
    def preprocess(self):
        self.scaling_method = self.params.get("scaling_method", None)

@@ -208,6 +218,6 @@ class CAT_BOOST:
        df_tuning = pandas.concat(
            [df_tuning.drop(columns=["metrics"]), metrics_df], axis=1
        )
-        df_tuning.to_csv("cat_boost_tuning_results.csv", index=False)
+        df_tuning.to_csv(self.output_file_tuning, index=False)

        return
--- a/models/compare_models.py
+++ b/models/compare_models.py
@@ -2,9 +2,9 @@ import pandas
 from catboost_model import CAT_BOOST
 from lightgbm_model import LIGHT_GBM

-data_frame = pandas.read_csv("./data/Ketamine_icp_no_missing.csv")
+data_frame = pandas.read_csv("../data/Ketamine_icp_no_missing.csv")

-cat_boost_results = pandas.read_csv("./cat_boost_tuning_results.csv")
+cat_boost_results = pandas.read_csv("./cat_boost_tuning_results_no_x.csv")
 lgbm_results = pandas.read_csv("./lightgbm_tuning_results.csv")


@@ -44,12 +44,12 @@ lgbm_test_metrics_clean = clean_metrics(lgbm_test_metrics)

 comparison_df = pd.DataFrame(
    [
-        {"model": "catboost", **cat_test_metrics_clean},
+        {"model": "lightgbm_no_x", **cat_test_metrics_clean},
        {"model": "lightgbm", **lgbm_test_metrics_clean},
    ]
 )

-comparison_filename = "comparison_catboost_lightgbm.csv"
+comparison_filename = "comparison_lightgbm_no_x_vs_lightgbm.csv"
 comparison_df.to_csv(comparison_filename, index=False)

 print(f"Comparison saved to: {comparison_filename}")
--- a/models/lightgbm_model.py
+++ b/models/lightgbm_model.py
@@ -11,7 +11,15 @@ from tqdm import tqdm


 class LIGHT_GBM:
-    def __init__(self, data_frame, params={}, n_split_kfold=5, test_size=0.15, seed=42):
+    def __init__(
+        self,
+        data_frame,
+        params={},
+        n_split_kfold=5,
+        test_size=0.15,
+        seed=42,
+        output_file_tuning="lightgbm_tuning_results.csv",
+    ):
        self.data_frame = data_frame
        self.params = params
        self.n_split_kfold = n_split_kfold
@@ -37,6 +45,8 @@ class LIGHT_GBM:
        self.kmeans_estimator = self.params.get("kmeans_estimator", 5)
        self.tuning_results = None

+        self.output_file_tuning = output_file_tuning
+
    def preprocess(self):
        self.scaling_method = self.params.get("scaling_method", None)
        if self.scaling_method:
@@ -232,6 +242,6 @@ class LIGHT_GBM:
        df_tuning = pandas.concat(
            [df_tuning.drop(columns=["metrics"]), metrics_df], axis=1
        )
-        df_tuning.to_csv("lightgbm_tuning_results.csv", index=False)
+        df_tuning.to_csv(self.output_file_tuning, index=False)

        return