# Electrocardiogram/runner.py
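# Benchmarks a set of class-imbalance strategies (focal loss, SMOTE
# variants, class weighting) on the cleaned Ketamin/ICP dataset and
# appends per-model train/test metrics to a CSV.
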
import os

import pandas
from catboost import CatBoostClassifier
from imblearn.ensemble import BalancedRandomForestClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

from custom_models.LGBMFocalWrapper import LGBMFocalWrapper
from train import test_model, train_model_with_kfold

data_frame = pandas.read_csv("./data/Ketamin_icp_cleaned.csv")
y = data_frame["label"]
X = data_frame.drop(columns=["label"])

# Stratified split keeps the class ratio identical in train and test.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    stratify=y,
    random_state=42,
)

# Negative-to-positive ratio, reused below for scale_pos_weight and
# CatBoost class_weights.
neg = int((y_train == 0).sum())
pos = int((y_train == 1).sum())
scale_pos = neg / pos if pos > 0 else 1.0
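
# Each entry pairs a classifier with an imbalance-handling strategy: either
# an oversampling method applied during cross-validation ("smote" +
# "smote_method") or a built-in class-weighting / loss-based scheme.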
models = [
    {
        "name": "LGBM_FOCAL_LOSS",
        "model": LGBMFocalWrapper(
            n_estimators=500,
            learning_rate=0.05,
            max_depth=-1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
        ),
        "smote": True,
        "smote_method": "kmeans",
    },
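    # Plain LGBM paired with different oversampling variants (SMOTE,
    # KMeans-SMOTE, SVM-SMOTE, Borderline-SMOTE, ADASYN).
    # NOTE: in LightGBM, subsample (bagging_fraction) only takes effect when
    # subsample_freq > 0, so row subsampling is inactive for these gbdt
    # models as configured.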
    {
        "name": "LGBM_SMOTE",
        "model": LGBMClassifier(
            n_estimators=500,
            learning_rate=0.05,
            max_depth=-1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            verbose=-1,
            n_jobs=-1,
        ),
        "smote": True,
        "smote_method": "smote",
    },
    {
        "name": "LGBM_KMEANS_SMOTE",
        "model": LGBMClassifier(
            n_estimators=500,
            learning_rate=0.05,
            max_depth=-1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            verbose=-1,
            n_jobs=-1,
        ),
        "smote": True,
        "smote_method": "kmeans",
    },
    {
        "name": "LGBM_SVM_SMOTE",
        "model": LGBMClassifier(
            n_estimators=500,
            learning_rate=0.05,
            max_depth=-1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            verbose=-1,
            n_jobs=-1,
        ),
        "smote": True,
        "smote_method": "svm",
    },
    {
        "name": "LGBM_BORDERLINE_SMOTE",
        "model": LGBMClassifier(
            n_estimators=500,
            learning_rate=0.05,
            max_depth=-1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            verbose=-1,
            n_jobs=-1,
        ),
        "smote": True,
        "smote_method": "borderline",
    },
    {
        "name": "LGBM_ADASYN_SMOTE",
        "model": LGBMClassifier(
            n_estimators=500,
            learning_rate=0.05,
            max_depth=-1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            verbose=-1,
            n_jobs=-1,
        ),
        "smote": True,
        "smote_method": "adasyn",
    },
    {
        "name": "LGBM_Balanced",
        "model": LGBMClassifier(
            n_estimators=500,
            learning_rate=0.05,
            max_depth=-1,
            subsample=0.8,
            colsample_bytree=0.8,
            class_weight="balanced",
            random_state=42,
            verbose=-1,
            n_jobs=-1,
        ),
        "smote": False,
    },
    {
        "name": "LGBM_DART",
        "model": LGBMClassifier(
            n_estimators=500,
            learning_rate=0.05,
            max_depth=-1,
            subsample=0.8,
            colsample_bytree=0.8,
            boosting_type="dart",
            random_state=42,
            verbose=-1,
            n_jobs=-1,
        ),
        "smote": True,
        "smote_method": "kmeans",
    },
    {
        "name": "LGBM_GOSS",
        # GOSS replaces row bagging with gradient-based one-side sampling,
        # so subsample is omitted here.
        "model": LGBMClassifier(
            n_estimators=500,
            learning_rate=0.05,
            max_depth=-1,
            boosting_type="goss",
            random_state=42,
            verbose=-1,
            n_jobs=-1,
        ),
        "smote": True,
        "smote_method": "kmeans",
    },
    {
        "name": "LGBM_RF",
        "model": LGBMClassifier(
            n_estimators=500,
            learning_rate=0.05,
            max_depth=-1,
            boosting_type="rf",
            subsample=0.8,
            # rf boosting requires active bagging (subsample < 1 and
            # subsample_freq > 0); without this LightGBM errors at fit time.
            subsample_freq=1,
            colsample_bytree=0.8,
            random_state=42,
            verbose=-1,
            n_jobs=-1,
        ),
        "smote": True,
        "smote_method": "kmeans",
    },
    {
        "name": "LGBM_scale_pos_weight",
        "model": LGBMClassifier(
            n_estimators=500,
            learning_rate=0.05,
            max_depth=-1,
            scale_pos_weight=scale_pos,
            random_state=42,
            verbose=-1,
            n_jobs=-1,
        ),
        "smote": False,
    },
    {
        "name": "LGBM_is_unbalance",
        "model": LGBMClassifier(
            n_estimators=500,
            learning_rate=0.05,
            max_depth=-1,
            is_unbalance=True,
            random_state=42,
            verbose=-1,
            n_jobs=-1,
        ),
        "smote": False,
    },
    {
        "name": "XGB_scale_pos_weight",
        "model": XGBClassifier(
            n_estimators=500,
            learning_rate=0.05,
            max_depth=6,
            scale_pos_weight=scale_pos,
            random_state=42,
            n_jobs=-1,
            # use_label_encoder is deprecated and ignored by recent XGBoost
            # releases; kept for compatibility with older versions.
            use_label_encoder=False,
            eval_metric="logloss",
        ),
        "smote": False,
    },
    {
        "name": "CatBoost_balanced",
        "model": CatBoostClassifier(
            iterations=500,
            learning_rate=0.05,
            depth=6,
            class_weights=[1, scale_pos],
            random_state=42,
            verbose=0,
        ),
        "smote": False,
    },
    {
        "name": "RandomForest_balanced",
        "model": RandomForestClassifier(
            n_estimators=500,
            max_depth=None,
            class_weight="balanced",
            random_state=42,
            n_jobs=-1,
        ),
        "smote": False,
    },
    {
        "name": "BalancedRandomForest",
        "model": BalancedRandomForestClassifier(
            n_estimators=500,
            max_depth=None,
            random_state=42,
            n_jobs=-1,
        ),
        "smote": False,
    },
    {
        "name": "LogisticRegression_balanced",
        "model": LogisticRegression(
            max_iter=1000,
            class_weight="balanced",
            solver="liblinear",
            random_state=42,
        ),
        "smote": False,
    },
]
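
# --- Evaluation: 10-fold CV on the training split, then the held-out test set ---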
def compute_confusion(y_true, y_pred):
    # Flatten the binary confusion matrix into labelled counts.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    return {"TP": tp, "TN": tn, "FP": fp, "FN": fn}

results_to_save = []
for m in models:
    print(f"\n===== Training model: {m['name']} =====")
    # NOTE: the per-model "smote_method" entries were defined but never
    # forwarded; passing them here assumes train_model_with_kfold accepts
    # a smote_method keyword.
    train_results = train_model_with_kfold(
        m["model"],
        x_train,
        y_train,
        n_splits=10,
        smote=m["smote"],
        smote_method=m.get("smote_method"),
    )
    y_train_pred = m["model"].predict(x_train)
    train_confusion = compute_confusion(y_train, y_train_pred)

    test_results = test_model(m["model"], x_test, y_test)
    y_test_pred = m["model"].predict(x_test)
    test_confusion = compute_confusion(y_test, y_test_pred)

    results_to_save.append(
        {"model": m["name"], "stage": "train", **train_results, **train_confusion}
    )
    results_to_save.append(
        {"model": m["name"], "stage": "test", **test_results, **test_confusion}
    )

results_df = pandas.DataFrame(results_to_save)
csv_file = "lightgbm_results.csv"

# to_csv(mode="a") creates the file if it is missing, so the old
# try/except FileNotFoundError never fired and a fresh file was written
# without a header row; write the header only when the file is new.
results_df.to_csv(csv_file, mode="a", index=False, header=not os.path.exists(csv_file))
print(f"\nAll results saved to {csv_file}")