LightGBM tuning

This commit is contained in:
2025-12-06 00:14:59 +01:00
parent ad01d6a3db
commit 42091d6f24
14 changed files with 1539 additions and 1784 deletions

View File

@@ -40,18 +40,62 @@ def missing_value_handler(data_path):
missing_value_counts = data_frame_imputed.isna().sum()
write_textfile(f"{data_directory}/no_missing.txt", missing_value_counts)
data_frame_imputed.to_csv("./data/Ketamine_icp_no_missing.csv", index=False)
return data_frame_imputed
# Maps every supported ``method`` name to the ``sklearn.preprocessing`` class
# that implements it and the keyword arguments used to construct that class.
_SCALER_SPECS = {
    "robust_scaling": ("RobustScaler", {}),
    "standard_scaling": ("StandardScaler", {}),
    "minmax_scaling": ("MinMaxScaler", {}),
    "maxabs_scaling": ("MaxAbsScaler", {}),
    "quantile_normal": (
        "QuantileTransformer",
        {"output_distribution": "normal", "random_state": 42},
    ),
    "quantile_uniform": (
        "QuantileTransformer",
        {"output_distribution": "uniform", "random_state": 42},
    ),
    "yeo_johnson": ("PowerTransformer", {"method": "yeo-johnson"}),
    "box_cox": ("PowerTransformer", {"method": "box-cox"}),
}


def _shift_to_positive(features, offset=1e-6):
    """Return a copy of *features* in which every column is strictly positive.

    Columns whose minimum is <= 0 are shifted up by ``abs(minimum) + offset``;
    columns that are already positive are left untouched.
    """
    shifted = features.copy()
    for column, minimum in shifted.min().items():
        if minimum <= 0:
            shifted[column] = shifted[column] + abs(minimum) + offset  # tiny offset
    return shifted


def scaling_handler(data_frame, method="robust_scaling"):
    """Scale or transform every feature column of *data_frame*.

    The ``"label"`` column is set aside, the remaining columns are fitted and
    transformed with the scaler selected by *method*, and the label column is
    re-attached (positionally) as the last column of the result.

    Args:
        data_frame: pandas DataFrame containing a ``"label"`` column plus the
            feature columns to scale.
        method: One of ``"robust_scaling"``, ``"standard_scaling"``,
            ``"minmax_scaling"``, ``"maxabs_scaling"``, ``"quantile_normal"``,
            ``"quantile_uniform"``, ``"yeo_johnson"`` or ``"box_cox"``.

    Returns:
        A new DataFrame with the transformed features and the original labels.
        The result carries a fresh default index, not the index of
        *data_frame*.

    Raises:
        ValueError: If *method* is not one of the supported names.
    """
    # Validate first so a typo fails fast, before any import or data access.
    try:
        class_name, scaler_kwargs = _SCALER_SPECS[method]
    except (KeyError, TypeError):
        raise ValueError(f"Unknown scaling method: {method}") from None

    import pandas
    from sklearn import preprocessing

    # Separate features and label
    labels = data_frame["label"]
    features = data_frame.drop("label", axis=1)

    if method == "box_cox":
        # Box-Cox requires all positive values
        features = _shift_to_positive(features)

    scaler = getattr(preprocessing, class_name)(**scaler_kwargs)

    # Fit and transform
    scaled_values = scaler.fit_transform(features)
    data_frame_scaled = pandas.DataFrame(scaled_values, columns=features.columns)
    data_frame_scaled["label"] = labels.values
    return data_frame_scaled