LightGBM tuning
This commit is contained in:
62
utils.py
62
utils.py
@@ -40,18 +40,62 @@ def missing_value_handler(data_path):
|
||||
|
||||
missing_value_counts = data_frame_imputed.isna().sum()
|
||||
write_textfile(f"{data_directory}/no_missing.txt", missing_value_counts)
|
||||
|
||||
data_frame_imputed.to_csv("./data/Ketamine_icp_no_missing.csv", index=False)
|
||||
|
||||
return data_frame_imputed
|
||||
|
||||
|
||||
def scaling_handler(data_frame, method="robust_scaling"):
    """Scale or transform every feature column, leaving ``label`` untouched.

    The ``label`` column is split off, the remaining columns are passed
    through the scikit-learn scaler/transformer selected by ``method``, and
    a new frame is built from the transformed features with ``label``
    appended as the last column.

    Args:
        data_frame: pandas DataFrame containing a ``label`` column plus the
            feature columns to transform.
        method: One of ``"robust_scaling"``, ``"standard_scaling"``,
            ``"minmax_scaling"``, ``"maxabs_scaling"``, ``"quantile_normal"``,
            ``"quantile_uniform"``, ``"yeo_johnson"`` or ``"box_cox"``.

    Returns:
        A new pandas DataFrame with the transformed feature columns (same
        column names and order) followed by ``label``. The result carries a
        fresh default index; the input frame is not modified.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Single source of truth: method name -> (class in sklearn.preprocessing,
    # constructor keyword arguments).
    scaler_specs = {
        "robust_scaling": ("RobustScaler", {}),
        "standard_scaling": ("StandardScaler", {}),
        "minmax_scaling": ("MinMaxScaler", {}),
        "maxabs_scaling": ("MaxAbsScaler", {}),
        "quantile_normal": (
            "QuantileTransformer",
            {"output_distribution": "normal", "random_state": 42},
        ),
        "quantile_uniform": (
            "QuantileTransformer",
            {"output_distribution": "uniform", "random_state": 42},
        ),
        "yeo_johnson": ("PowerTransformer", {"method": "yeo-johnson"}),
        "box_cox": ("PowerTransformer", {"method": "box-cox"}),
    }

    # Fail fast on a typo in ``method`` before importing or touching data.
    if method not in scaler_specs:
        raise ValueError(f"Unknown scaling method: {method}")

    import pandas
    from sklearn import preprocessing

    class_name, scaler_kwargs = scaler_specs[method]
    scaler = getattr(preprocessing, class_name)(**scaler_kwargs)

    # Separate features and label. ``drop`` returns a new frame, so the
    # column assignments below never write into the caller's data.
    labels = data_frame["label"]
    features = data_frame.drop("label", axis=1)

    if method == "box_cox":
        # Box-Cox requires strictly positive values: lift every column whose
        # minimum is <= 0 so that its minimum becomes a tiny positive offset.
        column_minimums = features.min()
        for column in features.columns:
            if column_minimums[column] <= 0:
                features[column] = (
                    features[column] + abs(column_minimums[column]) + 1e-6
                )

    # Fit and transform
    scaled_values = scaler.fit_transform(features)
    data_frame_scaled = pandas.DataFrame(scaled_values, columns=features.columns)
    # ``.values`` drops the original index so labels align positionally with
    # the freshly indexed scaled frame.
    data_frame_scaled["label"] = labels.values

    return data_frame_scaled
|
||||
|
||||
Reference in New Issue
Block a user