adding predictors.pdf
@@ -57,11 +57,9 @@ def scaling_handler(data_frame, method="robust_scaling"):
        StandardScaler,
    )

    # Separate features and label
    labels = data_frame["label"]
    X = data_frame.drop("label", axis=1)

    # Choose scaler/transformer
    if method == "robust_scaling":
        scaler = RobustScaler()
    elif method == "standard_scaling":
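As an aside on the choice above: RobustScaler centres each feature on its median and divides by the interquartile range, while StandardScaler uses the mean and standard deviation, so a single outlier pulls StandardScaler much harder. A standalone sketch (toy values, not part of this commit) that makes the difference visible:

# Toy comparison of the two scalers selected in scaling_handler; the values are invented.
import numpy as np
from sklearn.preprocessing import RobustScaler, StandardScaler

values = np.array([[1.0], [2.0], [3.0], [100.0]])
print(RobustScaler().fit_transform(values).ravel())    # bulk stays near 0, outlier remains far out
print(StandardScaler().fit_transform(values).ravel())  # the outlier compresses the other three into a narrow band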
@@ -87,13 +85,12 @@ def scaling_handler(data_frame, method="robust_scaling"):
        for col in X_pos.columns:
            if min_per_column[col] <= 0:
                X_pos[col] = X_pos[col] + abs(min_per_column[col]) + 1e-6  # tiny offset to keep values strictly positive

        X = X_pos
    else:
        raise ValueError(f"Unknown scaling method: {method}")

    # Fit and transform
    X_scaled = scaler.fit_transform(X)
    data_frame_scaled = pandas.DataFrame(X_scaled, columns=X.columns)
    data_frame_scaled["label"] = labels.values
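The branch above (which appears to feed a transform that needs strictly positive inputs) shifts any non-positive column so its minimum lands just above zero. A standalone sketch of the same shift on made-up data:

# Toy illustration of the per-column positive shift; the frame and values are invented.
import pandas as pd

X_pos = pd.DataFrame({"a": [-3.0, 0.0, 2.0], "b": [1.0, 2.0, 3.0]})
min_per_column = X_pos.min()
for col in X_pos.columns:
    if min_per_column[col] <= 0:
        X_pos[col] = X_pos[col] + abs(min_per_column[col]) + 1e-6
print(X_pos.min())  # column "a" now bottoms out at 1e-6; "b" is untouched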
@@ -101,16 +98,15 @@ def scaling_handler(data_frame, method="robust_scaling"):
    return data_frame_scaled


from sklearn.metrics import (
    accuracy_score,
    f1_score,
    fbeta_score,
    precision_score,
    recall_score,
)


def get_metrics(y_true, y_pred, prefix=""):
    from sklearn.metrics import (
        accuracy_score,
        f1_score,
        fbeta_score,
        precision_score,
        recall_score,
    )

    metrics = {}
    metrics[f"{prefix}accuracy"] = accuracy_score(y_true, y_pred)
    metrics[f"{prefix}f1_macro"] = f1_score(y_true, y_pred, average="macro")
@@ -120,7 +116,6 @@ def get_metrics(y_true, y_pred, prefix=""):
        y_true, y_pred, average="macro"
    )

    # Per-class scores
    f1_scores = f1_score(y_true, y_pred, average=None, zero_division=0)
    f2_scores = fbeta_score(y_true, y_pred, beta=2, average=None, zero_division=0)
    recall_scores = recall_score(y_true, y_pred, average=None, zero_division=0)
@@ -132,7 +127,6 @@ def get_metrics(y_true, y_pred, prefix=""):
        metrics[f"{prefix}recall_class{i}"] = recall_scores[i]
        metrics[f"{prefix}precision_class{i}"] = precision_scores[i]

    # Confusion-matrix components
    TP = sum((y_true == 1) & (y_pred == 1))
    TN = sum((y_true == 0) & (y_pred == 0))
    FP = sum((y_true == 0) & (y_pred == 1))
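A hypothetical call to get_metrics (the labels are invented, and the function is assumed to be importable from the module this commit touches). Because the TP/TN/FP counts use elementwise comparisons like (y_true == 1) & (y_pred == 1), the inputs should be NumPy arrays or pandas Series rather than plain lists:

# Invented binary labels for a single validation fold.
import numpy as np

y_true = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0, 1, 0, 0, 1])
val_metrics = get_metrics(y_true, y_pred, prefix="val_")
print(val_metrics["val_accuracy"], val_metrics["val_f1_macro"])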
@@ -151,24 +145,13 @@ import pandas as pd
def average_fold_results(fold_results):
    """
    Computes the average of metrics over multiple folds.

    fold_results: list of dictionaries, each containing metrics for one fold

    Returns:
        dict of average metrics
    """
    if not fold_results:
        return {}

    # Convert list of dicts to DataFrame
    df = pd.DataFrame(fold_results)

    # Compute mean for each column
    avg_metrics = df.mean().to_dict()

    # Convert any NumPy types to float
    for k, v in avg_metrics.items():
        if isinstance(v, (np.float32, np.float64)):
            avg_metrics[k] = float(v)
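And a usage sketch for average_fold_results, assuming each fold produced a dict shaped like the output of get_metrics (the numbers are invented):

# Invented per-fold metric dicts; keys mirror the get_metrics naming above.
fold_results = [
    {"val_accuracy": 0.90, "val_f1_macro": 0.88},
    {"val_accuracy": 0.86, "val_f1_macro": 0.84},
]
print(average_fold_results(fold_results))  # per-metric means across the two folds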