adding predictors.pdf

2025-12-10 21:38:28 +01:00
parent 0f1a05f293
commit ac86621ad2
6 changed files with 508 additions and 62 deletions

@@ -57,11 +57,9 @@ def scaling_handler(data_frame, method="robust_scaling"):
        StandardScaler,
    )
    # Separate features and label
    labels = data_frame["label"]
    X = data_frame.drop("label", axis=1)
    # Choose scaler/transformer
    if method == "robust_scaling":
        scaler = RobustScaler()
    elif method == "standard_scaling":
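As a side note on the dispatch above, the same selection can be written as a lookup table; a minimal sketch, assuming the method names from this diff (min_max_scaling is a hypothetical extra entry, used only for illustration):

from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler

SCALERS = {
    "robust_scaling": RobustScaler,
    "standard_scaling": StandardScaler,
    "min_max_scaling": MinMaxScaler,  # hypothetical entry, not in this commit
}

def pick_scaler(method):
    # Instantiate a fresh scaler per call so no fitted state leaks between uses
    if method not in SCALERS:
        raise ValueError(f"Unknown scaling method: {method}")
    return SCALERS[method]()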
@@ -87,13 +85,12 @@ def scaling_handler(data_frame, method="robust_scaling"):
        for col in X_pos.columns:
            if min_per_column[col] <= 0:
                X_pos[col] = X_pos[col] + abs(min_per_column[col]) + 1e-6  # tiny offset
        X = X_pos
    else:
        raise ValueError(f"Unknown scaling method: {method}")
    # Fit and transform
    X_scaled = scaler.fit_transform(X)
    data_frame_scaled = pandas.DataFrame(X_scaled, columns=X.columns)
    data_frame_scaled["label"] = labels.values
@@ -101,16 +98,15 @@ def scaling_handler(data_frame, method="robust_scaling"):
    return data_frame_scaled
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    fbeta_score,
    precision_score,
    recall_score,
)
def get_metrics(y_true, y_pred, prefix=""):
    metrics = {}
    metrics[f"{prefix}accuracy"] = accuracy_score(y_true, y_pred)
    metrics[f"{prefix}f1_macro"] = f1_score(y_true, y_pred, average="macro")
@@ -120,7 +116,6 @@ def get_metrics(y_true, y_pred, prefix=""):
        y_true, y_pred, average="macro"
    )
    # Per-class scores
    f1_scores = f1_score(y_true, y_pred, average=None, zero_division=0)
    f2_scores = fbeta_score(y_true, y_pred, beta=2, average=None, zero_division=0)
    recall_scores = recall_score(y_true, y_pred, average=None, zero_division=0)
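Passing average=None makes these scorers return one score per class instead of a single aggregate, and zero_division=0 suppresses the warning when a class never appears in the predictions:

from sklearn.metrics import f1_score

per_class = f1_score([0, 1, 1], [0, 0, 0], average=None, zero_division=0)
print(per_class)  # [0.5 0. ] (class 1 is never predicted, so its F1 is 0)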
@@ -132,7 +127,6 @@ def get_metrics(y_true, y_pred, prefix=""):
metrics[f"{prefix}recall_class{i}"] = recall_scores[i]
metrics[f"{prefix}precision_class{i}"] = precision_scores[i]
# Confusion-matrix components
TP = sum((y_true == 1) & (y_pred == 1))
TN = sum((y_true == 0) & (y_pred == 0))
FP = sum((y_true == 0) & (y_pred == 1))
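These boolean sums assume NumPy arrays, where (y_true == 1) & (y_pred == 1) is an element-wise mask; they can be cross-checked against sklearn's confusion_matrix:

import numpy as np
from sklearn.metrics import confusion_matrix

y_true = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0, 1, 0, 0, 1])
TN, FP, FN, TP = confusion_matrix(y_true, y_pred).ravel()
print(TP, TN, FP, FN)  # 2 2 0 1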
@@ -151,24 +145,13 @@ import pandas as pd
def average_fold_results(fold_results):
    """
    Computes the average of each metric over multiple folds.

    Args:
        fold_results: list of dicts, each containing the metrics for one fold

    Returns:
        dict of averaged metrics
    """
    if not fold_results:
        return {}
    # Convert the list of dicts to a DataFrame
    df = pd.DataFrame(fold_results)
    # Compute the mean of each column
    avg_metrics = df.mean().to_dict()
    # Convert any NumPy scalar types to plain Python floats
    for k, v in avg_metrics.items():
        if isinstance(v, (np.float32, np.float64)):
            avg_metrics[k] = float(v)
    return avg_metrics
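A usage sketch for average_fold_results with two hypothetical folds, assuming numpy is imported as np alongside pandas at the top of the module; the values are chosen so the means are exact:

folds = [
    {"accuracy": 1.0, "f1_macro": 0.5},
    {"accuracy": 0.5, "f1_macro": 0.75},
]
print(average_fold_results(folds))  # {'accuracy': 0.75, 'f1_macro': 0.625}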