comparing models
This commit is contained in:
62
utils.py
Normal file
62
utils.py
Normal file
@@ -0,0 +1,62 @@
"""
Saeed Khosravi - 27 Nov 2025
"""
|
||||
|
||||
|
||||
def split_path(full_path):
    """Split a path into its directory and its extension-less file name.

    Returns a ``(directory, filename)`` tuple: ``directory`` is everything
    before the last path component, and ``filename`` is the last component
    with its final extension removed.
    """
    import os

    head, tail = os.path.split(full_path)
    stem, _extension = os.path.splitext(tail)
    return head, stem
|
||||
|
||||
|
||||
def write_textfile(path, data_list):
    """Write each item of ``data_list`` to the file at ``path``, one per line.

    Every item is formatted into text and followed by a single space and a
    newline. The file is opened in "w" mode, so any existing content is
    replaced.
    """
    with open(path, "w") as handle:
        handle.writelines(f"{item} \n" for item in data_list)
|
||||
|
||||
|
||||
def missing_value_handler(data_path):
    """Load a CSV file and fill its missing values with KNN imputation.

    The "id" column, if present, is dropped before imputation. Three report
    files are written into the directory of the input file:

    * ``columns.txt`` - the column names left after dropping "id",
    * ``missing.txt`` - the missing-value count of each column before
      imputation,
    * ``no_missing.txt`` - the missing-value count of each column after
      imputation.

    Args:
        data_path: Path of the CSV file to read.

    Returns:
        A new ``pandas.DataFrame`` holding the imputed values under the same
        column names as the loaded data (without "id").
    """
    import os

    import pandas
    from sklearn.impute import KNNImputer

    data_directory, _ = split_path(data_path)

    data_frame = pandas.read_csv(data_path)

    # remove column id
    if "id" in data_frame.columns:
        data_frame = data_frame.drop("id", axis="columns")

    columns = list(data_frame.columns)
    # os.path.join keeps the reports next to the input file even when
    # data_path has no directory part; formatting "<dir>/<name>" with an
    # empty directory would point at the filesystem root instead.
    write_textfile(os.path.join(data_directory, "columns.txt"), columns)

    # find missing values
    missing_value_counts = data_frame.isna().sum()
    write_textfile(
        os.path.join(data_directory, "missing.txt"), missing_value_counts
    )

    # fill missing values - KNNImputer
    imputer = KNNImputer(n_neighbors=5)
    data_imputed = imputer.fit_transform(data_frame)
    data_frame_imputed = pandas.DataFrame(data_imputed, columns=columns)

    # Re-count after imputation so the report shows what is still missing.
    missing_value_counts = data_frame_imputed.isna().sum()
    write_textfile(
        os.path.join(data_directory, "no_missing.txt"), missing_value_counts
    )
    return data_frame_imputed
|
||||
|
||||
|
||||
def scaling_handler(data_frame, method="robust_scaling"):
    """Scale the feature columns of ``data_frame``, leaving "label" unscaled.

    Args:
        data_frame: DataFrame containing a "label" column; every other
            column is treated as a feature and scaled.
        method: Name of the scaling method. Only "robust_scaling"
            (scikit-learn's ``RobustScaler``) is supported.

    Returns:
        A new DataFrame with the scaled feature columns followed by the
        original labels in a "label" column.

    Raises:
        ValueError: If ``method`` is not a supported scaling method.
    """
    if method != "robust_scaling":
        # Fail loudly: falling through would hand the caller None instead of
        # a DataFrame and hide a mistyped method name.
        raise ValueError(f"Unsupported scaling method: {method!r}")

    import pandas
    from sklearn.preprocessing import RobustScaler

    labels = data_frame["label"]
    features = data_frame.drop("label", axis=1)
    scaled_values = RobustScaler().fit_transform(features)

    data_frame_scaled = pandas.DataFrame(scaled_values, columns=features.columns)
    # The new frame has a fresh default index, so attach the labels by
    # position (raw values) rather than by index alignment.
    data_frame_scaled["label"] = labels.values
    return data_frame_scaled
|
||||
Reference in New Issue
Block a user