Files
Electrocardiogram/utils.py
2025-11-30 23:28:08 +01:00

58 lines
1.7 KiB
Python

def split_path(full_path):
    """Split a path into its directory and its extension-less file name.

    Args:
        full_path: Path to split.

    Returns:
        A ``(directory, filename)`` tuple. ``directory`` is everything before
        the final path component, and ``filename`` is that final component
        with its last extension removed.
    """
    import os

    parent, leaf = os.path.split(full_path)
    stem, _extension = os.path.splitext(leaf)
    return parent, stem
def write_textfile(path, data_list):
    """Write every item of ``data_list`` to ``path``, one item per line.

    The file is opened in write mode, so any existing content is replaced.
    Each item is formatted with an f-string and followed by a single space
    and a newline.

    Args:
        path: Destination file path.
        data_list: Iterable of values to write.
    """
    with open(path, "w") as handle:
        handle.writelines(f"{item} \n" for item in data_list)
def missing_value_handler(data_path):
    """Load a CSV file and fill its missing values with a KNN imputer.

    Besides returning the imputed data, three report files are written next
    to the CSV file:

    * ``columns.txt``    - the column names that are kept (``id`` removed),
    * ``missing.txt``    - the per-column missing-value counts before imputing,
    * ``no_missing.txt`` - the per-column missing-value counts after imputing.

    Args:
        data_path: Path of the CSV file to read.

    Returns:
        A ``pandas.DataFrame`` holding the imputed values under the kept
        column names.
    """
    import os

    import pandas
    from sklearn.impute import KNNImputer

    data_directory, _ = split_path(data_path)

    def report_path(name):
        # os.path.join keeps the report beside the CSV even when the
        # directory part is empty; a hand-built "{dir}/name" would then
        # point at the filesystem root ("/name").
        return os.path.join(data_directory, name)

    data_frame = pandas.read_csv(data_path)
    columns = list(data_frame.head(0))

    # remove column id
    if "id" in columns:
        data_frame = data_frame.drop("id", axis="columns")
        columns = list(data_frame.head(0))
    write_textfile(report_path("columns.txt"), columns)

    # find missing values
    missing_value_counts = data_frame.isna().sum()
    write_textfile(report_path("missing.txt"), missing_value_counts)

    # fill missing values - KNNImputer
    # NOTE(review): KNNImputer is documented to drop columns that are entirely
    # NaN unless keep_empty_features=True; the column count would then no
    # longer match `columns` - confirm inputs never contain such columns.
    imputer = KNNImputer(n_neighbors=5)
    data_imputed = imputer.fit_transform(data_frame)
    data_frame_imputed = pandas.DataFrame(data_imputed, columns=columns)

    # Re-count after imputing so the report documents the result.
    missing_value_counts = data_frame_imputed.isna().sum()
    write_textfile(report_path("no_missing.txt"), missing_value_counts)
    return data_frame_imputed
def scaling_handler(data_frame, method="robust_scaling"):
    """Scale every column of ``data_frame`` except ``label``.

    Args:
        data_frame: Frame containing the feature columns and a ``label``
            column.
        method: Name of the scaling method. Only ``"robust_scaling"`` is
            implemented.

    Returns:
        A new ``pandas.DataFrame`` with the scaled feature columns followed
        by the original ``label`` values, or ``None`` when ``method`` is not
        ``"robust_scaling"``.
    """
    # Only robust scaling is implemented; any other method yields None.
    if method != "robust_scaling":
        return None

    import pandas
    from sklearn.preprocessing import RobustScaler

    label_column = data_frame["label"]
    features = data_frame.drop("label", axis=1)
    scaled_values = RobustScaler().fit_transform(features)

    data_frame_scaled = pandas.DataFrame(scaled_values, columns=features.columns)
    # Labels are copied back positionally (raw values, no index alignment).
    data_frame_scaled["label"] = label_column.values
    return data_frame_scaled