# Electrocardiogram/utils.py

def split_path(full_path):
    """Split a path into its directory and its extension-less file name.

    Returns a ``(directory, filename)`` tuple: ``directory`` is everything
    before the last path component, and ``filename`` is the last component
    with its final extension removed.
    """
    from os import path as osp

    parent, leaf = osp.split(full_path)
    stem, _extension = osp.splitext(leaf)
    return parent, stem


def write_textfile(path, data_list):
    """Write every item of ``data_list`` to ``path``, one item per line.

    The file is opened in write mode, so any existing content is replaced.
    Each item is formatted with an f-string and followed by a space and a
    newline.
    """
    with open(path, "w") as handle:
        handle.writelines(f"{item} \n" for item in data_list)

def missing_value_handler(data_path):
    """Load a CSV file and fill its missing values with a KNN imputer.

    The CSV at ``data_path`` is read into a data frame, an ``id`` column is
    dropped when present, and the remaining columns are imputed with
    ``KNNImputer(n_neighbors=5)``. Three report files are written into the
    directory of the input file:

    * ``columns.txt`` - the names of the columns that are kept,
    * ``missing.txt`` - the per-column missing-value counts before imputation,
    * ``no_missing.txt`` - the per-column missing-value counts after
      imputation.

    Args:
        data_path: Path of the CSV file to read.

    Returns:
        A new ``pandas.DataFrame`` built from the imputer output, using the
        kept column names.
    """
    import os

    import pandas
    from sklearn.impute import KNNImputer

    # Only the directory is needed: the reports are written beside the input.
    data_directory, _ = split_path(data_path)

    data_frame = pandas.read_csv(data_path)

    # remove column id
    if "id" in data_frame.columns:
        data_frame = data_frame.drop("id", axis="columns")

    columns = list(data_frame.columns)
    # os.path.join keeps the reports in the current directory when
    # data_directory is empty (e.g. data_path == "data.csv"); a hand-built
    # f"{data_directory}/name" would resolve to "/name" at the filesystem root.
    write_textfile(os.path.join(data_directory, "columns.txt"), columns)

    # find missing values
    missing_value_counts = data_frame.isna().sum()
    write_textfile(
        os.path.join(data_directory, "missing.txt"), missing_value_counts
    )

    # fill missing values - KNNImputer
    # NOTE(review): assumes every remaining column is numeric and that no
    # column is entirely empty - confirm against the input data.
    imputer = KNNImputer(n_neighbors=5)
    data_imputed = imputer.fit_transform(data_frame)
    data_frame_imputed = pandas.DataFrame(data_imputed, columns=columns)

    # Re-count after imputation so the report shows what is still missing.
    missing_value_counts = data_frame_imputed.isna().sum()
    write_textfile(
        os.path.join(data_directory, "no_missing.txt"), missing_value_counts
    )
    return data_frame_imputed


def scaling_handler(data_frame, method="robust_scaling"):
    """Scale the feature columns of ``data_frame`` and keep ``label`` as is.

    Args:
        data_frame: Data frame containing a ``label`` column; every other
            column is passed to the scaler.
        method: Name of the scaling strategy. Only ``"robust_scaling"``
            (scikit-learn's ``RobustScaler``) is implemented.

    Returns:
        A new data frame holding the scaled feature columns followed by the
        unscaled ``label`` column.

    Raises:
        ValueError: If ``method`` is not a supported strategy.
    """
    if method != "robust_scaling":
        # Fail loudly: silently returning None would only surface later as an
        # unrelated error in the caller.
        raise ValueError(
            f"Unsupported scaling method: {method!r}; "
            "expected 'robust_scaling'"
        )

    import pandas
    from sklearn.preprocessing import RobustScaler

    labels = data_frame["label"]
    features = data_frame.drop("label", axis=1)
    scaled_values = RobustScaler().fit_transform(features)
    data_frame_scaled = pandas.DataFrame(scaled_values, columns=features.columns)
    # .values assigns positionally; the new frame has a fresh index, so
    # index-aligned assignment could mismatch rows.
    data_frame_scaled["label"] = labels.values
    return data_frame_scaled