editing readme.md

This commit is contained in:
2025-12-01 00:34:33 +01:00
parent f6a86cfbe9
commit ad01d6a3db
8 changed files with 1021 additions and 1013 deletions

View File

@@ -91,21 +91,21 @@ We are dealing with an extremely imbalanced dataset related to electrocardiogram
## STEP 5:
Current results taken:
Current results obtained with KMEANS_SMOTE:
| model | stage | accuracy | f1_macro | f2_macro | recall_macro | precision_macro | f1_class0 | f1_class1 | f2_class0 | f2_class1 | recall_class0 | recall_class1 | precision_class0 | precision_class1 | TP | TN | FP | FN |
|-----------------------|-------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|-----|-------|----|----|
| CatBoost_balanced | train | 0.9843784049402589 | 0.8696686267343388 | 0.8824472728294012 | 0.8916952848998795 | 0.8508242781484853 | 0.9919396338322237 | 0.7473976196364541 | 0.9908276010500254 | 0.7740669446087769 | 0.9900881006639566 | 0.7933024691358025 | 0.9938004847319636 | 0.7078480715650071 | 789 | 26898 | 140 | 19 |
| CatBoost_balanced | test | 0.9802604802604803 | 0.8348421298822796 | 0.8461546793313885 | 0.8541662696976049 | 0.8176680164072361 | 0.9898162729658793 | 0.6798679867986799 | 0.988757446094471 | 0.703551912568306 | 0.9880528191154894 | 0.7202797202797203 | 0.991586032814472 | 0.64375 | 103 | 4714 | 57 | 40 |
| LGBM_KMEANS_SMOTE | train | 0.9883286128479746 | 0.8784419356817057 | 0.8436008106620193 | 0.8240767336379762 | 0.9582821430574249 | 0.9940169232360254 | 0.7628669481273861 | 0.9966698960611392 | 0.6905317252628993 | 0.9984466771524954 | 0.6497067901234568 | 0.9896275269971563 | 0.9269367591176938 | 775 | 27036 | 2 | 33 |
| LGBM_KMEANS_SMOTE | test | 0.9865689865689866 | 0.8543196878009516 | 0.8121616449258658 | 0.7895809912158687 | 0.9600745182511498 | 0.9931221342225928 | 0.7155172413793104 | 0.9964866786565728 | 0.6278366111951589 | 0.9987424020121568 | 0.5804195804195804 | 0.9875647668393782 | 0.9325842696629213 | 83 | 4765 | 6 | 60 |
| CatBoost_balanced_knn10 | train | 0.9843784049402589 | 0.8696686267343388 | 0.8824472728294012 | 0.8916952848998795 | 0.8508242781484853 | 0.9919396338322237 | 0.7473976196364541 | 0.9908276010500254 | 0.7740669446087769 | 0.9900881006639566 | 0.7933024691358025 | 0.9938004847319636 | 0.7078480715650071 | 789 | 26898 | 140 | 19 |
| CatBoost_balanced_knn10 | test | 0.9802604802604803 | 0.8348421298822796 | 0.8461546793313885 | 0.8541662696976049 | 0.8176680164072361 | 0.9898162729658793 | 0.6798679867986799 | 0.988757446094471 | 0.703551912568306 | 0.9880528191154894 | 0.7202797202797203 | 0.991586032814472 | 0.64375 | 103 | 4714 | 57 | 40 |
| LGBM_KMEANS_SMOTE_knn10 | train | 0.9883286128479746 | 0.8784419356817057 | 0.8436008106620193 | 0.8240767336379762 | 0.9582821430574249 | 0.9940169232360254 | 0.7628669481273861 | 0.9966698960611392 | 0.6905317252628993 | 0.9984466771524954 | 0.6497067901234568 | 0.9896275269971563 | 0.9269367591176938 | 775 | 27036 | 2 | 33 |
| LGBM_KMEANS_SMOTE_knn10 | test | 0.9865689865689866 | 0.8543196878009516 | 0.8121616449258658 | 0.7895809912158687 | 0.9600745182511498 | 0.9931221342225928 | 0.7155172413793104 | 0.9964866786565728 | 0.6278366111951589 | 0.9987424020121568 | 0.5804195804195804 | 0.9875647668393782 | 0.9325842696629213 | 83 | 4765 | 6 | 60 |
## next steps:
```
✅ 1. Stratified K-fold only apply on train.
🗹 2. train LGBM model using KMEANS_SMOTE with k_neighbors=10 (fine-tune remained)
🗹 3. train Cat_boost using KMEANS_SMOTE with k_neighbors=10 (fine-tune remained)
🗹 2. train LGBM model using KMEANS_SMOTE with knn k_neighbors=10 (fine-tuning remains)
🗹 3. train CatBoost using KMEANS_SMOTE with knn k_neighbors=10 (fine-tuning remains)
🗹 4. implement the methods proposed in this article: https://1drv.ms/b/c/ab2a38fe5c318317/IQBEDsSFcYj6R6AMtOnh0X6DAZUlFqAYq19WT8nTeXomFwg
🗹 5. compare the proposed model with SMOTE vs. oversampling balancing methods
```

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -3,3 +3,7 @@ CatBoost_balanced,train,0.9843784049402589,0.8696686267343388,0.8824472728294012
CatBoost_balanced,test,0.9802604802604803,0.8348421298822796,0.8461546793313885,0.8541662696976049,0.8176680164072361,0.9898162729658793,0.6798679867986799,0.988757446094471,0.703551912568306,0.9880528191154894,0.7202797202797203,0.991586032814472,0.64375,103,4714,57,40
LGBM_KMEANS_SMOTE,train,0.9883286128479746,0.8784419356817057,0.8436008106620193,0.8240767336379762,0.9582821430574249,0.9940169232360254,0.7628669481273861,0.9966698960611392,0.6905317252628993,0.9984466771524954,0.6497067901234568,0.9896275269971563,0.9269367591176938,775,27036,2,33
LGBM_KMEANS_SMOTE,test,0.9865689865689866,0.8543196878009516,0.8121616449258658,0.7895809912158687,0.9600745182511498,0.9931221342225928,0.7155172413793104,0.9964866786565728,0.6278366111951589,0.9987424020121568,0.5804195804195804,0.9875647668393782,0.9325842696629213,83,4765,6,60
CatBoost_balanced,train,0.9843784049402589,0.8696686267343388,0.8824472728294012,0.8916952848998795,0.8508242781484853,0.9919396338322237,0.7473976196364541,0.9908276010500254,0.7740669446087769,0.9900881006639566,0.7933024691358025,0.9938004847319636,0.7078480715650071,789,26898,140,19
CatBoost_balanced,test,0.9802604802604803,0.8348421298822796,0.8461546793313885,0.8541662696976049,0.8176680164072361,0.9898162729658793,0.6798679867986799,0.988757446094471,0.703551912568306,0.9880528191154894,0.7202797202797203,0.991586032814472,0.64375,103,4714,57,40
LGBM_KMEANS_SMOTE,train,0.9883286128479746,0.8784419356817057,0.8436008106620193,0.8240767336379762,0.9582821430574249,0.9940169232360254,0.7628669481273861,0.9966698960611392,0.6905317252628993,0.9984466771524954,0.6497067901234568,0.9896275269971563,0.9269367591176938,775,27036,2,33
LGBM_KMEANS_SMOTE,test,0.9865689865689866,0.8543196878009516,0.8121616449258658,0.7895809912158687,0.9600745182511498,0.9931221342225928,0.7155172413793104,0.9964866786565728,0.6278366111951589,0.9987424020121568,0.5804195804195804,0.9875647668393782,0.9325842696629213,83,4765,6,60
1 model stage accuracy f1_macro f2_macro recall_macro precision_macro f1_class0 f1_class1 f2_class0 f2_class1 recall_class0 recall_class1 precision_class0 precision_class1 TP TN FP FN
3 CatBoost_balanced test 0.9802604802604803 0.8348421298822796 0.8461546793313885 0.8541662696976049 0.8176680164072361 0.9898162729658793 0.6798679867986799 0.988757446094471 0.703551912568306 0.9880528191154894 0.7202797202797203 0.991586032814472 0.64375 103 4714 57 40
4 LGBM_KMEANS_SMOTE train 0.9883286128479746 0.8784419356817057 0.8436008106620193 0.8240767336379762 0.9582821430574249 0.9940169232360254 0.7628669481273861 0.9966698960611392 0.6905317252628993 0.9984466771524954 0.6497067901234568 0.9896275269971563 0.9269367591176938 775 27036 2 33
5 LGBM_KMEANS_SMOTE test 0.9865689865689866 0.8543196878009516 0.8121616449258658 0.7895809912158687 0.9600745182511498 0.9931221342225928 0.7155172413793104 0.9964866786565728 0.6278366111951589 0.9987424020121568 0.5804195804195804 0.9875647668393782 0.9325842696629213 83 4765 6 60
6 CatBoost_balanced train 0.9843784049402589 0.8696686267343388 0.8824472728294012 0.8916952848998795 0.8508242781484853 0.9919396338322237 0.7473976196364541 0.9908276010500254 0.7740669446087769 0.9900881006639566 0.7933024691358025 0.9938004847319636 0.7078480715650071 789 26898 140 19
7 CatBoost_balanced test 0.9802604802604803 0.8348421298822796 0.8461546793313885 0.8541662696976049 0.8176680164072361 0.9898162729658793 0.6798679867986799 0.988757446094471 0.703551912568306 0.9880528191154894 0.7202797202797203 0.991586032814472 0.64375 103 4714 57 40
8 LGBM_KMEANS_SMOTE train 0.9883286128479746 0.8784419356817057 0.8436008106620193 0.8240767336379762 0.9582821430574249 0.9940169232360254 0.7628669481273861 0.9966698960611392 0.6905317252628993 0.9984466771524954 0.6497067901234568 0.9896275269971563 0.9269367591176938 775 27036 2 33
9 LGBM_KMEANS_SMOTE test 0.9865689865689866 0.8543196878009516 0.8121616449258658 0.7895809912158687 0.9600745182511498 0.9931221342225928 0.7155172413793104 0.9964866786565728 0.6278366111951589 0.9987424020121568 0.5804195804195804 0.9875647668393782 0.9325842696629213 83 4765 6 60

View File

@@ -285,7 +285,6 @@ models = [
class_weights=[1, scale_pos],
random_state=42,
verbose=0,
n_jobs=-1,
),
"smote": False,
},

View File

@@ -42,19 +42,24 @@ def train_model_with_kfold(
if smote:
if smote_method.lower() == "kmeans":
from collections import Counter
minority = Counter(y_train)[1]
k_neighbors = min(10, max(2, minority // 10))
sampler = KMeansSMOTE(
k_neighbors=10,
k_neighbors=k_neighbors,
cluster_balance_threshold=0.1,
random_state=random_state,
)
elif smote_method.lower() == "smote":
sampler = SMOTE(k_neighbors=5, random_state=random_state)
sampler = SMOTE(k_neighbors=15, random_state=random_state)
elif smote_method.lower() == "svmsmote":
sampler = SVMSMOTE(k_neighbors=5, random_state=random_state)
sampler = SVMSMOTE(k_neighbors=15, random_state=random_state)
elif smote_method.lower() == "borderline":
sampler = BorderlineSMOTE(k_neighbors=5, random_state=random_state)
sampler = BorderlineSMOTE(k_neighbors=15, random_state=random_state)
elif smote_method.lower() == "adasyn":
sampler = ADASYN(n_neighbors=5, random_state=random_state)
sampler = ADASYN(n_neighbors=15, random_state=random_state)
else:
raise ValueError(f"Unknown smote_method: {smote_method}")