init commit
@@ -1,101 +0,0 @@
# TITLE:  Multi-Class Normal Distribution Cubic Clusters Dataset Generator
# AUTHOR: Dr. Hossein Moosaei, Saeed Khosravi
# DATE:   10/09/2020

# NORMALLY DISTRIBUTED CLUSTERS (NDC) is a data generator.
# It generates a series of random centers for multivariate
# normal distributions. NDC randomly generates a fraction
# of data for each center, i.e. what fraction of data points
# will come from this center. NDC randomly generates a
# separating plane. Based on this plane, classes are
# chosen for each center. NDC then randomly generates the
# points from the distributions. NDC can increase
# inseparability by increasing the variances of the distributions.
# A measure of "true" separability is obtained by looking
# at how many points end up on the wrong side of the
# separating plane. All values are taken as integers
# for simplicity.

import numpy as np
import pandas as pd


class MC_NDCC:

    def __init__(self, n_centers, n_samples, n_features, n_classes):
        # self.n_samples = int(input("Enter number of samples: \n"))
        # self.n_features = int(input("Enter number of features: \n"))
        # self.n_classes = int(input("Enter number of classes: \n"))
        centers = [100, 300, 500, 700]
        self.centers_list = centers[0:n_centers]
        self.n_samples = n_samples
        self.n_features = n_features
        self.n_classes = n_classes
        self.center_points = self.centers_matrix(self.centers_list, self.n_features)
        self.n_centers = 2 * len(self.centers_list) * self.n_features
        self.class_locations = self.class_center_locations(self.n_classes, self.n_centers)
        self.ss = self.sample_spliter(self.n_samples, self.n_classes, self.n_centers)
        r, c = self.class_locations.shape
        self.M = np.zeros((0, self.n_features))
        self.l = np.zeros((0, 1))
        for i in range(r):
            for j in range(c):
                # Draw this center's share of points from a normal distribution
                self.temp = np.random.normal(loc=self.center_points[int(self.class_locations[i, j])],
                                             size=(int(self.ss[i, j]), self.n_features),
                                             scale=5)
                self.label_temp = np.ones((int(self.ss[i, j]), 1)) * (i + 1)
                self.l = np.concatenate((self.l, self.label_temp), axis=0)
                self.M = np.concatenate((self.M, self.temp), axis=0)
        self.M = np.concatenate((self.M, self.l), axis=1).astype('int32')
        np.random.shuffle(self.M)

    def sample_spliter(self, n_samples, n_classes, n_centers):
        # Distributes n_samples over the classes and their centers;
        # each class ends up with roughly n_samples / n_classes samples,
        # with small random variation across its centers.
        count = 0
        n_cen_fe_cls = int(np.floor(n_centers / n_classes))  # centers per class
        n_each_c = np.zeros((n_classes, n_cen_fe_cls))
        while n_samples > count:
            r = np.random.randint(n_classes)
            r2 = np.random.randint(n_cen_fe_cls)
            n_each_c[r, r2] += 1
            count += 1
        return n_each_c

    def class_center_locations(self, n_classes, n_centers):
        # Specifies which center points belong to which classes.
        # Returns a matrix of shape (n_classes, n_centers_per_class),
        # i.e. one row of center indices for each class.
        rng = np.random.default_rng()
        # Generate a list of non-repeating random numbers from 0 to n_centers - 1
        locs = rng.choice(n_centers, n_centers, replace=False)
        n_cen_fe_cls = int(np.floor(n_centers / n_classes))  # centers per class
        cls_locs = np.zeros((n_classes, n_cen_fe_cls))
        k = 0
        for i in range(n_classes):
            for j in range(n_cen_fe_cls):
                cls_locs[i, j] = locs[k]
                k += 1
        return cls_locs

    def centers_matrix(self, centers_list, n_features):
        # Returns the matrix of center locations based on centers_list in
        # n_features-dimensional space: for every value c in centers_list and
        # every axis j, two centers are placed, at +c and -c along axis j.
        n_centers = 2 * len(centers_list) * n_features
        centers_matrix = np.zeros((n_centers, n_features))
        for i in range(len(centers_list)):
            for j in range(n_features):
                centers_matrix[i * 2 * n_features + 2 * j, j] = centers_list[i]
                centers_matrix[i * 2 * n_features + 2 * j + 1, j] = -centers_list[i]
        return centers_matrix

    def get_matrix(self):
        # Get the dataset as a NumPy matrix (last column holds the labels)
        return self.M

    def get_csv(self, filename):
        # Save the dataset as a CSV file
        df = pd.DataFrame(self.M)
        df.to_csv(filename, header=False, index=False)
        print(f'Dataset saved as {filename} in the current directory.')
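# Illustrative usage sketch (not part of the original commit; the argument
# values below are assumptions):
#
#   gen = MC_NDCC(n_centers=3, n_samples=1000, n_features=2, n_classes=3)
#   data = gen.get_matrix()          # shape (1000, 3): 2 features + 1 label column
#   gen.get_csv("ndcc_dataset.csv")  # same matrix written to disk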
@@ -1,17 +0,0 @@
DATASETS = [
    "bill",
    "brain",
    "glass",
    "hcv",
    "heart",
    "ionosphere",
    "iris",
    "raisin",
    "sonar",
    "wholesale",
    "wine",
    "yeast"
]
models/LSTSVM.py
@@ -1,104 +0,0 @@
"""
Article : Least squares twin support vector machines for pattern classification
Link    : https://sci-hub.tw/https://www.sciencedirect.com/science/article/abs/pii/S0957417408006854
Author  : Saeed Khosravi
"""
import numpy as np


class LSTSVM:
    """
    Least Squares Twin Support Vector Machine
    A  = instances with label +1
    B  = instances with label -1
    C1 = hyperparameter for hyperplane 1
    C2 = hyperparameter for hyperplane 2
    """
    def __init__(self, X, y, C1, C2, eps=1e-4):
        self.A = X[y[:, 0] == 1, :]
        self.B = X[y[:, 0] == -1, :]
        self.C1 = C1
        self.C2 = C2
        self.eps = eps

    def fit(self):
        A, B = self.A, self.B
        C1, C2 = self.C1, self.C2
        m1, n = A.shape
        m2, n = B.shape
        e1 = np.ones((m1, 1))
        e2 = np.ones((m2, 1))
        G = np.concatenate((A, e1), axis=1)
        H = np.concatenate((B, e2), axis=1)

        # Invert the smaller Gram matrix (via calc_Y_or_Z) for efficiency
        if m1 < m2:
            Y = self.calc_Y_or_Z(H)

            # w1, b1
            GYGT = np.dot(np.dot(G, Y), G.T)
            I = np.eye(GYGT.shape[0], GYGT.shape[1])
            w1_b1 = -np.dot(Y - np.dot(np.dot(np.dot(Y, G.T), np.linalg.inv(C1 * I + GYGT)), np.dot(G, Y)),
                            np.dot(H.T, np.ones((H.T.shape[1], 1))))
            w1 = w1_b1[:-1, :]
            b1 = w1_b1[-1, :]

            # w2, b2
            w2_b2 = C2 * np.dot(Y - np.dot(np.dot(np.dot(Y, G.T), np.linalg.inv((I / C2) + GYGT)), np.dot(G, Y)),
                                np.dot(G.T, np.ones((G.T.shape[1], 1))))
            w2 = w2_b2[:-1, :]
            b2 = w2_b2[-1, :]

        else:
            Z = self.calc_Y_or_Z(G)

            # w1, b1
            HZHT = np.dot(np.dot(H, Z), H.T)
            I = np.eye(HZHT.shape[0], HZHT.shape[1])
            w1_b1 = -C1 * np.dot(Z - np.dot(np.dot(np.dot(Z, H.T), np.linalg.inv((I / C1) + HZHT)), np.dot(H, Z)),
                                 np.dot(H.T, np.ones((H.T.shape[1], 1))))
            w1 = w1_b1[:-1, :]
            b1 = w1_b1[-1, :]

            # w2, b2
            w2_b2 = np.dot(Z - np.dot(np.dot(np.dot(Z, H.T), np.linalg.inv(C2 * I + HZHT)), np.dot(H, Z)),
                           np.dot(G.T, np.ones((G.T.shape[1], 1))))
            w2 = w2_b2[:-1, :]
            b2 = w2_b2[-1, :]

        self.w1, self.b1 = w1, b1
        self.w2, self.b2 = w2, b2

    def predict(self, x_test, y_test):
        # Assign each point to the class of its nearer hyperplane
        distance1 = np.abs(np.dot(x_test, self.w1) + self.b1)
        distance2 = np.abs(np.dot(x_test, self.w2) + self.b2)
        y_pred = np.zeros_like(y_test)
        for d in range(y_pred.shape[0]):
            if distance1[d] < distance2[d]:
                y_pred[d][0] = 1
            else:
                y_pred[d][0] = -1
        self.preds = y_pred

    def calc_Y_or_Z(self, M):
        # Computes (eps*I + M^T M)^{-1} via the Sherman-Morrison-Woodbury
        # identity, so only the smaller matrix (eps*I + M M^T) is inverted
        MMT = np.dot(M, M.T)
        I = np.eye(MMT.shape[0], MMT.shape[1])
        tmp = np.dot(np.dot(M.T, np.linalg.inv(self.eps * I + MMT)), M)
        I = np.eye(tmp.shape[0], tmp.shape[1])
        return (1 / self.eps) * (I - tmp)

    def get_params(self):
        return self.w1, self.b1, self.w2, self.b2

    def get_preds(self):
        return self.preds

    def score(self, y_test):
        accuracy = np.sum(self.preds == y_test) / y_test.shape[0]
        return accuracy
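# Illustrative usage sketch (not part of the original commit; the synthetic
# data and C values are assumptions):
#
#   import numpy as np
#   X = np.random.randn(100, 4)
#   y = np.where(X[:, 0] > 0, 1, -1).reshape(-1, 1)
#   clf = LSTSVM(X, y, C1=1.0, C2=1.0)
#   clf.fit()
#   clf.predict(X, y)     # stores +1/-1 predictions in clf.preds
#   print(clf.score(y))   # training accuracy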
@@ -1,160 +0,0 @@
import numpy as np


class NewtonUTSVM:
    # Newton-method solver for the Universum Twin SVM: two nonparallel
    # hyperplanes (w1, b1) and (w2, b2) are fitted by alternating updates.
    def __init__(self, X, y, U, C, eps=1e-4):
        self.X = np.asarray(X, dtype=np.float64)
        self.y = np.asarray(y, dtype=np.float64).reshape(-1, 1)
        self.U = np.asarray(U, dtype=np.float64)   # Universum points
        self.C = np.asarray(C, dtype=np.float64)   # six penalty parameters, three per plane
        self.eps = eps

    def fit(self):
        np.random.seed(42)
        self.w1 = np.random.normal(0, 0.01, (self.X.shape[1], 1))
        self.b1 = 0.0
        self.w2 = np.random.normal(0, 0.01, (self.X.shape[1], 1))
        self.b2 = 0.0

        for _ in range(5):
            self.w1, self.b1 = self.plane1(self.X, self.y, self.U,
                                           self.C[0], self.C[1], self.C[2], self.eps)
            self.w2, self.b2 = self.plane2(self.X, self.y, self.U,
                                           self.C[3], self.C[4], self.C[5], self.eps)

    def predict(self, x_test):
        x_test = np.asarray(x_test, dtype=np.float64)

        dist1 = self._safe_distance(x_test, self.w1, self.b1)
        dist2 = self._safe_distance(x_test, self.w2, self.b2)

        y_pred = np.where(dist1 < dist2, 1, -1).reshape(-1, 1)
        self.preds = y_pred
        return y_pred

    def _safe_distance(self, X, w, b):
        # Perpendicular distance to the hyperplane, guarding against w = 0
        norm = np.linalg.norm(w)
        if norm < 1e-10:
            return np.full((X.shape[0],), np.inf)
        return np.abs(X @ w + b) / norm

    def plane1(self, X, y, U, C1, C2, C3, eps):
        A = X[y[:, 0] == 1]
        B = X[y[:, 0] == -1]

        # Add bias terms
        T1 = np.hstack([A, np.ones((A.shape[0], 1))])
        T2 = np.hstack([B, np.ones((B.shape[0], 1))])
        T3 = np.hstack([U, np.ones((U.shape[0], 1))])

        Z = np.random.normal(0, 0.01, (X.shape[1] + 1, 1))
        prev_Z = np.zeros_like(Z)

        learning_rate = 0.1
        best_loss = float('inf')

        for count in range(100):
            e2 = np.ones((B.shape[0], 1))
            eu = np.ones((U.shape[0], 1))

            margin_B = e2 + T2 @ Z
            margin_U = (-1 + eps) * eu - T3 @ Z

            grad = (T1.T @ (T1 @ Z) +
                    C1 * T2.T @ self.func(margin_B, 'pf') +
                    C2 * Z -
                    C3 * T3.T @ self.func(margin_U, 'pf'))

            D1 = self.mat_diag(self.func(margin_B, 'pf') > 0)
            D2 = self.func(margin_U, 'pf') > 0
            hessian = (T1.T @ T1 +
                       C1 * T2.T @ D1 @ T2 +
                       C2 * np.eye(Z.shape[0]) +
                       C3 * T3.T @ np.diag(D2.flatten()) @ T3)

            # Regularize the Hessian so the Newton system is always solvable
            hessian += 1e-4 * np.eye(hessian.shape[0])

            delta = np.linalg.solve(hessian, grad)
            Z -= learning_rate * delta

            # Simple adaptive step size: grow on improvement, shrink otherwise
            current_loss = np.linalg.norm(grad)
            if current_loss < best_loss:
                best_loss = current_loss
                learning_rate = min(learning_rate * 1.1, 1.0)
            else:
                learning_rate = max(learning_rate * 0.5, 1e-4)

            if np.linalg.norm(Z - prev_Z) < self.eps:
                break
            prev_Z = Z.copy()

        return Z[:-1], Z[-1][0]

    def plane2(self, X, y, U, C4, C5, C6, eps):
        A = X[y[:, 0] == 1]
        B = X[y[:, 0] == -1]

        # Add bias terms
        G1 = np.hstack([B, np.ones((B.shape[0], 1))])
        G2 = np.hstack([A, np.ones((A.shape[0], 1))])
        G3 = np.hstack([U, np.ones((U.shape[0], 1))])

        Y = np.random.normal(0, 0.01, (X.shape[1] + 1, 1))
        prev_Y = np.zeros_like(Y)

        learning_rate = 0.1
        best_loss = float('inf')

        for count in range(100):
            e1 = np.ones((A.shape[0], 1))
            eu = np.ones((U.shape[0], 1))

            margin_A = e1 - G2 @ Y
            margin_U = (-1 + eps) * eu + G3 @ Y

            grad = (G1.T @ (G1 @ Y) -
                    C4 * G2.T @ self.func(margin_A, 'pf') +
                    C5 * Y +
                    C6 * G3.T @ self.func(margin_U, 'pf'))

            D3 = self.func(margin_A, 'pf') > 0
            D4 = self.func(margin_U, 'pf') > 0
            hessian = (G1.T @ G1 +
                       C4 * G2.T @ np.diag(D3.flatten()) @ G2 +
                       C5 * np.eye(Y.shape[0]) +
                       C6 * G3.T @ np.diag(D4.flatten()) @ G3)

            # Regularize the Hessian so the Newton system is always solvable
            hessian += 1e-4 * np.eye(hessian.shape[0])

            delta = np.linalg.solve(hessian, grad)
            Y -= learning_rate * delta

            # Simple adaptive step size: grow on improvement, shrink otherwise
            current_loss = np.linalg.norm(grad)
            if current_loss < best_loss:
                best_loss = current_loss
                learning_rate = min(learning_rate * 1.1, 1.0)
            else:
                learning_rate = max(learning_rate * 0.5, 1e-4)

            if np.linalg.norm(Y - prev_Y) < self.eps:
                break
            prev_Y = Y.copy()

        return Y[:-1], Y[-1][0]

    def func(self, x, type='pf', ro=1e20):
        # 'pf' = plus function max(0, x); 'sm' = its smooth approximation
        if type == 'pf':
            return np.maximum(0, x)
        elif type == 'sm':
            return x + (1 / ro) * np.log(1 + np.exp(-ro * x))

    def mat_diag(self, m):
        return np.diag(m.flatten())

    def get_params(self):
        return self.w1, self.b1, self.w2, self.b2

    def get_preds(self):
        return self.preds

    def score(self, y_test):
        y = np.asarray(y_test).flatten()
        return np.mean(self.preds.flatten() == y)
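# Illustrative usage sketch (not part of the original commit; data shapes
# and the six penalty values are assumptions):
#
#   import numpy as np
#   X = np.random.randn(80, 3)
#   y = np.where(X[:, 0] > 0, 1, -1).reshape(-1, 1)
#   U = np.random.randn(20, 3)               # Universum points
#   clf = NewtonUTSVM(X, y, U, C=[1.0] * 6)
#   clf.fit()
#   clf.predict(X)
#   print(clf.score(y))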
@@ -1,144 +0,0 @@
import numpy as np
from scipy.optimize import minimize
from sklearn.base import BaseEstimator, ClassifierMixin


class S3VM_Constrained(BaseEstimator, ClassifierMixin):
    # Semi-supervised SVM solved as a relaxed mixed-integer program:
    # each unlabeled point gets a soft assignment d in [0, 1] that selects
    # which side of the plane its slack (xi or z) is charged to.

    def __init__(self, C=1.0, M=1e5, eps=1e-4, max_iter=100):
        self.C = C                  # slack penalty
        self.M = M                  # big-M constant for the assignment constraints
        self.eps = eps              # label-change tolerance used as stopping rule
        self.max_iter = max_iter
        self.w = None
        self.b = None
        self.y_pred = 0
        self.y = 0

    def fit(self, X_labeled, y_labeled, X_unlabeled):
        X_labeled = np.asarray(X_labeled, dtype=np.float64)
        y_labeled = np.asarray(y_labeled, dtype=np.float64).reshape(-1, 1)
        X_unlabeled = np.asarray(X_unlabeled, dtype=np.float64)

        unique_labels = np.unique(y_labeled)
        if not (set(unique_labels) <= {1.0, -1.0}):
            raise ValueError("Labels must be +1 or -1")

        n_labeled, n_features = X_labeled.shape
        n_unlabeled = X_unlabeled.shape[0]

        self._initialize_parameters(n_features, n_labeled, n_unlabeled)

        for iteration in range(self.max_iter):
            y_unlabeled = self._predict_unlabeled(X_unlabeled)

            self._optimize_mip(X_labeled, y_labeled, X_unlabeled, y_unlabeled)

            # Stop once the imputed labels of the unlabeled points stabilize
            new_labels = self._predict_unlabeled(X_unlabeled)
            if np.mean(new_labels != y_unlabeled) < self.eps:
                break

        return self

    def _initialize_parameters(self, n_features, n_labeled, n_unlabeled):
        self.w = np.random.normal(0, 0.01, (n_features, 1))
        self.b = 0.0
        self.eta = np.zeros(n_labeled)        # slacks for labeled points
        self.xi = np.zeros(n_unlabeled)       # slacks for unlabeled points, +1 side
        self.z = np.zeros(n_unlabeled)        # slacks for unlabeled points, -1 side
        self.d = np.random.rand(n_unlabeled)  # relaxed side-assignment indicators

    def _predict_unlabeled(self, X_unlabeled):
        scores = X_unlabeled @ self.w + self.b
        return np.where(scores >= 0, 1, -1)

    def _optimize_mip(self, X_labeled, y_labeled, X_unlabeled, y_unlabeled):
        n_labeled, n_features = X_labeled.shape
        n_unlabeled = X_unlabeled.shape[0]

        # Decision vector layout: [w, b, eta, xi, z, d]
        x0 = np.concatenate([
            self.w.flatten(),
            [self.b],
            self.eta,
            self.xi,
            self.z,
            self.d
        ])

        bounds = (
            [(None, None)] * n_features +   # w
            [(None, None)] +                # b
            [(0, None)] * n_labeled +       # eta
            [(0, None)] * n_unlabeled +     # xi
            [(0, None)] * n_unlabeled +     # z
            [(0, 1)] * n_unlabeled          # d (relaxed binaries)
        )

        constraints = [
            # Labeled points: y_i (w.x_i + b) + eta_i >= 1
            {
                'type': 'ineq',
                'fun': lambda x: y_labeled.flatten() *
                                 (X_labeled @ x[:n_features] + x[n_features]) +
                                 x[n_features+1:n_features+1+n_labeled] - 1
            },
            # Unlabeled points, +1 side; relaxed by big-M when d_j is near 0
            {
                'type': 'ineq',
                'fun': lambda x: (X_unlabeled @ x[:n_features] - x[n_features] +
                                  x[n_features+1+n_labeled:n_features+1+n_labeled+n_unlabeled] +
                                  self.M*(1 - x[-n_unlabeled:])) - 1
            },
            # Unlabeled points, -1 side; relaxed by big-M when d_j is near 1
            {
                'type': 'ineq',
                'fun': lambda x: (-(X_unlabeled @ x[:n_features] - x[n_features]) +
                                  x[n_features+1+n_labeled+n_unlabeled:n_features+1+n_labeled+2*n_unlabeled] +
                                  self.M*x[-n_unlabeled:]) - 1
            }
        ]

        def objective(x):
            w = x[:n_features]
            eta = x[n_features+1:n_features+1+n_labeled]
            xi = x[n_features+1+n_labeled:n_features+1+n_labeled+n_unlabeled]
            z = x[n_features+1+n_labeled+n_unlabeled:n_features+1+n_labeled+2*n_unlabeled]
            # C-weighted slacks plus an L1 penalty on the weight vector
            return self.C * (np.sum(eta) + np.sum(xi + z)) + np.sum(np.abs(w))

        res = minimize(
            objective,
            x0,
            method='SLSQP',
            bounds=bounds,
            constraints=constraints,
            options={'maxiter': 1000}
        )

        self.w = res.x[:n_features].reshape(-1, 1)
        self.b = res.x[n_features]
        self.eta = res.x[n_features+1:n_features+1+n_labeled]
        self.xi = res.x[n_features+1+n_labeled:n_features+1+n_labeled+n_unlabeled]
        self.z = res.x[n_features+1+n_labeled+n_unlabeled:n_features+1+n_labeled+2*n_unlabeled]
        self.d = res.x[-n_unlabeled:]

    def predict(self, X):
        if self.w is None or self.b is None:
            raise ValueError("Model not fitted yet")

        X = np.asarray(X, dtype=np.float64)
        scores = X @ self.w + self.b
        self.y_pred = np.where(scores >= 0, 1, -1)
        return self.y_pred

    def score(self, y_test):
        y = np.asarray(y_test).flatten()
        return np.mean(self.y_pred.flatten() == y)
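# Illustrative usage sketch (not part of the original commit; the data and
# hyperparameters are assumptions, and the sizes are kept small because
# SLSQP over w, b and all slack variables scales poorly):
#
#   import numpy as np
#   X_l = np.random.randn(20, 2)
#   y_l = np.where(X_l[:, 0] > 0, 1, -1).reshape(-1, 1)
#   X_u = np.random.randn(10, 2)
#   clf = S3VM_Constrained(C=1.0, max_iter=10).fit(X_l, y_l, X_u)
#   clf.predict(X_l)
#   print(clf.score(y_l))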
@@ -1,80 +0,0 @@
import numpy as np
from scipy.optimize import minimize


class S3VM_Unconstrained:
    # Semi-supervised SVM trained by imputing labels for the unlabeled
    # points and minimizing an unconstrained L1-regularized hinge loss.

    def __init__(self, C=1.0, eps=1e-4):
        self.C = C
        self.eps = eps
        self.w = None
        self.b = None

    def fit(self, X_labeled, y_labeled, X_unlabeled):
        X_labeled = np.asarray(X_labeled, dtype=np.float64)
        y_labeled = np.asarray(y_labeled, dtype=np.float64).reshape(-1, 1)
        X_unlabeled = np.asarray(X_unlabeled, dtype=np.float64)

        unique_labels = np.unique(y_labeled)
        if not (set(unique_labels) <= {1.0, -1.0}):
            raise ValueError("Labels must be +1 or -1")

        n_features = X_labeled.shape[1]
        self.w = np.zeros((n_features, 1))
        self.b = 0.0

        # Augment with a bias column of ones
        X_labeled_aug = np.hstack([X_labeled, np.ones((X_labeled.shape[0], 1))])
        X_unlabeled_aug = np.hstack([X_unlabeled, np.ones((X_unlabeled.shape[0], 1))])

        # Impute labels for the unlabeled points from the current plane
        unlabeled_scores = X_unlabeled_aug @ np.vstack([self.w, self.b])
        y_unlabeled = np.sign(unlabeled_scores)
        y_unlabeled[y_unlabeled == 0] = 1

        X_aug = np.vstack([X_labeled_aug, X_unlabeled_aug])
        y = np.vstack([y_labeled, y_unlabeled])

        self._optimize(X_aug, y)

        return self

    def _optimize(self, X_aug, y):
        _, n_features = X_aug.shape  # includes the bias column

        def objective(params):
            w = params[:-1].reshape(-1, 1)
            b = params[-1]
            # The last column of X_aug is all ones, so the bias enters as a plain +b
            margins = y * (X_aug[:, :-1] @ w + b)

            hinge_loss = np.sum(np.maximum(0, 1 - margins))

            norm1_w = np.sum(np.abs(w))

            return self.C * hinge_loss + norm1_w

        x0 = np.zeros(n_features)

        res = minimize(objective, x0, method='L-BFGS-B')

        self.w = res.x[:-1].reshape(-1, 1)
        self.b = res.x[-1]

    def predict(self, X):
        if self.w is None or self.b is None:
            raise ValueError("Model not fitted yet")

        X = np.asarray(X, dtype=np.float64)
        scores = X @ self.w + self.b
        self.y_pred = np.where(scores >= 0, 1, -1).ravel()
        return self.y_pred

    def score(self, y_test):
        y_test = np.asarray(y_test).flatten()
        return np.mean(self.y_pred.flatten() == y_test)
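# Illustrative usage sketch (not part of the original commit; the data and
# C value are assumptions):
#
#   import numpy as np
#   X_l = np.random.randn(40, 3)
#   y_l = np.where(X_l[:, 0] > 0, 1, -1).reshape(-1, 1)
#   X_u = np.random.randn(15, 3)
#   clf = S3VM_Unconstrained(C=1.0).fit(X_l, y_l, X_u)
#   clf.predict(X_l)
#   print(clf.score(y_l))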
@@ -1,87 +0,0 @@
"""
Article : Twin Support Vector Machine
Link    : https://sci-hub.tw/https://ieeexplore.ieee.org/document/4135685
Author  : Saeed Khosravi
"""

import numpy as np
from cvxopt import solvers, matrix


class TSVM:

    def __init__(self, X, y, C1, C2, eps=1e-4):
        self.A = X[y[:, 0] == 1, :]
        self.B = X[y[:, 0] == -1, :]
        self.C1 = C1
        self.C2 = C2
        self.eps = eps

    def fit(self):
        self.w1, self.b1 = self.plane1(self.A, self.B, self.C1, self.eps)
        self.w2, self.b2 = self.plane2(self.A, self.B, self.C2, self.eps)

    def predict(self, x_test):
        # Assign each point to the class whose hyperplane is nearer
        norm2_w1 = np.linalg.norm(self.w1)
        norm2_w2 = np.linalg.norm(self.w2)
        distance_1 = np.abs(np.dot(x_test, self.w1) + self.b1) / norm2_w1
        distance_2 = np.abs(np.dot(x_test, self.w2) + self.b2) / norm2_w2
        y_pred = np.zeros_like(distance_1)
        for i in range(y_pred.shape[0]):
            if distance_1[i] < distance_2[i]:
                y_pred[i][0] = 1
            else:
                y_pred[i][0] = -1

        self.preds = y_pred
        return y_pred  # Return predictions

    def plane1(self, A, B, c, eps):
        e1 = np.ones((A.shape[0], 1))
        e2 = np.ones((B.shape[0], 1))
        H = np.concatenate((A, e1), axis=1)
        G = np.concatenate((B, e2), axis=1)
        HTH = np.dot(H.T, H)
        # Regularize only when H is rank-deficient
        if np.linalg.matrix_rank(H) < H.shape[1]:
            HTH += eps * np.eye(HTH.shape[0], HTH.shape[1])

        # Solve the dual QP with cvxopt
        _P = matrix(np.dot(np.dot(G, np.linalg.inv(HTH)), G.T), tc='d')
        _q = matrix(-1 * e2, tc='d')
        _G = matrix(np.concatenate((np.identity(B.shape[0]), -np.identity(B.shape[0])), axis=0), tc='d')
        _h = matrix(np.concatenate((c * e2, np.zeros_like(e2)), axis=0), tc='d')
        qp_sol = solvers.qp(_P, _q, _G, _h, kktsolver='ldl', options={'kktreg': 1e-9, 'show_progress': False})
        qp_sol = np.array(qp_sol['x'])
        z = -np.dot(np.dot(np.linalg.inv(HTH), G.T), qp_sol)
        w = z[:z.shape[0] - 1]
        b = z[z.shape[0] - 1]
        return w, b[0]

    def plane2(self, A, B, c, eps):
        e1 = np.ones((A.shape[0], 1))
        e2 = np.ones((B.shape[0], 1))
        H = np.concatenate((A, e1), axis=1)
        G = np.concatenate((B, e2), axis=1)
        GTG = np.dot(G.T, G)
        # Regularize only when G is rank-deficient
        if np.linalg.matrix_rank(G) < G.shape[1]:
            GTG += eps * np.eye(GTG.shape[0], GTG.shape[1])

        # Solve the dual QP with cvxopt
        _P = matrix(np.dot(np.dot(H, np.linalg.inv(GTG)), H.T), tc='d')
        _q = matrix(-1 * e1, tc='d')
        _G = matrix(np.concatenate((np.identity(A.shape[0]), -np.identity(A.shape[0])), axis=0), tc='d')
        _h = matrix(np.concatenate((c * e1, np.zeros_like(e1)), axis=0), tc='d')
        qp_sol = solvers.qp(_P, _q, _G, _h, kktsolver='ldl', options={'kktreg': 1e-9, 'show_progress': False})
        qp_sol = np.array(qp_sol['x'])
        z = -np.dot(np.dot(np.linalg.inv(GTG), H.T), qp_sol)
        w = z[:z.shape[0] - 1]
        b = z[z.shape[0] - 1]
        return w, b[0]

    def get_params(self):
        return self.w1, self.b1, self.w2, self.b2

    def get_preds(self):
        return self.preds

    def score(self, y_test):
        accuracy = np.sum(self.preds == y_test) / y_test.shape[0]
        return accuracy
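# Illustrative usage sketch (not part of the original commit; requires
# cvxopt, and the data and C values are assumptions):
#
#   import numpy as np
#   X = np.random.randn(60, 3)
#   y = np.where(X[:, 0] > 0, 1, -1).reshape(-1, 1)
#   clf = TSVM(X, y, C1=1.0, C2=1.0)
#   clf.fit()
#   clf.predict(X)
#   print(clf.score(y))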
@@ -1,93 +0,0 @@
from models.S3VM_constrained import S3VM_Constrained
from models.S3VM_unconstrained import S3VM_Unconstrained
from models.NewtonUTSVM import NewtonUTSVM
from models.TSVM import TSVM
from models.LSTSVM import LSTSVM
from models.utils import load_dataset


MODELS = [
    "Semi-Supervised_SVM",
    "Semi-Supervised_SVM_Unconstrained",
    "Newton_Universum_Twin_SVM",
    "Least-Square_Twin_SVM",
    "Twin_SVM"
]


def runner(model, dataset, params):
    csv_file = f"datasets/{dataset}.csv"
    x_train, y_train, x_test, y_test, U = load_dataset(csv_file)
    accuracy = 0

    print('model: ', model)

    match model:

        case "Semi-Supervised_SVM":
            C = params.get('C') or 1.0
            max_iter = params.get('max_iter') or 100

            # Pass max_iter by keyword so it is not mistaken for the big-M constant
            modelObj = S3VM_Constrained(C=C, max_iter=max_iter)
            modelObj.fit(x_train, y_train, U)
            modelObj.predict(x_test)
            accuracy = modelObj.score(y_test)
            params = {"C": C, "max_iter": max_iter}

        case "Semi-Supervised_SVM_Unconstrained":
            C = params.get('C') or 1.0

            modelObj = S3VM_Unconstrained(C)
            modelObj.fit(x_train, y_train, U)
            modelObj.predict(x_test)
            accuracy = modelObj.score(y_test)
            params = {"C": C}

        case "Newton_Universum_Twin_SVM":
            C = params.get('C') or [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

            modelObj = NewtonUTSVM(x_train, y_train, U, C)
            modelObj.fit()
            modelObj.predict(x_test)
            accuracy = modelObj.score(y_test)
            params = {"C": C}

        case "Least-Square_Twin_SVM":
            C = params.get('C') or [1.0, 1.0]

            modelObj = LSTSVM(x_train, y_train, C[0], C[1])
            modelObj.fit()
            modelObj.predict(x_test, y_test)
            accuracy = modelObj.score(y_test)
            params = {"C": C}

        case "Twin_SVM":
            C = params.get('C') or [1.0, 1.0]

            modelObj = TSVM(x_train, y_train, C[0], C[1])
            modelObj.fit()
            modelObj.predict(x_test)
            accuracy = modelObj.score(y_test)
            params = {"C": C}

    accuracy = round(accuracy, 4)

    return {
        "model": model,
        "dataset": dataset,
        "params": params,
        "accuracy": accuracy
    }
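# Illustrative usage sketch (not part of the original commit; assumes a
# datasets/iris.csv file matching an entry in the DATASETS list above):
#
#   result = runner("Twin_SVM", "iris", {"C": [1.0, 1.0]})
#   print(result)   # {"model": ..., "dataset": ..., "params": ..., "accuracy": ...}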
models/utils.py
@@ -1,106 +0,0 @@
from datetime import datetime
from ultralytics import YOLO
import numpy as np
import cv2
import csv
import os


def min_max_normalize(matrix):
    # Scale every feature column to [0, 1]; constant columns are left at 0
    min_vals = np.min(matrix, axis=0)
    max_vals = np.max(matrix, axis=0)
    range_vals = max_vals - min_vals
    range_vals[range_vals == 0] = 1
    normalized_matrix = (matrix - min_vals) / range_vals
    return normalized_matrix


def load_dataset(csv_file, unlabeled_ratio=0.15, test_ratio=0.4):
    # Expects a CSV with a header row and the (binary) class name in the last column
    data = np.genfromtxt(csv_file, delimiter=",", dtype=str, skip_header=1)
    class_names = np.unique(data[:, -1])
    print(f"classes: {class_names[0]} / {class_names[1]}")
    print(f"dataset samples: {data.shape[0]} / features: {data.shape[1] - 1}")

    # Map the first class name to +1 and everything else to -1
    data[:, -1] = np.where(data[:, -1] == class_names[0], 1, -1)

    data = data.astype(np.float32)

    features = min_max_normalize(data[:, :-1])

    # Fixed seed so the labeled/unlabeled/test split is reproducible
    np.random.seed(10000)
    indices = np.random.permutation(len(features))

    # Hold out a fraction of the samples as the unlabeled set U
    split_idx = int(len(features) * (1 - unlabeled_ratio))
    labeled_test_features = features[indices[:split_idx]]
    labeled_test_labels = data[indices[:split_idx]][:, -1]
    U = features[indices[split_idx:]]

    # Split the remainder into train and test sets
    test_split_idx = int(len(labeled_test_features) * (1 - test_ratio))
    X = labeled_test_features[:test_split_idx]
    y = labeled_test_labels[:test_split_idx]
    X_test = labeled_test_features[test_split_idx:]
    y_test = labeled_test_labels[test_split_idx:]
    y = y.reshape(y.shape[0], 1)
    y_test = y_test.reshape(y_test.shape[0], 1)

    return X, y, X_test, y_test, U


def save_result(model, dataset, accuracy, params, results_file):
    file_exists = os.path.isfile(results_file)
    with open(results_file, mode="a", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=["timestamp", "model", "dataset", "parameters", "accuracy"])
        if not file_exists:
            writer.writeheader()
        writer.writerow({
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "model": model,
            "dataset": dataset,
            "parameters": params,
            "accuracy": accuracy
        })


def load_results(limit, results_file):
    # Return the most recent `limit` rows, newest first
    if not os.path.isfile(results_file):
        return []
    with open(results_file, mode="r") as f:
        reader = list(csv.DictReader(f))
    return reader[::-1][:limit]


def predict_yolo(image_path, confidence):
    print("Predicting:", image_path)

    # Load the YOLO model
    model = YOLO("templates/static/public/files/repair/weights/14_class_best.pt")

    # Run prediction on the input image
    results = model.predict(
        source=f"templates/static/public/files/repair/images/{image_path}",
        save=False,
        conf=confidence,
        device="cpu",
        batch=4,
        imgsz=320
    )

    predicted_img = results[0].plot()  # OpenCV image with boxes drawn

    # Save location (always overwrite the same file)
    output_dir = "templates/static/public/files/repair/predicted"
    os.makedirs(output_dir, exist_ok=True)
    output_filename = "predicted_image.jpg"  # fixed name
    output_path = os.path.join(output_dir, output_filename)
    print("Predicted image written to:", output_path)

    # Write the image
    cv2.imwrite(output_path, predicted_img)

    # Return only the filename so the template can use it
    return output_filename
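# Illustrative usage sketch (not part of the original commit; assumes a
# binary-class CSV with a header row, e.g. the datasets/heart.csv listed above):
#
#   X, y, X_test, y_test, U = load_dataset("datasets/heart.csv")
#   print(X.shape, y.shape, X_test.shape, y_test.shape, U.shape)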