diff --git a/datasets/MC_NDCC.py b/datasets/MC_NDCC.py
deleted file mode 100755
index 7f627b1..0000000
--- a/datasets/MC_NDCC.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# TITLE: Multi-Class Normal Distribution Cubic Clusters Dataset Generator
-# AUTHOR: Dr. Hossein Moosaei, Saeed Khosravi
-# Date: 10/09/2020
-
-# NORMALLY DISTRIBUTED CLUSTERS (NDC) is a data generator.
-# It generates a series of random centers for multivariate
-# normal distributions. NDC randomly decides what fraction
-# of the data points will come from each center. NDC randomly
-# generates a separating plane and, based on this plane,
-# assigns a class to each center. NDC then randomly generates
-# the points from the distributions. NDC can increase
-# inseparability by increasing the variances of the distributions.
-# A measure of "true" separability is obtained by looking
-# at how many points end up on the wrong side of the
-# separating plane. All values are taken as integers
-# for simplicity.
-
-import numpy as np
-import pandas as pd
-
-class MC_NDCC:
-
-    def __init__(self, n_centers, n_samples, n_features, n_classes):
-        # self.n_samples = int(input("Enter number of samples: \n"))
-        # self.n_features = int(input("Enter number of features: \n"))
-        # self.n_classes = int(input("Enter number of classes: \n"))
-        centers = [100, 300, 500, 700]
-        self.centers_list = centers[0:n_centers]
-        self.n_samples = n_samples
-        self.n_features = n_features
-        self.n_classes = n_classes
-        self.center_points = self.centers_matrix(self.centers_list, self.n_features)
-        self.n_centers = 2*len(self.centers_list)*self.n_features
-        self.class_locations = self.class_center_locations(self.n_classes, self.n_centers)
-        self.ss = self.sample_spliter(self.n_samples, self.n_classes, self.n_centers)
-        r, c = self.class_locations.shape
-        self.M = np.zeros((0, self.n_features))
-        self.l = np.zeros((0, 1))
-        for i in range(r):
-            for j in range(c):
-                self.temp = np.random.normal(loc=self.center_points[int(self.class_locations[i, j])],
-                                             size=(int(self.ss[i, j]), self.n_features),
-                                             scale=5)
-                self.label_temp = np.ones((int(self.ss[i, j]), 1))*(i+1)
-                self.l = np.concatenate((self.l, self.label_temp), axis=0)
-                self.M = np.concatenate((self.M, self.temp), axis=0)
-        self.M = np.concatenate((self.M, self.l), axis=1).astype('int32')
-        np.random.shuffle(self.M)
-
-    def sample_spliter(self, n_samples, n_classes, n_centers):
-        # Generates the number of samples that belong to each (class, center) cell.
-        # Each class receives approximately n_samples/n_classes samples,
-        # spread randomly across its centers with a small variance.
-        count = 0
-        n_cen_fe_cls = int(np.floor(n_centers/n_classes))
-        n_each_c = np.zeros((n_classes, n_cen_fe_cls))
-        while n_samples > count:
-            r = np.random.randint(n_classes)
-            r2 = np.random.randint(n_cen_fe_cls)
-            n_each_c[r, r2] += 1
-            count += 1
-        return n_each_c
-
-    def class_center_locations(self, n_classes, n_centers):
-        # Specifies which center points belong to which classes.
-        # Returns a matrix of size n_classes by n_centers_per_class,
-        # i.e. one row per class.
-        rng = np.random.default_rng()
-        # Random permutation of 0..n_centers-1 (no repeats)
-        locs = rng.choice(n_centers, n_centers, replace=False)
-        # Number of centers for each class
-        n_cen_fe_cls = int(np.floor(n_centers/n_classes))
-        cls_locs = np.zeros((n_classes, n_cen_fe_cls))
-        k = 0
-        for i in range(n_classes):
-            for j in range(n_cen_fe_cls):
-                cls_locs[i, j] = locs[k]
-                k += 1
-        return cls_locs
-
-    def centers_matrix(self, centers_list, n_features):
-        # Returns the matrix of center locations based on centers_list in
-        # n_features-dimensional space: one center at +c and one at -c on
-        # each coordinate axis, for every magnitude c in centers_list.
-        n_centers = 2*len(centers_list)*n_features
-        centers_matrix = np.zeros((n_centers, n_features))
-        for i in range(len(centers_list)):
-            for j in range(n_features):
-                centers_matrix[i*2*n_features + 2*j, j] = centers_list[i]
-                centers_matrix[i*2*n_features + 2*j+1, j] = -centers_list[i]
-        return centers_matrix
-
-    def get_matrix(self):
-        # Get the dataset as a numpy matrix
-        return self.M
-
-    def get_csv(self, filename):
-        # Save the dataset as a csv file
-        df = pd.DataFrame(self.M)
-        df.to_csv(filename, header=False, index=False)
-        print(f'Dataset saved as {filename} in the current directory.')
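For context, a minimal usage sketch of the generator being removed. The parameter values below are hypothetical, and the sketch assumes the pre-deletion module was importable as datasets.MC_NDCC:

    from datasets.MC_NDCC import MC_NDCC

    # 2 base center magnitudes, 500 samples, 3 features, 2 classes (all hypothetical)
    gen = MC_NDCC(n_centers=2, n_samples=500, n_features=3, n_classes=2)
    data = gen.get_matrix()           # shape (500, 4): 3 features + 1 label column
    X, y = data[:, :-1], data[:, -1]  # labels run from 1 to n_classes
    gen.get_csv('ndcc.csv')           # same matrix, written as CSV
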
diff --git a/datasets/__init__.py b/datasets/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/datasets/datasets.py b/datasets/datasets.py
deleted file mode 100644
index 737d354..0000000
--- a/datasets/datasets.py
+++ /dev/null
@@ -1,17 +0,0 @@
-
-DATASETS = [
-    "bill",
-    "brain",
-    "glass",
-    "hcv",
-    "heart",
-    "ionosphere",
-    "iris",
-    "raisin",
-    "sonar",
-    "wholesale",
-    "wine",
-    "yeast"
-]
-
-
diff --git a/models/LSTSVM.py b/models/LSTSVM.py
deleted file mode 100644
index 3816cfb..0000000
--- a/models/LSTSVM.py
+++ /dev/null
@@ -1,104 +0,0 @@
-"""
-Article : Least squares twin support vector machines for pattern classification
-Link    : https://sci-hub.tw/https://www.sciencedirect.com/science/article/abs/pii/S0957417408006854
-Author  : Saeed Khosravi
-"""
-import numpy as np
-
-class LSTSVM:
-    """
-    Least Squares Twin Support Vector Machine
-    A  = instances with label +1
-    B  = instances with label -1
-    C1 = hyperparameter for hyperplane 1
-    C2 = hyperparameter for hyperplane 2
-    """
-    def __init__(self, X, y, C1, C2, eps=1e-4):
-        self.A = X[np.ix_(y[:, 0] == 1), :][0, :, :]
-        self.B = X[np.ix_(y[:, 0] == -1), :][0, :, :]
-        self.C1 = C1
-        self.C2 = C2
-        self.eps = eps
-
-    def fit(self):
-        A = self.A
-        B = self.B
-        C1 = self.C1
-        C2 = self.C2
-        eps = self.eps
-        m1, n = A.shape
-        m2, n = B.shape
-        e1 = np.ones((m1, 1))
-        e2 = np.ones((m2, 1))
-        X = np.concatenate((A, B), axis=0)
-        G = np.concatenate((A, e1), axis=1)
-        H = np.concatenate((B, e2), axis=1)
-
-        if m1 < m2:
-            Y = self.calc_Y_or_Z(H)
-
-            # w1, b1
-            GYGT = np.dot(np.dot(G, Y), G.T)
-            I = np.eye(GYGT.shape[0], GYGT.shape[1])
-            w1_b1 = -np.dot(Y - np.dot(np.dot(np.dot(Y, G.T), np.linalg.inv(C1*I + GYGT)), np.dot(G, Y)),
-                            np.dot(H.T, np.ones((H.T.shape[1], 1))))
-            w1 = w1_b1[:-1, :]
-            b1 = w1_b1[-1, :]
-
-            # w2, b2
-            w2_b2 = C2 * np.dot(Y - np.dot(np.dot(np.dot(Y, G.T), np.linalg.inv((I/C2) + GYGT)), np.dot(G, Y)),
-                                np.dot(G.T, np.ones((G.T.shape[1], 1))))
-            w2 = w2_b2[:-1, :]
-            b2 = w2_b2[-1, :]
-
-        else:
-            Z = self.calc_Y_or_Z(G)
-
-            # w1, b1
-            HZHT = np.dot(np.dot(H, Z), H.T)
-            I = np.eye(HZHT.shape[0], HZHT.shape[1])
-            w1_b1 = -C1*np.dot(Z - np.dot(np.dot(np.dot(Z, H.T), np.linalg.inv((I/C1) + HZHT)), np.dot(H, Z)),
-                               np.dot(H.T, np.ones((H.T.shape[1], 1))))
-            w1 = w1_b1[:-1, :]
-            b1 = w1_b1[-1, :]
-
-            # w2, b2
-            w2_b2 = np.dot(Z - np.dot(np.dot(np.dot(Z, H.T), np.linalg.inv(C2*I + HZHT)), np.dot(H, Z)),
-                           np.dot(G.T, np.ones((G.T.shape[1], 1))))
-            w2 = w2_b2[:-1, :]
-            b2 = w2_b2[-1, :]
-
-        self.w1 = w1
-        self.w2 = w2
-        self.b1 = b1
-        self.b2 = b2
-
-    def predict(self, x_test, y_test):
-        distance1 = np.abs(np.dot(x_test, self.w1) + self.b1)
-        distance2 = np.abs(np.dot(x_test, self.w2) + self.b2)
-        y_pred = np.zeros_like(y_test)
-        for d in range(y_pred.shape[0]):
-            if distance1[d] < distance2[d]:
-                y_pred[d][0] = 1
-            else:
-                y_pred[d][0] = -1
-        self.preds = y_pred
-
-    def calc_Y_or_Z(self, M):
-        MMT = np.dot(M, M.T)
-        I = np.eye(MMT.shape[0], MMT.shape[1])
-        tmp = np.dot(np.dot(M.T, np.linalg.inv(self.eps*I + MMT)), M)
-        I = np.eye(tmp.shape[0], tmp.shape[1])
-        return (1/self.eps)*(I - tmp)
-
-    def get_params(self):
-        return self.w1, self.b1, self.w2, self.b2
-
-    def get_preds(self):
-        return self.preds
-
-    def score(self, y_test):
-        accuracy = np.sum(self.preds == y_test)/y_test.shape[0]
-        return accuracy
\ No newline at end of file
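For reference, a minimal driving sketch for the deleted LSTSVM class (synthetic data and hypothetical hyperparameters; y must be a column vector of +1/-1 because the constructor indexes y[:, 0], and predict must run before score since predictions are stored on the instance):

    import numpy as np
    from models.LSTSVM import LSTSVM

    rng = np.random.default_rng(0)
    X = np.vstack([rng.normal(2, 1, (40, 5)), rng.normal(-2, 1, (40, 5))])
    y = np.vstack([np.ones((40, 1)), -np.ones((40, 1))])

    model = LSTSVM(X, y, C1=1.0, C2=1.0)
    model.fit()
    model.predict(X, y)    # predictions are stored in model.preds
    print(model.score(y))  # fraction of correct predictions
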
diff --git a/models/NewtonUTSVM.py b/models/NewtonUTSVM.py
deleted file mode 100755
index 1211ffd..0000000
--- a/models/NewtonUTSVM.py
+++ /dev/null
@@ -1,160 +0,0 @@
-import numpy as np
-
-class NewtonUTSVM:
-    def __init__(self, X, y, U, C, eps=1e-4):
-        # U holds the additional (Universum) instances;
-        # C packs six penalty parameters, consumed as C[0]..C[5] in fit.
-        self.X = np.asarray(X, dtype=np.float64)
-        self.y = np.asarray(y, dtype=np.float64).reshape(-1, 1)
-        self.U = np.asarray(U, dtype=np.float64)
-        self.C = np.asarray(C, dtype=np.float64)
-        self.eps = eps
-
-    def fit(self):
-        np.random.seed(42)
-        self.w1 = np.random.normal(0, 0.01, (self.X.shape[1], 1))
-        self.b1 = 0.0
-        self.w2 = np.random.normal(0, 0.01, (self.X.shape[1], 1))
-        self.b2 = 0.0
-
-        for _ in range(5):
-            self.w1, self.b1 = self.plane1(self.X, self.y, self.U,
-                                           self.C[0], self.C[1], self.C[2], self.eps)
-            self.w2, self.b2 = self.plane2(self.X, self.y, self.U,
-                                           self.C[3], self.C[4], self.C[5], self.eps)
-
-    def predict(self, x_test):
-        x_test = np.asarray(x_test, dtype=np.float64)
-
-        dist1 = self._safe_distance(x_test, self.w1, self.b1)
-        dist2 = self._safe_distance(x_test, self.w2, self.b2)
-
-        y_pred = np.where(dist1 < dist2, 1, -1).reshape(-1, 1)
-        self.preds = y_pred
-        return y_pred
-
-    def _safe_distance(self, X, w, b):
-        norm = np.linalg.norm(w)
-        if norm < 1e-10:
-            return np.full((X.shape[0],), np.inf)
-        # flatten so both branches return a (m,) array
-        return (np.abs(X @ w + b) / norm).ravel()
-
-    def plane1(self, X, y, U, C1, C2, C3, eps):
-        A = X[y[:, 0] == 1]
-        B = X[y[:, 0] == -1]
-
-        # Add bias terms
-        T1 = np.hstack([A, np.ones((A.shape[0], 1))])
-        T2 = np.hstack([B, np.ones((B.shape[0], 1))])
-        T3 = np.hstack([U, np.ones((U.shape[0], 1))])
-
-        Z = np.random.normal(0, 0.01, (X.shape[1]+1, 1))
-        prev_Z = np.zeros_like(Z)
-
-        learning_rate = 0.1
-        best_loss = float('inf')
-
-        for count in range(100):
-            e2 = np.ones((B.shape[0], 1))
-            eu = np.ones((U.shape[0], 1))
-
-            margin_B = e2 + T2 @ Z
-            margin_U = (-1 + eps)*eu - T3 @ Z
-
-            grad = (T1.T @ (T1 @ Z) +
-                    C1 * T2.T @ self.func(margin_B, 'pf') +
-                    C2 * Z -
-                    C3 * T3.T @ self.func(margin_U, 'pf'))
-
-            D1 = self.mat_diag(self.func(margin_B, 'pf') > 0)
-            D2 = self.func(margin_U, 'pf') > 0
-            hessian = (T1.T @ T1 +
-                       C1 * T2.T @ D1 @ T2 +
-                       C2 * np.eye(Z.shape[0]) +
-                       C3 * T3.T @ np.diag(D2.flatten()) @ T3)
-
-            # small ridge term for numerical stability
-            hessian += 1e-4 * np.eye(hessian.shape[0])
-
-            delta = np.linalg.solve(hessian, grad)
-            Z -= learning_rate * delta
-
-            # adapt the step size according to the gradient norm
-            current_loss = np.linalg.norm(grad)
-            if current_loss < best_loss:
-                best_loss = current_loss
-                learning_rate = min(learning_rate * 1.1, 1.0)
-            else:
-                learning_rate = max(learning_rate * 0.5, 1e-4)
-
-            if np.linalg.norm(Z - prev_Z) < self.eps:
-                break
-            prev_Z = Z.copy()
-
-        return Z[:-1], Z[-1][0]
-
-    def plane2(self, X, y, U, C4, C5, C6, eps):
-        A = X[y[:, 0] == 1]
-        B = X[y[:, 0] == -1]
-
-        # Add bias terms
-        G1 = np.hstack([B, np.ones((B.shape[0], 1))])
-        G2 = np.hstack([A, np.ones((A.shape[0], 1))])
-        G3 = np.hstack([U, np.ones((U.shape[0], 1))])
-
-        Y = np.random.normal(0, 0.01, (X.shape[1]+1, 1))
-        prev_Y = np.zeros_like(Y)
-
-        learning_rate = 0.1
-        best_loss = float('inf')
-
-        for count in range(100):
-            e1 = np.ones((A.shape[0], 1))
-            eu = np.ones((U.shape[0], 1))
-
-            margin_A = e1 - G2 @ Y
-            margin_U = (-1 + eps)*eu + G3 @ Y
-
-            grad = (G1.T @ (G1 @ Y) -
-                    C4 * G2.T @ self.func(margin_A, 'pf') +
-                    C5 * Y +
-                    C6 * G3.T @ self.func(margin_U, 'pf'))
-
-            D3 = self.func(margin_A, 'pf') > 0
-            D4 = self.func(margin_U, 'pf') > 0
-            hessian = (G1.T @ G1 +
-                       C4 * G2.T @ np.diag(D3.flatten()) @ G2 +
-                       C5 * np.eye(Y.shape[0]) +
-                       C6 * G3.T @ np.diag(D4.flatten()) @ G3)
-
-            # small ridge term for numerical stability
-            hessian += 1e-4 * np.eye(hessian.shape[0])
-
-            delta = np.linalg.solve(hessian, grad)
-            Y -= learning_rate * delta
-
-            current_loss = np.linalg.norm(grad)
-            if current_loss < best_loss:
-                best_loss = current_loss
-                learning_rate = min(learning_rate * 1.1, 1.0)
-            else:
-                learning_rate = max(learning_rate * 0.5, 1e-4)
-
-            if np.linalg.norm(Y - prev_Y) < self.eps:
-                break
-            prev_Y = Y.copy()
-
-        return Y[:-1], Y[-1][0]
-
-    def func(self, x, type='pf', ro=1e20):
-        # 'pf': plus function max(0, x); 'sm': its smooth approximation
-        if type == 'pf':
-            return np.maximum(0, x)
-        elif type == 'sm':
-            return x + (1/ro)*np.log(1 + np.exp(-ro*x))
-
-    def mat_diag(self, m):
-        return np.diag(m.flatten())
-
-    def get_params(self):
-        return self.w1, self.b1, self.w2, self.b2
-
-    def get_preds(self):
-        return self.preds
-
-    def score(self, y_test):
-        y = np.asarray(y_test).flatten()
-        return np.mean(self.preds.flatten() == y)
\ No newline at end of file
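A hypothetical fitting sketch for the Newton-iteration model above (synthetic data; C packs the six penalty parameters consumed as C[0]..C[5], and U is the extra Universum/unlabeled sample set):

    import numpy as np
    from models.NewtonUTSVM import NewtonUTSVM

    rng = np.random.default_rng(0)
    X = np.vstack([rng.normal(1, 1, (30, 4)), rng.normal(-1, 1, (30, 4))])
    y = np.vstack([np.ones((30, 1)), -np.ones((30, 1))])
    U = rng.normal(0, 1, (20, 4))          # Universum pool (hypothetical)

    model = NewtonUTSVM(X, y, U, C=[1, 1, 1, 1, 1, 1])
    model.fit()
    y_pred = model.predict(X)              # +1/-1 column vector
    print(model.score(y))
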
diff --git a/models/S3VM_constrained.py b/models/S3VM_constrained.py
deleted file mode 100644
index 14c7975..0000000
--- a/models/S3VM_constrained.py
+++ /dev/null
@@ -1,144 +0,0 @@
-import numpy as np
-from scipy.optimize import minimize
-from sklearn.base import BaseEstimator, ClassifierMixin
-
-class S3VM_Constrained(BaseEstimator, ClassifierMixin):
-
-    def __init__(self, C=1.0, M=1e5, eps=1e-4, max_iter=100):
-        self.C = C
-        self.M = M
-        self.eps = eps
-        self.max_iter = max_iter
-        self.w = None
-        self.b = None
-        self.y_pred = 0
-        self.y = 0
-
-    def fit(self, X_labeled, y_labeled, X_unlabeled):
-        X_labeled = np.asarray(X_labeled, dtype=np.float64)
-        y_labeled = np.asarray(y_labeled, dtype=np.float64).reshape(-1, 1)
-        X_unlabeled = np.asarray(X_unlabeled, dtype=np.float64)
-
-        unique_labels = np.unique(y_labeled)
-        if not (set(unique_labels) <= {1.0, -1.0}):
-            raise ValueError("Labels must be +1 or -1")
-
-        n_labeled, n_features = X_labeled.shape
-        n_unlabeled = X_unlabeled.shape[0]
-
-        self._initialize_parameters(n_features, n_labeled, n_unlabeled)
-
-        X = np.vstack([X_labeled, X_unlabeled])
-
-        for iteration in range(self.max_iter):
-            y_unlabeled = self._predict_unlabeled(X_unlabeled)
-
-            self._optimize_mip(X_labeled, y_labeled, X_unlabeled, y_unlabeled)
-
-            new_labels = self._predict_unlabeled(X_unlabeled)
-            if np.mean(new_labels != y_unlabeled) < self.eps:
-                break
-
-        return self
-
-    def _initialize_parameters(self, n_features, n_labeled, n_unlabeled):
-        self.w = np.random.normal(0, 0.01, (n_features, 1))
-        self.b = 0.0
-        self.eta = np.zeros(n_labeled)
-        self.xi = np.zeros(n_unlabeled)
-        self.z = np.zeros(n_unlabeled)
-        self.d = np.random.rand(n_unlabeled)
-
-    def _predict_unlabeled(self, X_unlabeled):
-        scores = X_unlabeled @ self.w + self.b
-        return np.where(scores >= 0, 1, -1)
-
-    def _optimize_mip(self, X_labeled, y_labeled, X_unlabeled, y_unlabeled):
-        n_labeled, n_features = X_labeled.shape
-        n_unlabeled = X_unlabeled.shape[0]
-
-        x0 = np.concatenate([
-            self.w.flatten(),
-            [self.b],
-            self.eta,
-            self.xi,
-            self.z,
-            self.d
-        ])
-
-        bounds = (
-            [(None, None)] * n_features +
-            [(None, None)] +
-            [(0, None)] * n_labeled +
-            [(0, None)] * n_unlabeled +
-            [(0, None)] * n_unlabeled +
-            [(0, 1)] * n_unlabeled
-        )
-
-        constraints = [
-            {
-                'type': 'ineq',
-                'fun': lambda x: y_labeled.flatten() *
-                       (X_labeled @ x[:n_features] + x[n_features]) +
-                       x[n_features+1:n_features+1+n_labeled] - 1
-            },
-            {
-                'type': 'ineq',
-                'fun': lambda x: (X_unlabeled @ x[:n_features] - x[n_features] +
-                       x[n_features+1+n_labeled:n_features+1+n_labeled+n_unlabeled] +
-                       self.M*(1 - x[-n_unlabeled:])) - 1
-            },
-            {
-                'type': 'ineq',
-                'fun': lambda x: (-(X_unlabeled @ x[:n_features] - x[n_features]) +
-                       x[n_features+1+n_labeled+n_unlabeled:n_features+1+n_labeled+2*n_unlabeled] +
-                       self.M*x[-n_unlabeled:]) - 1
-            }
-        ]
-
-        def objective(x):
-            w = x[:n_features]
-            eta = x[n_features+1:n_features+1+n_labeled]
-            xi = x[n_features+1+n_labeled:n_features+1+n_labeled+n_unlabeled]
-            z = x[n_features+1+n_labeled+n_unlabeled:n_features+1+n_labeled+2*n_unlabeled]
-            return self.C * (np.sum(eta) + np.sum(xi + z)) + np.sum(np.abs(w))
-
-        res = minimize(
-            objective,
-            x0,
-            method='SLSQP',
-            bounds=bounds,
-            constraints=constraints,
-            options={'maxiter': 1000}
-        )
-
-        self.w = res.x[:n_features].reshape(-1, 1)
-        self.b = res.x[n_features]
-        self.eta = res.x[n_features+1:n_features+1+n_labeled]
-        self.xi = res.x[n_features+1+n_labeled:n_features+1+n_labeled+n_unlabeled]
-        self.z = res.x[n_features+1+n_labeled+n_unlabeled:n_features+1+n_labeled+2*n_unlabeled]
-        self.d = res.x[-n_unlabeled:]
-
-    def predict(self, X):
-        if self.w is None or self.b is None:
-            raise ValueError("Model not fitted yet")
-
-        X = np.asarray(X, dtype=np.float64)
-        scores = X @ self.w + self.b
-        self.y_pred = np.where(scores >= 0, 1, -1)
-        return self.y_pred
-
-    def score(self, y_test):
-        y = np.asarray(y_test).flatten()
-        return np.mean(self.y_pred.flatten() == y)
\ No newline at end of file
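For reference, a hypothetical driving sketch for the constrained formulation (synthetic data; predict must be called before score, and score takes only the true labels, unlike the usual scikit-learn signature inherited from ClassifierMixin):

    import numpy as np
    from models.S3VM_constrained import S3VM_Constrained

    rng = np.random.default_rng(0)
    X_lab = np.vstack([rng.normal(2, 1, (20, 3)), rng.normal(-2, 1, (20, 3))])
    y_lab = np.concatenate([np.ones(20), -np.ones(20)])
    X_unl = rng.normal(0, 2, (30, 3))

    # few outer iterations just to keep the sketch fast
    clf = S3VM_Constrained(C=1.0, max_iter=2).fit(X_lab, y_lab, X_unl)
    y_pred = clf.predict(X_lab)   # also stored in clf.y_pred
    print(clf.score(y_lab))
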
diff --git a/models/S3VM_unconstrained.py b/models/S3VM_unconstrained.py
deleted file mode 100644
index 7111566..0000000
--- a/models/S3VM_unconstrained.py
+++ /dev/null
@@ -1,80 +0,0 @@
-import numpy as np
-from scipy.optimize import minimize
-
-class S3VM_Unconstrained:
-
-    def __init__(self, C=1.0, eps=1e-4):
-        self.C = C
-        self.eps = eps
-        self.w = None
-        self.b = None
-
-    def fit(self, X_labeled, y_labeled, X_unlabeled):
-        X_labeled = np.asarray(X_labeled, dtype=np.float64)
-        y_labeled = np.asarray(y_labeled, dtype=np.float64).reshape(-1, 1)
-        X_unlabeled = np.asarray(X_unlabeled, dtype=np.float64)
-
-        unique_labels = np.unique(y_labeled)
-        if not (set(unique_labels) <= {1.0, -1.0}):
-            raise ValueError("Labels must be +1 or -1")
-
-        n_features = X_labeled.shape[1]
-        self.w = np.zeros((n_features, 1))
-        self.b = 0.0
-
-        X_labeled_aug = np.hstack([X_labeled, np.ones((X_labeled.shape[0], 1))])
-        X_unlabeled_aug = np.hstack([X_unlabeled, np.ones((X_unlabeled.shape[0], 1))])
-
-        unlabeled_scores = X_unlabeled_aug @ np.vstack([self.w, self.b])
-        y_unlabeled = np.sign(unlabeled_scores)
-        y_unlabeled[y_unlabeled == 0] = 1
-
-        X_aug = np.vstack([X_labeled_aug, X_unlabeled_aug])
-        y = np.vstack([y_labeled, y_unlabeled])
-
-        self._optimize(X_aug, y)
-
-        new_scores = X_unlabeled_aug @ np.vstack([self.w, self.b])
-        if np.all(np.sign(new_scores) == y_unlabeled):
-            return self
-
-        return self
-
-    def _optimize(self, X_aug, y):
-        _, n_features = X_aug.shape
-
-        def objective(params):
-            w = params[:-1].reshape(-1, 1)
-            b = params[-1]
-            # keep the bias column 2-D so the sum broadcasts to (m, 1)
-            margins = y * (X_aug[:, :-1] @ w + X_aug[:, -1:] * b)
-            hinge_loss = np.sum(np.maximum(0, 1 - margins))
-            norm1_w = np.sum(np.abs(w))
-            return self.C * hinge_loss + norm1_w
-
-        x0 = np.zeros(n_features)  # last entry is the bias
-
-        bounds = [(None, None)] * n_features  # all variables free
-
-        res = minimize(objective, x0, method='L-BFGS-B', bounds=bounds)
-
-        self.w = res.x[:-1].reshape(-1, 1)
-        self.b = res.x[-1]
-
-    def predict(self, X):
-        if self.w is None or self.b is None:
-            raise ValueError("Model not fitted yet")
-
-        X = np.asarray(X, dtype=np.float64)
-        scores = X @ self.w + self.b
-        self.y_pred = np.where(scores >= 0, 1, -1).ravel()
-        return self.y_pred
-
-    def score(self, y_test):
-        y_test = np.asarray(y_test).flatten()
-        return np.mean(self.y_pred.flatten() == y_test)
\ No newline at end of file
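The unconstrained variant was driven the same way; a hypothetical sketch (it self-labels the unlabeled pool once from the zero initialization, then solves a single L1-regularized hinge-loss problem):

    import numpy as np
    from models.S3VM_unconstrained import S3VM_Unconstrained

    rng = np.random.default_rng(0)
    X_lab = np.vstack([rng.normal(2, 1, (20, 3)), rng.normal(-2, 1, (20, 3))])
    y_lab = np.concatenate([np.ones(20), -np.ones(20)])
    X_unl = rng.normal(0, 2, (30, 3))

    clf = S3VM_Unconstrained(C=1.0)
    clf.fit(X_lab, y_lab, X_unl)
    clf.predict(X_lab)            # populates clf.y_pred
    print(clf.score(y_lab))
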
diff --git a/models/TSVM.py b/models/TSVM.py
deleted file mode 100644
index 18828ae..0000000
--- a/models/TSVM.py
+++ /dev/null
@@ -1,87 +0,0 @@
-"""
-Article : Twin Support Vector Machine
-Link    : https://sci-hub.tw/https://ieeexplore.ieee.org/document/4135685
-Author  : Saeed Khosravi
-"""
-
-import numpy as np
-from cvxopt import solvers, matrix
-
-class TSVM:
-
-    def __init__(self, X, y, C1, C2, eps=1e-4):
-        self.A = X[y[:, 0] == 1, :]
-        self.B = X[y[:, 0] == -1, :]
-        self.C1 = C1
-        self.C2 = C2
-        self.eps = eps
-
-    def fit(self):
-        self.w1, self.b1 = self.plane1(self.A, self.B, self.C1, self.eps)
-        self.w2, self.b2 = self.plane2(self.A, self.B, self.C2, self.eps)
-
-    def predict(self, x_test):
-        norm2_w1 = np.linalg.norm(self.w1)
-        norm2_w2 = np.linalg.norm(self.w2)
-        distance_1 = np.abs(np.dot(x_test, self.w1) + self.b1)/norm2_w1
-        distance_2 = np.abs(np.dot(x_test, self.w2) + self.b2)/norm2_w2
-        y_pred = np.zeros_like(distance_1)
-        for i in range(y_pred.shape[0]):
-            if distance_1[i] < distance_2[i]:
-                y_pred[i][0] = 1
-            else:
-                y_pred[i][0] = -1
-        self.preds = y_pred
-        return y_pred  # Return predictions
-
-    def plane1(self, A, B, c, eps):
-        e1 = np.ones((A.shape[0], 1))
-        e2 = np.ones((B.shape[0], 1))
-        H = np.concatenate((A, e1), axis=1)
-        G = np.concatenate((B, e2), axis=1)
-        HTH = np.dot(H.T, H)
-        if np.linalg.matrix_rank(H)