init commit

This commit is contained in:
2025-11-08 18:45:48 +01:00
parent 6d1d845c4e
commit 2f82041a21
9 changed files with 1393 additions and 0 deletions

39
MC_NDCC_matlab/NDCC.m Executable file
View File

@@ -0,0 +1,39 @@
%{
NORMALLY DISTRIBUTED CLUSTERS is a data generator.
It generates a series of random centers for multivariate
normal distributions. NDC randomly generates a fraction
of data for each center, i.e. what fraction of data points
will come from this center. NDC randomly generates a
separating plane. Based on this plane, classes are
chosen for each center. NDC then randomly generates the
points from the distributions. NDC can increase
inseparability by increasing variances of distributions.
A measure of "true" separability is obtained by looking
at how many points end up on the wrong side of the
separating plane. All values are taken as integers
for simplicity.

What this script does, step by step:
  1. Reads n_samples, n_features and n_classes from the user.
  2. Builds the center matrix with get_centers_mat: for every
     magnitude in centers_list and every feature axis there is one
     center at +magnitude and one at -magnitude on that axis.
  3. Randomly assigns the same number of centers to each class
     (class_center_locations).
  4. Randomly splits n_samples over the (class, center) slots
     (sample_spliter).
  5. Draws the points and labels (generate_dataset) and writes them
     to 'dataset.csv' in the current directory.
%}
centers_list = [100, 300, 500];
n_samples = input('Enter the number of samples:\n');
n_features = input('Enter the number of features:\n');
n_classes = input('Enter the number of classes:\n');
% Fail early with a clear message instead of an obscure error deep inside
% the helper functions (e.g. an empty answer or a fractional count).
validateattributes(n_samples, {'numeric'}, ...
    {'scalar', 'integer', 'nonnegative'}, 'NDCC', 'n_samples');
validateattributes(n_features, {'numeric'}, ...
    {'scalar', 'integer', 'positive'}, 'NDCC', 'n_features');
% Generating center matrix based on centers_list and number of features
centers_matrix = get_centers_mat(centers_list, n_features);
% Take the number of centers from the matrix itself so it can never drift
% from what get_centers_mat actually produced.
n_centers = size(centers_matrix, 1);
% Every class needs at least one center; otherwise there is no slot to
% put samples in and sample_spliter cannot draw a center index.
validateattributes(n_classes, {'numeric'}, ...
    {'scalar', 'integer', 'positive', '<=', n_centers}, 'NDCC', 'n_classes');
% The same number of randomly chosen centers is dedicated to each class
class_locations = class_center_locations(n_classes, n_centers);
% Randomly decide how many samples fall in each (class, center) slot
ss = sample_spliter(n_samples, n_classes, n_centers);
% Generating dataset
ds = generate_dataset(centers_matrix, ss,class_locations, n_features);
% Saving the dataset as a csv file in current directory
writematrix(ds, 'dataset.csv');

View File

@@ -0,0 +1,27 @@
function cls_locs = class_center_locations(n_classes, n_centers)
%{
*** Randomly assigns center indices to classes.
*** Input:  n_classes - number of classes
            n_centers - total number of available centers
*** Output: cls_locs  - n_classes by floor(n_centers/n_classes) matrix;
            row i holds the center indices owned by class i. Indices
            are drawn without repetition from 1..n_centers, so no
            center is shared between classes. Centers left over when
            n_centers is not a multiple of n_classes stay unused.
%}
    % Random permutation of all center indices (sampling without replacement)
    shuffled = datasample(1:n_centers, n_centers, 'Replace', false);
    % Every class receives the same number of centers
    per_class = double(int32(floor(n_centers / n_classes)));
    n_used = per_class * n_classes;
    % reshape fills column by column, so build a per_class-by-n_classes
    % matrix and transpose it: consecutive entries of the permutation then
    % run along each class row.
    cls_locs = reshape(shuffled(1:n_used), per_class, n_classes).';
end

1000
MC_NDCC_matlab/dataset.csv Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,30 @@
function M = generate_dataset(centers_matrix, ss,class_locations, n_features)
%{
*** Builds the labelled dataset.
*** Input:  centers_matrix  - one center per row, one column per feature
            ss              - ss(i,j) is the number of samples to draw
                              for class i around its j-th center
            class_locations - class_locations(i,j) is the row of
                              centers_matrix used as the j-th center
                              of class i
            n_features      - number of feature columns
*** Output: M - sum(ss(:)) by (n_features + 1) matrix. The first
            n_features columns are the samples, the last column is the
            class label (the row index i of class_locations).
*** Samples of each center are drawn from a normal distribution whose
    mean is the full center row and whose standard deviation is 5 on
    every feature.
%}
    sigma = 5;
    [n_classes, n_slots] = size(class_locations);

    % Sample counts per (class, center) slot, rounded to whole numbers;
    % negative counts are treated as "no samples".
    counts = max(0, round(double(ss(1:n_classes, 1:n_slots))));

    % Preallocate the result instead of growing it inside the loop.
    M = zeros(sum(counts(:)), n_features + 1);
    next_row = 1;
    for i = 1:n_classes
        for j = 1:n_slots
            n_pts = counts(i, j);
            if n_pts == 0
                continue;
            end
            % Take the whole ROW of the center matrix. A single subscript
            % (centers_matrix(k)) would be a linear index and return only
            % the first coordinate of the center, which would then be used
            % as the mean of every feature.
            mu = centers_matrix(class_locations(i, j), :);
            rows = next_row:(next_row + n_pts - 1);
            M(rows, 1:n_features) = repmat(mu, n_pts, 1) ...
                + sigma * randn(n_pts, n_features);
            M(rows, end) = i;
            next_row = next_row + n_pts;
        end
    end
end

View File

@@ -0,0 +1,17 @@
function [centers_matrix] = get_centers_mat(centers_list, n_features)
%{
*** Builds the matrix of center locations in n_features-dimensional space.
*** For every magnitude in centers_list and every feature axis, two
    centers are produced: one at +magnitude and one at -magnitude on
    that axis, with all other coordinates equal to zero.
*** Output: centers_matrix of size
    (2 * length(centers_list) * n_features) by n_features, one center
    per row, grouped by magnitude and then by axis.
%}
    n_magnitudes = length(centers_list);
    rows_per_magnitude = 2 * n_features;
    centers_matrix = zeros(n_magnitudes * rows_per_magnitude, n_features);
    for m = 1:n_magnitudes
        % First row (minus one) of the block that belongs to this magnitude
        block_offset = (m - 1) * rows_per_magnitude;
        for axis_idx = 1:n_features
            plus_row = block_offset + 2 * axis_idx - 1;
            % The positive center is immediately followed by its mirror image
            centers_matrix(plus_row, axis_idx) = centers_list(m);
            centers_matrix(plus_row + 1, axis_idx) = -centers_list(m);
        end
    end
end

19
MC_NDCC_matlab/sample_spliter.m Executable file
View File

@@ -0,0 +1,19 @@
function n_each_c = sample_spliter(n_samples, n_classes, n_centers)
%{
*** Randomly distributes n_samples over the (class, center) slots.
*** Output: n_each_c - n_classes by floor(n_centers/n_classes) matrix of
            counts; entry (i,j) is the number of samples assigned to
            the j-th center of class i. The counts sum to n_samples.
*** Each sample picks its class and its center uniformly at random, so
    every slot receives roughly
    n_samples / (n_classes * centers per class) samples, with some
    random variation.
%}
    centers_per_class = int32(floor(n_centers / n_classes));
    n_each_c = zeros(n_classes, centers_per_class);
    assigned = 0;
    % Hand out the samples one at a time
    while assigned < n_samples
        cls_idx = randi(n_classes);
        cen_idx = randi(centers_per_class);
        n_each_c(cls_idx, cen_idx) = n_each_c(cls_idx, cen_idx) + 1;
        assigned = assigned + 1;
    end
end