init commit
This commit is contained in:
39
MC_NDCC_matlab/NDCC.m
Executable file
39
MC_NDCC_matlab/NDCC.m
Executable file
@@ -0,0 +1,39 @@
|
||||
%{
|
||||
NORMALLY DISTRIBUTED CLUSTERS is a data generator.
|
||||
It generates a series of random centers for multivariate
|
||||
normal distributions. NDC randomly generates a fraction
|
||||
of data for each center, i.e. what fraction of data points
|
||||
will come from this center. NDC randomly generates a
|
||||
separating plane. Based on this plane, classes are
|
||||
chosen for each center. NDC then randomly generates the
|
||||
points from the distributions. NDC can increase
|
||||
inseparability by increasing variances of distributions.
|
||||
A measure of "true" separability is obtained by looking
|
||||
at how many points end up on the wrong side of the
|
||||
separating plane. All values are taken as integers
|
||||
for simplicity.
|
||||
%}
|
||||
|
||||
|
||||
% Magnitudes at which centers are placed along every feature axis.
centers_list = [100, 300, 500];

% Interactive parameters of the run.
n_samples = input('Enter the number of samples:\n');
n_features = input('Enter the number of features:\n');
n_classes = input('Enter the number of classes:\n');

% Fail fast with a readable message instead of an obscure error deep inside
% one of the helper functions (e.g. a non-integer matrix size).
validateattributes(n_samples, {'numeric'}, ...
    {'scalar', 'integer', 'nonnegative'}, 'NDCC', 'n_samples');
validateattributes(n_features, {'numeric'}, ...
    {'scalar', 'integer', 'positive'}, 'NDCC', 'n_features');
validateattributes(n_classes, {'numeric'}, ...
    {'scalar', 'integer', 'positive'}, 'NDCC', 'n_classes');

% Generating center matrix based on centers_list and number of features.
% The matrix has one row per center, so the number of centers is read from
% it rather than recomputed with a duplicated formula.
centers_matrix = get_centers_mat(centers_list, n_features);
n_centers = size(centers_matrix, 1);

% Every class needs at least one center: the helpers split the centers into
% floor(n_centers/n_classes) per class, which would be zero otherwise.
if n_classes > n_centers
    error('NDCC:tooManyClasses', ...
        'n_classes (%d) must not exceed the number of centers (%d).', ...
        n_classes, n_centers);
end

% The same number of randomly chosen centers is dedicated to each class.
class_locations = class_center_locations(n_classes, n_centers);

% Randomly decide how many samples are drawn for each center of each class.
ss = sample_spliter(n_samples, n_classes, n_centers);

% Generating dataset (features followed by a label column).
ds = generate_dataset(centers_matrix, ss, class_locations, n_features);

% Saving the dataset as a csv file in the current directory.
writematrix(ds, 'dataset.csv');
|
||||
|
||||
27
MC_NDCC_matlab/class_center_locations.m
Executable file
27
MC_NDCC_matlab/class_center_locations.m
Executable file
@@ -0,0 +1,27 @@
|
||||
function cls_locs = class_center_locations(n_classes, n_centers)
%CLASS_CENTER_LOCATIONS Randomly assign center indices to classes.
%
%   cls_locs = CLASS_CENTER_LOCATIONS(n_classes, n_centers) returns an
%   n_classes-by-floor(n_centers/n_classes) matrix. Row i lists the indices
%   (taken from 1..n_centers, without repetition across the whole matrix)
%   of the centers that belong to class i.
%
%   When n_centers is not a multiple of n_classes, the leftover centers are
%   not assigned to any class.

% Number of centers each class receives.
n_cen_fe_cls = floor(n_centers / n_classes);

% Random permutation of 1..n_centers. randperm is part of core MATLAB, so
% no Statistics Toolbox function is needed for the shuffle.
locs = randperm(n_centers);

% Keep only as many shuffled indices as fit the matrix and lay them out
% one class per row: reshape fills column by column, hence the transpose.
used = locs(1:n_classes * n_cen_fe_cls);
cls_locs = reshape(used, n_cen_fe_cls, n_classes).';

end
|
||||
1000
MC_NDCC_matlab/dataset.csv
Executable file
1000
MC_NDCC_matlab/dataset.csv
Executable file
File diff suppressed because it is too large
Load Diff
30
MC_NDCC_matlab/generate_dataset.m
Executable file
30
MC_NDCC_matlab/generate_dataset.m
Executable file
@@ -0,0 +1,30 @@
|
||||
function M = generate_dataset(centers_matrix, ss, class_locations, n_features)
%GENERATE_DATASET Draw labelled samples around the centers of every class.
%
%   M = GENERATE_DATASET(centers_matrix, ss, class_locations, n_features)
%
%   Inputs
%     centers_matrix  - one center per row, one column per feature.
%     ss              - ss(i,j) is the number of samples to draw around the
%                       j-th center of class i (same size as
%                       class_locations).
%     class_locations - class_locations(i,j) is the row of centers_matrix
%                       holding the j-th center of class i.
%     n_features      - number of feature columns to generate.
%
%   Output
%     M - matrix with one row per sample: n_features feature columns
%         followed by one label column containing the class index i.
%         Samples are ordered by class, then by center.
%
%   Each sample is normally distributed around its center with a standard
%   deviation of 5 in every feature.

sigma = 5;
[n_cls, n_cen] = size(class_locations);

% Whole-number sample counts; the total is known up front, so the output
% is allocated once instead of being grown inside the loop.
counts = round(double(ss));
M = zeros(sum(counts(:)), n_features + 1);

filled = 0;
for i = 1:n_cls
    for j = 1:n_cen
        n = counts(i, j);
        if n == 0
            continue;
        end

        % Use the whole ROW as the mean vector. Indexing centers_matrix
        % with a single subscript would be linear indexing and yield only
        % one scalar (the first-column entry) for all features.
        mu = centers_matrix(class_locations(i, j), :);

        rows = filled + (1:n);
        % mu + sigma*Z with Z ~ N(0,1) is N(mu, sigma^2); randn is core
        % MATLAB, so no Statistics Toolbox is required.
        M(rows, 1:n_features) = repmat(mu, n, 1) + sigma * randn(n, n_features);
        M(rows, end) = i;
        filled = filled + n;
    end
end

end
|
||||
17
MC_NDCC_matlab/get_centers_mat.m
Executable file
17
MC_NDCC_matlab/get_centers_mat.m
Executable file
@@ -0,0 +1,17 @@
|
||||
function [centers_matrix] = get_centers_mat(centers_list, n_features)
%GET_CENTERS_MAT Build the matrix of cluster-center coordinates.
%
%   centers_matrix = GET_CENTERS_MAT(centers_list, n_features) returns a
%   (2*length(centers_list)*n_features)-by-n_features matrix with one
%   center per row. For every magnitude c in centers_list and every
%   feature axis, two centers are created: one with +c and one with -c in
%   that feature's column and zeros elsewhere.
%
%   Rows are ordered by magnitude first, then by feature, with the
%   positive center directly followed by its negative counterpart.

n_magnitudes = length(centers_list);
rows_per_magnitude = 2 * n_features;
centers_matrix = zeros(n_magnitudes * rows_per_magnitude, n_features);

for m = 1:n_magnitudes
    magnitude = centers_list(m);
    % Offset of the block of rows that belongs to this magnitude.
    block_start = (m - 1) * rows_per_magnitude;
    for axis_idx = 1:n_features
        pos_row = block_start + 2 * axis_idx - 1;
        centers_matrix(pos_row, axis_idx) = magnitude;
        centers_matrix(pos_row + 1, axis_idx) = -magnitude;
    end
end

end
|
||||
19
MC_NDCC_matlab/sample_spliter.m
Executable file
19
MC_NDCC_matlab/sample_spliter.m
Executable file
@@ -0,0 +1,19 @@
|
||||
function n_each_c = sample_spliter(n_samples, n_classes, n_centers)
%SAMPLE_SPLITER Randomly distribute samples over the centers of each class.
%
%   n_each_c = SAMPLE_SPLITER(n_samples, n_classes, n_centers) returns an
%   n_classes-by-floor(n_centers/n_classes) matrix of counts. Entry (i,j)
%   is the number of samples given to the j-th center of class i.
%
%   Samples are handed out one at a time: for each sample a class and one
%   of that class's centers are picked uniformly at random, so all entries
%   have the same expected count and, for a whole-number n_samples, the
%   entries add up to n_samples.

slots_per_class = int32(floor(n_centers / n_classes));
n_each_c = zeros(n_classes, slots_per_class);

remaining = n_samples;
while remaining > 0
    % Pick the class first, then the center within it.
    cls = randi(n_classes);
    slot = randi(slots_per_class);
    n_each_c(cls, slot) = n_each_c(cls, slot) + 1;
    remaining = remaining - 1;
end

end
|
||||
Reference in New Issue
Block a user