init commit

2025-11-08 18:45:48 +01:00
parent 6d1d845c4e
commit 2f82041a21
9 changed files with 1393 additions and 0 deletions
--- a/MC_NDCC_matlab/NDCC.m
+++ b/MC_NDCC_matlab/NDCC.m
@@ -0,0 +1,39 @@
+%{
+NORMALLY DISTRIBUTED CLUSTERS is a data generator. 
+It generates a series of random centers for multivariate
+normal distributions. NDC randomly generates a fraction
+of data for each center, i.e. what fraction of data points
+will come from this center. NDC randomly generates a 
+separating plane. Based on this plane, classes are
+chosen for each center. NDC then randomly generates the
+points from the distributions. NDC can increase
+inseparability by increasing variances of distributions.
+A measure of "true" separability is obtained by looking 
+at how many points end up on the wrong side of the 
+separating plane. All values are taken as integers 
+for simplicity.
+%}
+
+
+centers_list = [100, 300, 500];
+n_samples  = input('Enter the number of samples:\n');
+n_features = input('Enter the number of features:\n');
+n_classes  = input('Enter the number of classes:\n');
+
+% get_centers_mat returns one center per row, so the number of centers is
+% read from the matrix instead of repeating that function's formula here.
+centers_matrix = get_centers_mat(centers_list, n_features);
+n_centers = size(centers_matrix, 1);
+
+% Each class needs at least one center; stop early with a clear message.
+if n_classes > n_centers
+    error('NDCC:tooManyClasses', 'n_classes must not exceed %d.', n_centers);
+end
+
+% Assign centers to classes, split the samples among those centers, draw
+% the points and save them as dataset.csv in the current directory.
+class_locations = class_center_locations(n_classes, n_centers);
+ss = sample_spliter(n_samples, n_classes, n_centers);
+ds = generate_dataset(centers_matrix, ss,class_locations, n_features);
+writematrix(ds, 'dataset.csv');
+    
--- a/MC_NDCC_matlab/class_center_locations.m
+++ b/MC_NDCC_matlab/class_center_locations.m
@@ -0,0 +1,27 @@
+function cls_locs = class_center_locations(n_classes, n_centers)
+    %{
+
+        *** Randomly assigns center indices to classes.
+
+        *** n_classes : number of classes (rows of the result)
+        *** n_centers : number of available centers, indexed 1..n_centers
+
+        *** Returns an n_classes-by-floor(n_centers/n_classes) matrix;
+        row i lists the centers of class i. No center index appears
+        twice. When n_centers is not a multiple of n_classes, the
+        leftover centers are not assigned to any class.
+
+    %}
+
+    % Every class receives the same number of centers.
+    n_cen_fe_cls = floor(n_centers/n_classes);
+
+    % Random permutation of 1..n_centers. randperm is part of base MATLAB,
+    % so no toolbox is needed for this step.
+    locs = randperm(n_centers);
+
+    % Consecutive runs of n_cen_fe_cls shuffled indices form the classes:
+    % reshape fills column by column, so transpose to get one row per class.
+    used = locs(1:n_classes*n_cen_fe_cls);
+    cls_locs = reshape(used, n_cen_fe_cls, n_classes).';
+end
--- a/MC_NDCC_matlab/dataset.csv
+++ b/MC_NDCC_matlab/dataset.csv
--- a/MC_NDCC_matlab/generate_dataset.m
+++ b/MC_NDCC_matlab/generate_dataset.m
@@ -0,0 +1,30 @@
+function M = generate_dataset(centers_matrix, ss,class_locations, n_features)
+    %{
+
+        *** Draws the samples of every center and returns them with labels.
+
+        *** centers_matrix  : one center per row, n_features columns
+        *** ss              : ss(i,j) = sample count for center j of class i
+        *** class_locations : class_locations(i,j) = row of centers_matrix
+                              that holds center j of class i
+        *** Returns a sum(ss(:))-by-(n_features+1) matrix: normally
+        distributed features (mu = center, sigma = 5) followed by the
+        class index i in the last column.
+
+    %}
+    sigma = 5;
+    M = zeros(sum(ss(:)), n_features + 1);   % preallocate, do not grow in the loop
+    last = 0;                                % last row of M filled so far
+    for i = 1:size(class_locations, 1)
+        for j = 1:size(class_locations, 2)
+            n = ss(i,j);
+            % Take the whole ROW of the center: a single linear index
+            % returns one scalar, which would give every feature the same mean.
+            mu = centers_matrix(class_locations(i,j), :);
+            rows = last+1 : last+n;
+            M(rows, 1:n_features) = repmat(mu, n, 1) + sigma*randn(n, n_features);
+            M(rows, end) = i;
+            last = last + n;
+        end
+    end
+end
--- a/MC_NDCC_matlab/get_centers_mat.m
+++ b/MC_NDCC_matlab/get_centers_mat.m
@@ -0,0 +1,17 @@
+function [centers_matrix] = get_centers_mat(centers_list, n_features)
+    %{
+        *** Builds the table of cluster centers, one center per row.
+        For every value c in centers_list and every feature axis j the
+        table gets two consecutive rows: +c and -c in column j, with all
+        other columns zero. Rows are grouped by c, then by axis.
+    %}
+    n_values = length(centers_list);
+    centers_matrix = zeros(2*n_values*n_features, n_features);
+    for v = 1:n_values
+        block_start = 2*n_features*(v - 1);   % rows used by earlier values
+        for axis_idx = 1:n_features
+            row = block_start + 2*axis_idx;   % row of the negative center
+            centers_matrix(row-1:row, axis_idx) = [1; -1]*centers_list(v);
+        end
+    end
+end
--- a/MC_NDCC_matlab/sample_spliter.m
+++ b/MC_NDCC_matlab/sample_spliter.m
@@ -0,0 +1,19 @@
+function n_each_c = sample_spliter(n_samples, n_classes, n_centers)
+    %{
+        *** Distributes n_samples over the centers of all classes.
+        *** Returns an n_classes-by-floor(n_centers/n_classes) matrix of
+        counts. Each sample is added to a cell picked uniformly at random,
+        so for a non-negative integer n_samples the counts sum to
+        n_samples and every cell gets roughly the same share.
+    %}
+    per_class = int32(floor(n_centers/n_classes));
+    n_each_c = zeros(n_classes, per_class);
+    remaining = n_samples;
+    while remaining > 0
+        % pick the class (row) first, then one of its centers (column)
+        cls = randi(n_classes);
+        cen = randi(per_class);
+        n_each_c(cls, cen) = n_each_c(cls, cen) + 1;
+        remaining = remaining - 1;
+    end
+end