class DataSetAnalyzer:
    """Load a data set from CSV and expose statistics, plots and RBFN export.

    The raw table is read once in the constructor through ``CSVUtils`` and
    kept in ``self.data``. "Targets" are the rows of the transposed table
    (see ``get_targets``); every statistic is computed across those rows.
    """

    def __init__(self, file_path, file_name):
        """Validate ``file_path`` as a folder and read ``file_name`` from it."""
        self.validator = Validator()
        self.validator.is_folder(file_path)
        self.normalized = False
        self.norm = None
        self.file_name = file_name
        self.file_path = file_path
        self.data, self.header = self.set_data()
        self.rbfn = RBFN()

    def set_data(self):
        """Read the CSV file and return the ``(data, header)`` pair."""
        reader = CSVUtils(dir_path=self.file_path, file_name=self.file_name)
        return reader.get_data(has_header=None, header_line=None,
                               transpose=False)

    def set_normalized(self, normalized):
        """Choose whether ``get_targets`` returns normalized data."""
        self.validator.is_bool(normalized)
        self.normalized = normalized

    def set_norm(self, norm):
        """Store the norm used for normalization.

        ``1`` and ``2`` are accepted as aliases of the L1 / L2 abbreviations
        and ``None`` falls back to L2; any other accepted value is stored
        unchanged.
        """
        constants = Constants()
        # NOTE(review): validation runs before the None default is applied,
        # as in the original; if the validator rejects None the L2 fallback
        # below is unreachable - confirm against Validator.in_list.
        self.validator.in_list(norm, constants.get_norms_list())
        if norm is None:
            norm = constants.get_l2_norm_abbreviation()
        elif norm == 1:
            norm = constants.get_l1_norm_abbreviation()
        elif norm == 2:
            norm = constants.get_l2_norm_abbreviation()
        self.norm = norm

    def set_rbfn(self, rbfn):
        """Replace the RBFN instance used for training and plotting."""
        self.validator.is_object_type(rbfn, RBFN())
        self.rbfn = rbfn

    def get_data(self, transpose=False):
        """Return the raw data as an array, optionally transposed."""
        table = np.array(self.data)
        return table.T if transpose else table

    def get_header(self):
        """Return the header read from the CSV file."""
        return self.header

    def get_file_path(self):
        """Return the folder the data set was read from."""
        return self.file_path

    def get_file_name(self):
        """Return the file name of the data set."""
        return self.file_name

    def get_targets_number(self):
        """Return the length of the first row of the raw data."""
        return len(self.data[0])

    def get_features_number(self):
        """Return the number of rows of the raw data."""
        return len(self.data)

    def get_feature_axis(self):
        """Return the 1-based x axis ``1..get_features_number()``."""
        return np.arange(1, len(self.data) + 1, 1)

    def get_targets(self, transpose=True):
        """Return the targets, normalized when ``set_normalized(True)``.

        With ``transpose`` (the default) the raw table is transposed first;
        otherwise the raw data is returned as stored.
        """
        if self.normalized:
            return self.get_normalized_target(transpose=transpose)
        if transpose:
            return np.transpose(self.data)
        return self.data

    def get_average_target(self):
        """Return the mean over all targets (axis 0)."""
        return np.mean(self.get_targets(), axis=0)

    def get_std_target(self):
        """Return the standard deviation over all targets (axis 0)."""
        return np.std(self.get_targets(), axis=0)

    def get_variance_target(self):
        """Return the variance over all targets (axis 0)."""
        return np.var(self.get_targets(), axis=0)

    def get_mode_target(self):
        """Return the mode over all targets as a flat array."""
        mode = stats.mode(self.get_targets()).mode
        # Flatten to one value per feature whatever shape scipy returns.
        return np.reshape(mode, self.get_features_number())

    def get_median_target(self):
        """Return the median over all targets (axis 0)."""
        return np.median(self.get_targets(), axis=0)

    def get_normalized_target(self, transpose=True):
        """Return the (optionally transposed) data normalized row by row."""
        data = np.transpose(self.data) if transpose else self.data
        return normalize(data,
                         norm=Constants().get_norm_abbreviation(self.norm),
                         axis=1)

    def base_plot(self, y_axis, type, style='ro'):
        """Plot ``y_axis`` against the feature axis; base of all plots.

        ``type`` is the statistic name appended to the figure title and
        ``style`` is the matplotlib format string.
        """
        feature_axis = self.get_feature_axis()
        plt.plot(feature_axis, y_axis, style)
        # The original appended to an undefined ``y_label`` here, which
        # raised UnboundLocalError whenever normalization was enabled; the
        # label was never used, so only the title is built.
        title = 'Theoretical target '
        if self.normalized:
            title += ('(norm ' + Constants().get_norm_abbreviation(self.norm)
                      + ') ')
        title += self.file_name + ' ' + type
        plt.suptitle(title, fontsize=15)
        plt.show()

    def plot_average(self):
        """Plot the mean target."""
        self.base_plot(self.get_average_target(), 'mean', 'ro')

    def plot_std(self):
        """Plot the standard deviation of the targets."""
        self.base_plot(self.get_std_target(), 'standard deviation', 'b^')

    def plot_mode(self):
        """Plot the mode of the targets."""
        self.base_plot(self.get_mode_target(), 'mode', 'y-')

    def plot_median(self):
        """Plot the median of the targets."""
        self.base_plot(self.get_median_target(), 'median', 'k^')

    def plot_variance(self):
        """Plot the variance of the targets."""
        self.base_plot(self.get_variance_target(), 'variance', 'go')

    def plot_mean_and_std(self):
        """Plot mean and standard deviation of the targets on one figure."""
        average_target = self.get_average_target()
        std_target = self.get_std_target()
        feature_axis = self.get_feature_axis()
        red_patch = mpatches.Patch(color='red', label='average')
        blue_patch = mpatches.Patch(color='blue', label='standard deviation')
        plt.legend(handles=[red_patch, blue_patch])
        # Both series get the same explicit x axis; without it matplotlib
        # draws the mean against 0-based indices, one step off from the std.
        plt.plot(feature_axis, std_target, 'b^',
                 feature_axis, average_target, 'ro')
        plt.suptitle('Theoretical target ' + self.file_name
                     + ' standard deviation and mean', fontsize=15)
        plt.show()

    def save_weigths_in_csv(self, centers, file_path, file_name=None,
                            save_plot=False, class_id=None,
                            float_format='%0.15f', remove_file=False):
        """Train the RBFN on every target and append its weights to a CSV.

        ``file_name`` defaults to the data set's own file name. When
        ``remove_file`` is true an existing output file is deleted first.
        When ``save_plot`` is true a figure is saved for each target.
        """
        targets = self.get_targets()
        if file_name is None:
            file_name = self.file_name
        rbfn = self.rbfn
        rbfn.set_centers(centers)
        CSV = CSVUtils(dir_path=file_path, file_name=file_name)
        file = CSV.create_name()
        if self.validator.check_is_file(file) and remove_file:
            os.remove(file)
        for index, target in enumerate(targets, start=1):
            rbfn.set_target(target)
            rbfn.train()
            if save_plot:
                rbfn.plot(name=file_name, save=True, path=file_path,
                          index=index)
            rbfn.save_weigths(file_path, file_name, class_id,
                              float_format=float_format)
        # NOTE(review): the source layout was lost; close_csv is assumed to
        # run once after all targets are written - confirm. A falsy class_id
        # (e.g. 0) skips it, as in the original.
        if class_id:
            CSV.close_csv()

    def plot(self, centers, index, save=False):
        """Train the RBFN on the target at ``index`` and plot the fit.

        With ``save`` the figure is written into the data set folder
        instead of being shown.
        """
        # The original never fetched the targets (NameError) and passed
        # ``save`` positionally, where RBFN.plot expects ``name``.
        targets = self.get_targets()
        self.validator.index_exists(targets, index)
        rbfn = self.rbfn
        rbfn.set_target(targets[index])
        rbfn.set_centers(centers)
        rbfn.train()
        # join(..., '') guarantees a trailing separator, because RBFN.plot
        # concatenates path and name directly.
        rbfn.plot(name=self.file_name, save=save,
                  path=os.path.join(self.file_path, ''), index=index)

    def plot_all(self, centers, name, save=False):
        """Train the RBFN on every target in turn and plot each fit.

        With ``save`` the figures are written into the data set folder,
        numbered from 1.
        """
        rbfn = self.rbfn
        # A path is needed when saving; without it RBFN.plot fails on
        # ``None + name``.
        out_dir = os.path.join(self.file_path, '')
        for index, target in enumerate(self.get_targets(), start=1):
            rbfn.set_target(target)
            rbfn.set_centers(centers)
            rbfn.train()
            rbfn.plot(name, save=save, path=out_dir, index=index)
class RBFN:
    """Radial basis function network with linearly solved output weights.

    Centers are supplied by the caller (``set_centers``). ``train`` builds
    the interpolation matrix of the training points against the centers and
    solves for the weights with the configured solver.
    """

    def __init__(self, training_data=None, target=None):
        """Start from the defaults in ``Constants``: Gaussian basis, LS solver."""
        constants = Constants()
        self.epsilon = constants.get_rbfn_deafult_epsilon()
        self.k = constants.get_rbfn_default_k()
        self.base_function_type = constants.get_rbfn_gaussian_abbreviation()
        self.training_data = training_data
        self.target = target
        self.solver = constants.get_solver_ls_abbreviation()
        self.validator = Validator()

    def calculate_base_function(self, center, x):
        """Evaluate the configured radial basis function for one pair.

        The radius is ``norm(center - x)``. Supported kinds are inverse
        multiquadric, inverse quadratic, polyharmonic (order ``self.k``) and
        Gaussian; Gaussian is also the fallback.
        """
        radius = norm(center - x)
        epsilon = self.epsilon
        k = self.k
        function = self.base_function_type
        constants = Constants()
        self.validator.in_list(function,
                               constants.get_rbfn_base_functions_list())
        scaled_sq = (epsilon * radius) ** 2
        if function == constants.get_rbfn_inv_multq_abbreviation():
            return np.sqrt(1 / (1 + scaled_sq))
        if function == constants.get_rbfn_inv_q_abbreviation():
            return 1 / (1 + scaled_sq)
        if function == constants.get_rbfn_polyharmonic_abbreviation():
            # Polyharmonic spline: r^k for odd k, r^k * ln(r) for even k.
            # The original had the two parities swapped.
            if k % 2 == 1:
                return radius ** k
            if radius == 0:
                # Limit of r^k * ln(r) as r -> 0 for k > 0; avoids
                # 0 * -inf = NaN when a point coincides with a center.
                return 0.0
            return radius ** k * np.log(radius)
        # Gaussian, which is also the default kind.
        return np.exp(-scaled_sq)

    def set_centers(self, centers):
        """Store the centers and their count."""
        self.centers = centers
        self.num_centers = len(centers)

    def set_epsilon(self, epsilon):
        """Set the shape parameter used by the non-polyharmonic bases."""
        self.epsilon = epsilon

    def set_base_function_type(self, base_function_type):
        """Select the radial basis function kind."""
        self.base_function_type = base_function_type

    def set_k(self, k):
        """Set the polyharmonic order (stored as ``int``).

        The original signature took no argument and always raised
        NameError, so adding the parameter cannot break a working caller.
        """
        self.k = int(k)

    def set_training_data(self, training_data):
        """Set the sample points the basis functions are evaluated at."""
        self.training_data = training_data

    def set_target(self, target):
        """Set the target values to fit."""
        self.target = target

    def set_solvers(self, solver):
        """Select the linear solver after validating its abbreviation."""
        self.validator.in_list(solver, Constants().get_valid_solvers())
        self.solver = solver

    def get_weights(self):
        """Return the weights computed by the last ``train`` call."""
        return self.weights

    def activation_function(self):
        """Return the interpolation matrix, points (rows) x centers (columns).

        Raises:
            ValueError: if no training data has been set.
        """
        if self.training_data is None:
            raise ValueError('training data must be set before the '
                             'interpolation matrix can be computed')
        # asarray lets plain sequences work as well as ndarrays.
        training_data = np.asarray(self.training_data)
        interpolation_matrix = np.zeros(
            (training_data.shape[0], self.num_centers), float)
        for center_index, center in enumerate(self.centers):
            for xi, x in enumerate(training_data):
                interpolation_matrix[xi, center_index] = (
                    self.calculate_base_function(center, x))
        return interpolation_matrix

    def train(self):
        """Fit the output weights with pseudo-inverse or least squares.

        Raises:
            ValueError: if ``self.solver`` is neither known abbreviation.
                The original silently left the weights unset or stale.
        """
        norm_matrix = self.activation_function()
        constants = Constants()
        if self.solver == constants.get_solver_pinv_abbreviation():
            self.weights = dot(pinv(norm_matrix), self.target)
        elif self.solver == constants.get_solver_ls_abbreviation():
            self.weights = np.linalg.lstsq(norm_matrix, self.target,
                                           rcond=None)[0]
        else:
            raise ValueError('unknown solver: ' + repr(self.solver))

    def test(self):
        """Return the model output at the training points."""
        interpolation_matrix = self.activation_function()
        return np.dot(interpolation_matrix, self.weights)

    def init_plot_center(self):
        """Draw the centers as green squares on the x axis."""
        plt.plot(self.centers, np.zeros(self.num_centers), 'gs')

    def init_plot_test(self):
        """Draw the model output as a red line."""
        plt.plot(self.training_data, self.test(), 'r-')

    def plot(self, name=None, save=False, path=None, index=None):
        """Plot the target, the fitted output and the centers.

        With ``save`` the figure is written to
        ``path + name + '-' + index + '-' + <random int> + '.png'`` and
        closed; otherwise it is shown.
        """
        plt.figure(figsize=(10, 6))
        plt.plot(self.training_data, self.target, 'k-')
        self.init_plot_test()
        self.init_plot_center()
        red_patch = mpatches.Patch(color='red', label='Tested data')
        black_patch = mpatches.Patch(color='black', label='Training data')
        green_patch = mpatches.Patch(color='green', label='Centers')
        plt.legend(handles=[red_patch, black_patch, green_patch])
        if not save:
            plt.show()
            return
        # Fall back to the working directory and a generic stem rather than
        # failing on ``None + None`` when the caller gives no path or name.
        prefix = '' if path is None else path
        stem = 'rbfn' if name is None else name
        # The random suffix keeps repeated saves from overwriting each other.
        suffix = str(np.random.randint(0, 1000000))
        plt.savefig(prefix + stem + '-' + str(index) + '-' + suffix + '.png',
                    bbox_inches='tight')
        plt.close()

    def save_weigths(self, file_path, file_name, class_id=None,
                     float_format=Constants().get_default_float_format(),
                     remove_file=False):
        """Write the current weights as one CSV row.

        When ``class_id`` is truthy it is inserted in front of the weights.
        When ``remove_file`` is true an existing output file is deleted
        first.
        """
        self.validator.is_folder(file_path)
        CSV = CSVUtils(file_name=file_name, dir_path=file_path)
        file = CSV.create_name()
        if self.validator.check_is_file(file) and remove_file:
            os.remove(file)
        weights = self.get_weights()
        # NOTE(review): a class_id of 0 is falsy and is therefore not
        # written - confirm whether 0 is a valid class label.
        if class_id:
            weights = insert(weights, 0, class_id)
        CSV.save(weights, header_read=None, header_insert=False,
                 float_format=float_format, index=False)