def __separate_data(self, data_filename):
    data_analyzer = DataAnalyzer(data_filename)
    self.mean_list = data_analyzer.get_mean_list()
    self.stdev_list = data_analyzer.get_stdev_list()

    # feature scale, and add a column of 1s to X for the intercept term
    all_X = data_analyzer.X
    all_X = self.__apply_feature_scaling(all_X)
    all_X = np.c_[np.ones(all_X.shape[0]), all_X]
    all_Y = data_analyzer.Y

    # shuffle rows, then split: top 80% of rows will be for training,
    # the remaining 20% for validation
    all_data = np.c_[all_X, all_Y]
    np.random.shuffle(all_data)
    split_row_index = int(all_data.shape[0] * 0.8)
    training_data = all_data[:split_row_index, :]
    validation_data = all_data[split_row_index:, :]

    # the last column is Y; reshape each Y into an (n, 1) column vector
    self.training_X = training_data[:, :-1]
    self.training_Y = training_data[:, -1]
    self.training_Y = self.training_Y.reshape(self.training_Y.shape[0], 1)
    self.validation_X = validation_data[:, :-1]
    self.validation_Y = validation_data[:, -1]
    self.validation_Y = self.validation_Y.reshape(self.validation_Y.shape[0], 1)
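# NOTE: __apply_feature_scaling is called above but is not defined in this
# section. A minimal sketch of what it likely does, assuming z-score
# standardization using the per-feature means and standard deviations
# obtained from DataAnalyzer (an assumption, not confirmed by this section):
def __apply_feature_scaling(self, X):
    # standardize each feature column: (x - mean) / stdev
    # assumes self.mean_list and self.stdev_list are set before this is called
    return (X - np.array(self.mean_list)) / np.array(self.stdev_list)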
def __init_training_data(self, training_filename):
    data_analyzer = DataAnalyzer(training_filename)
    self.mean_list = data_analyzer.get_mean_list()
    self.stdev_list = data_analyzer.get_stdev_list()

    # feature scale, and add a column of 1s to X for the intercept term
    self.training_X = data_analyzer.X
    self.training_X = self.__apply_feature_scaling(self.training_X)
    self.training_X = np.c_[np.ones(self.training_X.shape[0]), self.training_X]
    self.training_Y = data_analyzer.Y
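# NOTE: DataAnalyzer is not defined in this section. A hypothetical stub of
# the interface both methods above rely on; the CSV layout, attribute names,
# and use of population stdev are assumptions for illustration only:
class DataAnalyzer:
    def __init__(self, filename):
        data = np.loadtxt(filename, delimiter=",")  # assumed comma-separated file
        self.X = data[:, :-1]  # all columns but the last are features
        self.Y = data[:, -1]   # last column is the target

    def get_mean_list(self):
        # per-feature means, used for z-score scaling
        return self.X.mean(axis=0).tolist()

    def get_stdev_list(self):
        # per-feature standard deviations, used for z-score scaling
        return self.X.std(axis=0).tolist()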