def __data_preparation(self, path_input_dir, N):
    """Load pictures from *path_input_dir*, split them into train/test sets,
    and optionally corrupt the inputs with noise.

    Side effects — sets the following attributes on ``self``:
    ``dataset``, ``index_datapath_mapper``, ``y_train``, ``y_test``,
    ``N_test``, ``x_train``, ``x_test``, ``n_dimension``.

    :param path_input_dir: existing directory holding the input pictures
    :param int N: split parameter forwarded to
        ``data_loader.split_data_train_and_test`` (presumably the test-set
        size — TODO confirm against data_loader)
    """
    assert os.path.exists(path_input_dir)
    assert isinstance(N, int)

    list_of_path = data_loader.make_path_pic_list(path_input_dir)
    index_datapath_mapper, dataset = data_loader.make_data_matrix(
        list_of_input_files=list_of_path)
    self.dataset = dataset
    self.index_datapath_mapper = index_datapath_mapper

    train_test_object = data_loader.split_data_train_and_test(self.dataset, N)
    # Targets are always the clean (noise-free) split.
    self.y_train = train_test_object['train']
    self.y_test = train_test_object['test']
    self.N_test = train_test_object['N_test']

    # FIX: was `if self.is_add_noise==True:` — test truthiness directly.
    if self.is_add_noise:
        # Inputs become noisy copies of the clean targets
        # (denoising-autoencoder-style setup — TODO confirm).
        x_train = self.add_noise(train_set=train_test_object['train'],
                                 noise_ratio=self.noise_rate)
        x_test = self.add_noise(train_set=train_test_object['test'],
                                noise_ratio=self.noise_rate)
    else:
        x_train = train_test_object['train']
        x_test = train_test_object['test']

    assert isinstance(x_train, np.ndarray)
    assert isinstance(x_test, np.ndarray)
    assert len(x_train.shape) == 2
    assert len(x_test.shape) == 2

    self.x_train = x_train
    self.x_test = x_test
    # FIX: hoisted out of the if/else — it was computed identically in
    # both branches.
    self.n_dimension = x_train.shape[1]
def main(list_of_input_files, path_to_save_directory, project_name):
    """Build a FacePicDataSet from *list_of_input_files* and persist it.

    Creates ``<path_to_save_directory>/<project_name>/`` (if missing) and
    writes there:
      * ``<project_name>.pkl``  — the pickled FacePicDataSet (via serial.save)
      * ``<project_name>_index_data.json`` — the index -> data-path mapping
    The design matrix location is registered as ``<project_name>.npy``.

    :param list list_of_input_files: picture file paths fed to
        ``data_loader.make_data_matrix``
    :param path_to_save_directory: existing directory to save outputs under
    :param project_name: name of the output sub-directory and file stem
    """
    assert isinstance(list_of_input_files, list)
    assert os.path.exists(path_to_save_directory)
    assert isinstance(project_name, (str, unicode))

    # FIX: hoisted the four repeated os.path.join(...) calls; replaced the
    # `== False` comparison with the idiomatic `not`.
    project_dir = os.path.join(path_to_save_directory, project_name)
    if not os.path.exists(project_dir):
        os.mkdir(project_dir)

    index_datapath_mapper, data_matrix = data_loader.make_data_matrix(
        list_of_input_files=list_of_input_files)

    train = FacePicDataSet(data=data_matrix)
    train.use_design_loc(
        os.path.join(project_dir, '{}.npy'.format(project_name)))

    # FIX: removed unused local `train_csv_path` (computed but never used).
    train_pkl_path = os.path.join(project_dir, '{}.pkl'.format(project_name))
    # save in pickle
    serial.save(train_pkl_path, train)

    # save index_datasource_dict
    index_json_path = os.path.join(
        project_dir, '{}_index_data.json'.format(project_name))
    with codecs.open(index_json_path, 'w', 'utf-8') as f:
        f.write(json.dumps(index_datapath_mapper, indent=4, ensure_ascii=False))