import argparse
import os

import numpy as np

# make_dataset, augment_data, and Model are assumed to be defined/imported
# elsewhere in the project.


def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('--download_data', action='store_true', default=False,
                        help='Turn on to download data to disk.')
    parser.add_argument('--augment_data', action='store_true', default=False,
                        help='Turn on to augment raw data.')
    parser.add_argument('--raw_image_directory', default='../data/raw/',
                        help='Directory for downloaded images')
    parser.add_argument('--augmented_image_directory', default='../data/processed/',
                        help='Augmented image directory')
    parser.add_argument('--augmented_image_filename', default='augmented_images',
                        help='Augmented images filename')
    parser.add_argument('--batch_size', type=int, default=1,
                        help='Batch size for training')
    parser.add_argument('--n_epochs', type=int, default=1000,
                        help='Number of training epochs.')
    parser.add_argument('--saved_model_directory', default='../models/',
                        help='Directory for saving trained models')
    parser.add_argument('--learning_rate', type=float, default=0.001,
                        help='Optimizer learning rate')
    parser.add_argument('--early_stopping_max_checks', type=int, default=20,
                        help='Max checks without improvement for early stopping')
    parser.add_argument('--train', action='store_true', default=False,
                        help='Set to train the network')
    parser.add_argument('--infer', action='store_true', default=False,
                        help='Run inference on test images; a trained model must be loaded.')
    parser.add_argument('--load_checkpoint', type=str, default=None,
                        help='Load saved checkpoint, arg=checkpoint_name')
    args = parser.parse_args()

    os.makedirs(args.raw_image_directory, exist_ok=True)
    os.makedirs(args.augmented_image_directory, exist_ok=True)
    os.makedirs(args.saved_model_directory, exist_ok=True)

    if args.download_data:
        make_dataset(args)
    if args.augment_data:
        augment_data(args)

    data = np.load(os.path.join(args.augmented_image_directory,
                                args.augmented_image_filename + '.npz'))
    model = Model(args, data)
    model.build_net()

    if args.train:
        model.train()
    # Only warn about a missing checkpoint when inference was actually requested.
    if args.infer:
        if args.load_checkpoint is not None:
            model.infer()
        else:
            print('Trained model needs to be loaded for inference.')
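# Script entry point plus a hypothetical invocation (the script name and flag
# values below are illustrative, not from the source):
#
#   python pipeline.py --download_data --augment_data --train --batch_size 32
#
# Note: main() receives argv, but parse_args() reads sys.argv itself.
import sys

if __name__ == '__main__':
    main(sys.argv)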
import h5py
import numpy as np
from PIL import Image

# augment_data and the IMAGE_HEIGHT, IMAGE_WIDTH, LABEL_SEQ_LEN constants are
# assumed to be defined elsewhere in the project.


def process(file_name, data_tuple):
    img_data = np.zeros((len(data_tuple), 1, IMAGE_HEIGHT, IMAGE_WIDTH),
                        dtype=np.float32)
    # 73 presumably marks unused label positions (padding).
    label_seq = 73 * np.ones((len(data_tuple), LABEL_SEQ_LEN), dtype=np.float32)
    for i, datum in enumerate(data_tuple):
        img_path, numbers, do_aug = datum
        label_seq[i, :len(numbers)] = numbers
        img = Image.open(img_path).convert('L')  # load as grayscale
        if do_aug:
            img = augment_data(img)
        if img is None:
            continue
        img = img.resize((IMAGE_WIDTH, IMAGE_HEIGHT))
        img = np.array(img)
        img = img[..., np.newaxis]
        img = np.transpose(img, (2, 0, 1))  # HWC -> CHW
        img_data[i] = img
        # Periodic progress logging.
        if (i + 1) % 1000 == 0:
            print('[+] ###{} name: {}'.format(i, img_path))
            print('[+] number: {}'.format(','.join(map(str, numbers))))
            print('[+] label: {}'.format(','.join(map(str, label_seq[i]))))
    with h5py.File(file_name, 'w') as f:
        f.create_dataset('data', data=img_data)
        f.create_dataset('label', data=label_seq)
    print('=== H5 data written to', file_name)
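# Example usage (illustrative): each data_tuple entry is
# (image_path, digit_labels, apply_augmentation). The paths, label values,
# and constant values below are hypothetical, not from the source.
IMAGE_HEIGHT, IMAGE_WIDTH, LABEL_SEQ_LEN = 60, 160, 6  # assumed values
samples = [
    ('images/00001.png', [3, 1, 4, 1], False),
    ('images/00002.png', [2, 7, 1, 8], True),
]
process('train.h5', samples)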
import librosa
import pandas as pd

# augment_data and get_features are assumed to be defined elsewhere in the
# project.


def get_training_data(dataframe):
    print(dataframe.head())
    X = []
    Y = []
    for index, row in dataframe.iterrows():
        emotion = row["emotion"]
        path = row["path"]
        print("processing file", path)
        # duration and offset skip the silence at the start and end of each
        # audio file.
        data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)
        # augment_data returns the original data as its first element.
        for augmented_data in augment_data(data, sample_rate):
            features = get_features(augmented_data, sample_rate)
            # One row of features per (possibly augmented) clip, with the
            # target label stored alongside in Y.
            X.append(features)
            Y.append(emotion)
    df = pd.DataFrame(X)
    df["label"] = Y
    return df
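# A minimal sketch of preparing the input dataframe and calling
# get_training_data(); the file paths and emotion labels below are
# hypothetical.
demo = pd.DataFrame({
    "emotion": ["happy", "sad"],
    "path": ["audio/happy_01.wav", "audio/sad_01.wav"],
})
features_df = get_training_data(demo)
features_df.to_csv("features.csv", index=False)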
import pandas as pd
from sklearn.model_selection import train_test_split

import augment_data

# reformat() is assumed to be defined elsewhere in the project.


def data_load2(input_file, image_size, multiplier):
    # Read the training CSV with pandas. (The submission/test CSV is read in
    # a separate function.)
    train_set = pd.read_csv(input_file)

    # One-hot encode the labels into columns label_0 ... label_9, e.g. the
    # digit 4 becomes 0 0 0 0 1 0 0 0 0 0.
    train_set = pd.get_dummies(train_set, columns=['label'])

    # All columns except the last 10 are pixel features (as an ndarray) ...
    train_set_features = train_set.iloc[:, :-10].values
    # ... and the last 10 columns are the one-hot labels.
    train_set_labels = train_set.iloc[:, -10:].values

    # Split into a training set and a held-out set. A fixed random_state
    # makes the split reproducible across runs.
    training_features, testing_all_features, training_labels, testing_all_labels = \
        train_test_split(train_set_features, train_set_labels,
                         test_size=0.2, random_state=1212)

    # Split the held-out set again into test and validation halves (the
    # latter measures accuracy at the end of training).
    testing_features, validating_features, testing_labels, validating_labels = \
        train_test_split(testing_all_features, testing_all_labels,
                         test_size=0.5, random_state=0)

    # Reshape the flat pixel vectors into image tensors.
    training_dataset = reformat(training_features, image_size)
    testing_dataset = reformat(testing_features, image_size)
    validating_dataset = reformat(validating_features, image_size)

    # Augment the training and test sets.
    training_dataset, training_labels = augment_data.augment_data(
        training_dataset, training_labels, multiplier)
    testing_dataset, testing_labels = augment_data.augment_data(
        testing_dataset, testing_labels, multiplier)

    return (training_dataset, testing_dataset, validating_dataset,
            training_labels, testing_labels, validating_labels)
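# Example usage (illustrative): a sketch assuming the Kaggle digit-recognizer
# train.csv (28x28 pixel images); the path and multiplier are hypothetical.
(training_dataset, testing_dataset, validating_dataset,
 training_labels, testing_labels, validating_labels) = data_load2(
    'dataset/train.csv', image_size=28, multiplier=2)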
import os

import cv2
import numpy as np

# augment_data and sparse_tuple_from are assumed to be defined elsewhere in
# the project; next_batch() is a method of a batching class.


def next_batch(self):
    # Reshuffle the data at the start of each epoch.
    if self._next_index == 0:
        perm = np.arange(self._num_examples)
        np.random.shuffle(perm)
        self._filenames = [self.filenames[i] for i in perm]
        self._labels = self.labels[perm]
    batch_size = self._batch_size
    start = self._next_index
    end = self._next_index + batch_size
    if end > self._num_examples:
        # Wrap around: restart from the beginning and count a finished epoch.
        self._next_index = 0
        start = self._next_index
        end = self._next_index + batch_size
        self._num_epoches += 1
    else:
        self._next_index = end
    images = np.zeros([batch_size, self._img_h, self._img_w, self._num_channels])
    for j, i in enumerate(range(start, end)):
        fname = self._filenames[i]
        img = cv2.imread(os.path.join(self._img_dir, fname))
        # Apply data augmentation before grayscale conversion and resizing.
        img = augment_data(img)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.resize(img, (self._img_w, self._img_h),
                         interpolation=cv2.INTER_CUBIC)
        images[j, ...] = img[..., np.newaxis]
    images = np.transpose(images, axes=[0, 2, 1, 3])  # swap H and W axes
    labels = self._labels[start:end, ...]
    targets = [np.asarray(i) for i in labels]
    sparse_labels = sparse_tuple_from(targets)
    seq_len = np.ones(self._batch_size) * 24  # fixed length of 24 time steps
    return images, sparse_labels, seq_len
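# sparse_tuple_from() is called above but not defined in this snippet. A
# common implementation looks like the sketch below, assuming the TensorFlow
# CTC convention of an (indices, values, dense_shape) triplet and non-empty
# label sequences.
def sparse_tuple_from(sequences, dtype=np.int32):
    """Convert a list of label sequences to (indices, values, dense_shape)."""
    indices, values = [], []
    for n, seq in enumerate(sequences):
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    # dense_shape is (batch_size, max_sequence_length).
    dense_shape = np.asarray([len(sequences), indices[:, 1].max() + 1],
                             dtype=np.int64)
    return indices, values, dense_shape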
import os
import sys

import h5py
import numpy as np

# Project helpers (imageimporter, checkpoint_isbinary, checkpoint_nobinary,
# check_img_dims, augment_data, addtl_augs, third_augs, dim_convert,
# config_export) are assumed to be importable from the surrounding project.


def main():
    print('Starting Training Data Preprocessing')
    arg_list = sys.argv[1:]
    if len(arg_list) < 3:
        print('Usage: python3 PreprocessTrainingData.py /ImageData/training/images/ '
              '/ImageData/training/labels/ /ImageData/training/augmented/')
        print('Optional: secondary augmentation strength (int value or path to a '
              'config file; defaults to 0) and tertiary augmentation strength, '
              'given before the output directory.')
        print('Example: python3 PreprocessTrainingData.py /ImageData/training/images/ '
              '/ImageData/training/labels/ 0 5 /ImageData/training/augmented/')
        return

    # Count the training sets provided and collect per-set augmentation levels.
    ends = []
    augmentation_level = []
    third_aug_lvl = []
    i = 2
    while i < len(arg_list):
        if arg_list[i].isdigit() or arg_list[i] == '-1' or arg_list[i].endswith('.ini'):
            if i + 1 < len(arg_list) and arg_list[i + 1].isdigit():
                ends.append(i - 1)
                augmentation_level.append(arg_list[i])
                third_aug_lvl.append(arg_list[i + 1])
                i += 4
            else:
                ends.append(i - 1)
                augmentation_level.append(arg_list[i])
                third_aug_lvl.append('0')
                i += 3
        else:
            ends.append(i - 1)
            augmentation_level.append('-1')
            third_aug_lvl.append('0')
            i += 2
    num_training_sets = len(augmentation_level)
    print('num_training_sets:', num_training_sets)
    print('augmentation_level:', augmentation_level)
    print('third_augmentation_level:', third_aug_lvl)

    for j in range(num_training_sets):
        training_img_path = arg_list[ends[j] - 1]
        print('Training Image Path:', training_img_path)
        label_img_path = arg_list[ends[j]]
        print('Training Label Path:', label_img_path)
        strength = augmentation_level[j]
        print('Secondary Augmentation level:', strength)
        third_str = third_aug_lvl[j]
        print('Tertiary Augmentation level:', third_str)
        outdir = arg_list[-1]
        print('Output Path:', outdir)
        if not os.path.isdir(outdir):
            os.mkdir(outdir)

        # ------------------------------------------------------------------
        # Load and verify the training labels
        # ------------------------------------------------------------------
        print('Loading:')
        print(label_img_path)
        lblstack = imageimporter(label_img_path)
        print('Verifying labels')
        checkpoint_isbinary(lblstack)
        if np.max(lblstack[:]) != 1:
            lblstack = lblstack > 0

        # ------------------------------------------------------------------
        # Apply denoising (image enhancement) when strength is -1
        # ------------------------------------------------------------------
        if strength == '-1':
            print('Running image enhancement')
            enhanced_path = os.path.join(outdir, 'enhanced_v' + str(j + 1))
            run_enhancement = ('enhance_stack.py ' + training_img_path + ' '
                               + enhanced_path + ' 2')
            os.system(run_enhancement)
            training_img_path = enhanced_path

        # ------------------------------------------------------------------
        # Load and verify the training images
        # ------------------------------------------------------------------
        print('Loading:')
        print(training_img_path)
        imgstack = imageimporter(training_img_path)
        print('Verifying images')
        checkpoint_nobinary(imgstack)

        # ------------------------------------------------------------------
        # Check that image and label dimensions agree
        # ------------------------------------------------------------------
        [imgstack, lblstack] = check_img_dims(imgstack, lblstack, 325)

        # ------------------------------------------------------------------
        # Augment the data, generating 16 versions, and save each to HDF5
        # ------------------------------------------------------------------
        img_v1 = imgstack.astype('float32')
        lb_v1 = lblstack.astype('float32')
        del imgstack
        del lblstack
        d_details = '/data'
        l_details = '/label'
        ext = '.h5'
        print('Augmenting training data 1-8 and 9-16')
        for i in range(8):
            # Versions 1-8: the eight base augmentations.
            img, lb = augment_data(img_v1, lb_v1, i)
            img_n, lb_n = img.astype(np.uint8), lb.astype(np.uint8)
            # Versions 9-16: the same augmentations flipped along axis 0.
            inv_img_n = np.flip(img, 0).astype(np.uint8)
            inv_lb_n = np.flip(lb, 0).astype(np.uint8)
            del img, lb
            if strength != '-1':
                # Apply secondary augmentations.
                img_result, lb_result, addtl_choices = addtl_augs(
                    strength, img_n, lb_n, i)
            else:
                img_result, lb_result = img_n, lb_n
            del img_n, lb_n
            # Apply tertiary augmentations.
            img_result_r, lb_result_r = third_augs(
                third_str, img_result, lb_result, i)
            img_result_f = img_result_r.astype('float32')
            lb_result_f = lb_result_r.astype('float32')
            filename = os.path.abspath(outdir) + '/' + \
                'training_full_stacks_v{0}_{1}{2}'.format(j + 1, i + 1, ext)
            print('Saving: ', filename)
            hdf5_file = h5py.File(filename, mode='w')
            img_result_f, lb_result_f = dim_convert(img_result_f, lb_result_f)
            hdf5_file.create_dataset(d_details, data=img_result_f)
            hdf5_file.create_dataset(l_details, data=lb_result_f)
            hdf5_file.close()
            del img_result, img_result_r, img_result_f
            del lb_result, lb_result_r, lb_result_f

            # Versions 9-16 follow the same pipeline on the flipped stacks.
            if strength != '-1':
                inv_img_result, inv_lb_result, inv_addtl_choices = addtl_augs(
                    strength, inv_img_n, inv_lb_n, i + 8)
            else:
                inv_img_result, inv_lb_result = inv_img_n, inv_lb_n
            del inv_img_n, inv_lb_n
            inv_img_result_r, inv_lb_result_r = third_augs(
                third_str, inv_img_result, inv_lb_result, i + 8)
            inv_img_result_f = inv_img_result_r.astype('float32')
            inv_lb_result_f = inv_lb_result_r.astype('float32')
            filename = os.path.abspath(outdir) + '/' + \
                'training_full_stacks_v{0}_{1}{2}'.format(j + 1, i + 1 + 8, ext)
            print('Saving: ', filename)
            hdf5_file = h5py.File(filename, mode='w')
            inv_img_result_f, inv_lb_result_f = dim_convert(
                inv_img_result_f, inv_lb_result_f)
            hdf5_file.create_dataset(d_details, data=inv_img_result_f)
            hdf5_file.create_dataset(l_details, data=inv_lb_result_f)
            hdf5_file.close()
            del inv_img_result, inv_img_result_r, inv_img_result_f
            del inv_lb_result, inv_lb_result_r, inv_lb_result_f

        # Write the configuration file into the augmentation folder.
        if strength == '-1':
            config_export.writecfg_den(outdir, j + 1, strength, third_str)
        else:
            config_export.writecfg(outdir, j + 1, addtl_choices, strength, third_str)

    print('\n-> Training data augmentation completed')
    print('Training data stored in ', outdir)
    print('For training your model please run runtraining.sh ', outdir,
          '<desired output directory>\n')
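# A minimal sketch of reading one saved stack back for inspection; the
# filename is hypothetical but follows the naming scheme used above.
import h5py

with h5py.File('augmented/training_full_stacks_v1_1.h5', 'r') as f:
    data = f['/data'][...]
    label = f['/label'][...]
    print(data.shape, label.shape)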