def _parse_train(example_proto):
    """Decode one serialized training record into a (ground-truth, noisy) pair.

    The record is expected to carry two raw byte strings (``image_gt`` and
    ``image_n``, decoded here as float32) and two int64 scalars (``crop_dim``
    and ``sigma``).  Both images are reshaped to ``[crop_dim, crop_dim, 1]``
    and handed to ``random_flip`` together with the same random draw.

    Args:
        example_proto: A serialized ``tf.Example`` string tensor.

    Returns:
        Tuple ``(image_gt, image_n)`` as produced by ``random_flip``.
    """
    feature_spec = {
        "image_gt": tf.FixedLenFeature((), tf.string),
        "image_n": tf.FixedLenFeature((), tf.string),
        "crop_dim": tf.FixedLenFeature((), tf.int64),
        "sigma": tf.FixedLenFeature((), tf.int64),
    }
    record = tf.parse_single_example(example_proto, feature_spec)

    # crop_dim is already parsed as int64, so this cast does not change it.
    dim = tf.cast(record['crop_dim'], tf.int64)
    # sigma is read from the record but is not used further down.
    sigma = tf.cast(record['sigma'], tf.int64)

    raw_gt = tf.decode_raw(record['image_gt'], tf.float32)
    raw_n = tf.decode_raw(record['image_n'], tf.float32)

    image_gt = tf.cast(tf.reshape(raw_gt, tf.stack([dim, dim, 1])), tf.float32)
    image_n = tf.cast(tf.reshape(raw_n, tf.stack([dim, dim, 1])), tf.float32)

    # Only the first of the two uniform draws is consumed; passing the same
    # value for both images means they receive the same flip argument.
    draws = tf.random_uniform([2], 0, 1)
    flip_draw = draws[0]
    image_gt = random_flip(image_gt, flip_draw)
    image_n = random_flip(image_n, flip_draw)

    return image_gt, image_n
def __getitem__(self, i):
    """Load sample ``i``, optionally augment it, and normalise it.

    The image is read with OpenCV, converted from BGR to RGB, optionally
    augmented (training split with ``self.data_argu`` enabled), resized to
    300x300, scaled to [0, 1], standardised with ``self.mean`` /
    ``self.std`` and transposed with ``(2, 0, 1)``.  Box coordinates are
    divided by the width/height the image had just before resizing.

    Args:
        i: Index into ``self.images`` / ``self.objects``.

    Returns:
        Tuple ``(image, boxes, labels, difficulties)``.
    """
    image = cv2.imread(self.images[i])
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype("float32")

    objects = self.objects[i]
    boxes = np.array(objects['boxes']).astype("float32")
    labels = np.array(objects['labels'])
    difficulties = np.array(objects['difficulties'])

    if not self.keep_difficult:
        # Use a boolean mask.  Indexing with ``1 - difficulties`` would be
        # integer fancy indexing on NumPy arrays (selecting rows 0 and 1
        # over and over) instead of dropping the difficult objects.
        keep = np.logical_not(difficulties.astype(bool))
        boxes = boxes[keep]
        labels = labels[keep]
        difficulties = difficulties[keep]

    if self.split == 'TRAIN' and self.data_argu:
        # Photometric distortions are applied in a random order.
        data_enhance = [
            random_bright, random_contrast, random_saturation, random_hue
        ]
        random.shuffle(data_enhance)
        for d in data_enhance:
            image = d(image)

        if random.random() < 0.5:
            image, boxes = random_expand(image, boxes)

        image, boxes, labels, difficulties = random_crop(
            image, boxes, labels, difficulties)
        image, boxes = random_flip(image, boxes)

    # Remember the pre-resize size: boxes are still in those pixel units.
    height, width, _ = image.shape
    image = cv2.resize(image, (300, 300))
    image /= 255.
    image = (image - self.mean) / self.std
    image = image.transpose((2, 0, 1)).astype("float32")

    boxes[:, [0, 2]] /= width
    boxes[:, [1, 3]] /= height

    return image, boxes, labels, difficulties
def train_next_batch(self, batch_size):
    """Draw a random training batch without replacement and preprocess it.

    Each selected image is resized to
    ``(self.image_size[0], self.image_size[1])``, passed through
    ``utils.transform`` and then ``utils.random_flip``.

    Args:
        batch_size: Number of distinct samples to draw from
            ``self.train_data``.

    Returns:
        ``np.ndarray`` built from the list of preprocessed images.
    """
    num_train = self.train_data.shape[0]
    chosen = np.random.choice(range(num_train), batch_size, replace=False)
    raw_batch = self.train_data[chosen]

    target_size = (self.image_size[0], self.image_size[1])
    processed = []
    for raw_img in raw_batch:
        resized = scipy.misc.imresize(raw_img, target_size)
        processed.append(utils.random_flip(utils.transform(resized)))

    return np.asarray(processed)
def train_transformation(data, label):
    """Augment one training sample and convert it to float32.

    The sample is flipped via ``random_flip`` and scaled by 1/255.  With
    probability about one half it is additionally random-cropped to 224x224
    and resized to 256x256; the axes are permuted with ``(1, 2, 0)`` before
    and ``(2, 0, 1)`` after that step.

    Args:
        data: Image array accepted by ``random_flip``.
        label: Scalar label.

    Returns:
        Tuple ``(data, label)`` with ``label`` converted to float32.
    """
    scaled = nd.array(random_flip(data) / 255.0).astype('float32')

    if np.random.uniform() > 0.5:
        # Move the leading axis to the end for the mx.image operators
        # (presumably channel-first -> channel-last; confirm with caller).
        moved = scaled.transpose((1, 2, 0))
        cropper = mx.image.RandomCropAug([224, 224])
        moved = mx.image.imresize(cropper(moved), 256, 256)
        scaled = moved.transpose((2, 0, 1))

    target = nd.array([label]).asscalar().astype('float32')
    return scaled, target
def _aug_image(self, instance, net_h, net_w):
    """Load one image and apply random scale/crop, colour and flip jitter.

    Args:
        instance: Dict with at least ``'filename'`` (image path) and
            ``'object'`` (annotations passed to ``correct_bounding_boxes``).
        net_h: Target network input height.
        net_w: Target network input width.

    Returns:
        Tuple ``(im_sized, all_objs)``: the augmented image and the
        annotations adjusted by ``correct_bounding_boxes``.

    Raises:
        IOError: If the image file cannot be read.
    """
    image_name = instance['filename']
    image = cv2.imread(image_name)  # OpenCV loads images as BGR

    if image is None:
        # cv2.imread signals failure by returning None; fail here with a
        # clear message instead of a TypeError on the slicing below.
        raise IOError('Cannot find {}'.format(image_name))

    image = image[:, :, ::-1]  # reverse channel order: BGR -> RGB
    image_h, image_w, _ = image.shape

    # determine the amount of scaling and cropping
    dw = self.jitter * image_w
    dh = self.jitter * image_h

    new_ar = (image_w + np.random.uniform(-dw, dw)) / (
        image_h + np.random.uniform(-dh, dh))
    scale = np.random.uniform(0.25, 2)

    # NOTE(review): `scale` only affects one dimension in each branch; the
    # other is derived from net_h / net_w directly.  Confirm this is intended.
    if new_ar < 1:
        new_h = int(scale * net_h)
        new_w = int(net_h * new_ar)
    else:
        new_w = int(scale * net_w)
        new_h = int(net_w / new_ar)

    dx = int(np.random.uniform(0, net_w - new_w))
    dy = int(np.random.uniform(0, net_h - new_h))

    # apply scaling and cropping
    im_sized = apply_random_scale_and_crop(image, new_w, new_h, net_w,
                                           net_h, dx, dy)

    # randomly distort hsv space
    im_sized = random_distort_image(im_sized)

    # randomly flip; the same flag is forwarded to the box correction
    flip = np.random.randint(2)
    im_sized = random_flip(im_sized, flip)

    # correct the size and pos of bounding boxes
    all_objs = correct_bounding_boxes(instance['object'], new_w, new_h,
                                      net_w, net_h, dx, dy, flip, image_w,
                                      image_h)

    return im_sized, all_objs
def gen_train(self):
    """Yield training batches of randomly flipped samples with one-hot labels.

    The training indices are reshuffled at the start of every pass.  A
    partially filled batch at the end of a pass is carried over into the
    next one.  The iteration limit is only re-checked between passes.

    Yields:
        Tuple ``(x_batch, y_batch)`` each time ``self._batch_size`` samples
        have been written into freshly initialised batch buffers.
    """
    x_batch, y_batch = self._batch_init()
    batches_done = 0
    slot = 0

    while batches_done < self._num_iterations:
        # new sample order for this pass
        self._shuffle_train()

        for sample_idx in self._idcs_train:
            x_batch[slot], y_batch[slot] = random_flip(
                self._train[sample_idx],
                onehot(self._train_label[sample_idx], self._num_classes))
            slot += 1

            if slot < self._batch_size:
                continue

            # batch is full: hand it out and start a fresh one
            yield x_batch, y_batch
            x_batch, y_batch = self._batch_init()
            slot = 0
            batches_done += 1
def train_next_batch(self, batch_size):
    """Draw a random batch of image paths, load and preprocess the images.

    Each image is loaded with ``utils.load_data`` (gray-scale), resized to
    ``(self.image_size[0], self.image_size[1])``, passed through
    ``utils.transform`` and then ``utils.random_flip``.

    Args:
        batch_size: Number of distinct paths to draw from
            ``self.train_data``.

    Returns:
        ``np.ndarray`` built from the *preprocessed* images.
    """
    batch_paths = np.random.choice(self.train_data,
                                   batch_size,
                                   replace=False)
    batch_imgs = [
        utils.load_data(batch_path,
                        input_height=self.input_height,
                        input_width=self.input_width,
                        is_gray_scale=True) for batch_path in batch_paths
    ]

    target_size = (self.image_size[0], self.image_size[1])
    # NOTE(review): the trailing positional `1` lands in cv2.resize's `dst`
    # slot, not `interpolation` -- kept as-is, confirm what was intended.
    batch_imgs_ = [
        utils.random_flip(utils.transform(cv2.resize(img, target_size, 1)))
        for img in batch_imgs
    ]

    # Return the processed images; returning `batch_imgs` would silently
    # discard the resize / transform / flip work done above.
    return np.asarray(batch_imgs_)
def __getitem__(self, index):
    """Return ``(array, label, weight)`` for the sample at ``index``.

    The array is loaded from ``self.paths[index]``, augmented when
    ``self.train`` is set (shift, rotate, flip), standardised with fixed
    constants, and replicated three times along a new axis 1.

    Args:
        index: Position in ``self.paths`` / ``self.labels``.

    Returns:
        Tuple of ``torch.FloatTensor``: the array, a one-element label and a
        one-element weight looked up in ``self.weights`` by the label.
    """
    volume = np.load(self.paths[index])

    target = self.labels[index]
    label = torch.FloatTensor([target])
    weight = torch.FloatTensor([self.weights[target]])

    if self.train:
        # data augmentation: shift, then rotate, then flip
        volume = ut.random_flip(
            ut.random_rotate(ut.random_shift(volume, 25), 25))

    # data standardization with fixed mean / std constants
    volume = (volume - 58.09) / 49.73

    # Three identical copies are stacked on a new axis 1, so a 3-D
    # [S, H, W] input becomes [S, 3, H, W].
    volume = np.stack([volume] * 3, axis=1)

    return torch.FloatTensor(volume), label, weight
def eval_model(model,dataset,num_samples):
    """Return the mean per-sample MSE of ``model`` on random samples.

    Up to ``num_samples`` samples are drawn with a ``RandomSampler``.  Each
    one goes through the same image selection and random augmentation calls
    as the visible code below, then is evaluated on the GPU as a batch of
    one.  The model is left in eval mode.

    Args:
        model: Network mapping an ``(N, C, H, W)`` float tensor to a
            prediction comparable with the label.
        dataset: Indexable dataset whose items provide ``'steering_angle'``
            and the image-path key returned by ``utils.choose_image``.
        num_samples: Maximum number of samples to evaluate.

    Returns:
        Sum of the per-sample losses divided by the number of samples
        actually evaluated, or ``0.0`` when none were evaluated.
    """
    model.eval()
    criterion = nn.MSELoss()
    total_loss = 0.0
    evaluated = 0
    sampler = RandomSampler(dataset)
    torch.manual_seed(0)

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for sample_id in tqdm(sampler):
            if evaluated == num_samples:
                break

            data = dataset[sample_id]
            img_pth, label = utils.choose_image(data['steering_angle'])

            img = cv2.imread(data[img_pth])
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = utils.preprocess(img)
            img, label = utils.random_flip(img, label)
            img, label = utils.random_translate(img, label, 100, 10)
            img = utils.random_shadow(img)
            img = utils.random_brightness(img)

            # batch of one: (1, H, W, C) -> (1, C, H, W)
            img = Variable(torch.cuda.FloatTensor([img]))
            img = img.permute(0,3,1,2)
            label = np.array([label]).astype(float)
            label = Variable(torch.cuda.FloatTensor(label))

            out_vec = model(img)
            loss = criterion(out_vec,label)

            total_loss += loss.item()
            # Exactly one sample is evaluated per step, so the divisor must
            # grow by one (not by a hard-coded batch size) for a true mean.
            evaluated += 1

    if evaluated == 0:
        # Empty dataset or num_samples == 0: avoid dividing by zero.
        return 0.0
    return total_loss / float(evaluated)
# Running loss accumulators, all reset to zero before iterating the loader
# (presumably once per epoch -- the enclosing loop is not visible here).
recon_loss_e = 0
seq_l2_loss_e = 0
seq_l1_loss_e = 0
seq_preceptual_e = 0
for i, batch in enumerate(trainloader):
    # Each batch provides an image/depth pair for the current frame and
    # another pair for the next frame.
    curr_image, curr_depth = batch['image'], batch['depth']
    next_image, next_depth = batch['next_image'], batch['next_depth']
    # Move all four tensors to the GPU.
    curr_image = curr_image.cuda()
    curr_depth = curr_depth.cuda()
    next_image = next_image.cuda()
    next_depth = next_depth.cuda()
    # Flip each (image, depth) pair together; the two pairs are flipped
    # by separate calls.
    curr_image_t, curr_depth_t = utils.random_flip(
        curr_image, curr_depth)
    next_image_t, next_depth_t = utils.random_flip(
        next_image, next_depth)
    # generate random_depth: permute the current batch along dim 0 and
    # detach the shuffled image/depth tensors from the graph
    random_index = torch.randperm(opt.batchSize).cuda()
    rand_image = torch.index_select(curr_image, 0, random_index).detach()
    rand_depth = torch.index_select(curr_depth, 0, random_index).detach()
    ####################### train discriminator #######################
    # Generate fake image: the model is called with a (flipped) source
    # image/depth pair plus the un-flipped depth of the frame to synthesise
    fake_img_curr = model(next_image_t, next_depth_t, curr_depth)
    fake_img_next = model(curr_image_t, curr_depth_t, next_depth)
    rand_img_curr = model(curr_image_t, curr_depth_t, rand_depth)
def transformation(data, label):
    """Apply ``random_flip`` followed by ``random_square_crop`` to a sample.

    Args:
        data: Input passed to ``random_flip``.
        label: Target transformed together with ``data``.

    Returns:
        Tuple ``(data, label)`` after both augmentations.
    """
    flipped_data, flipped_label = random_flip(data, label)
    cropped_data, cropped_label = random_square_crop(flipped_data,
                                                     flipped_label)
    return cropped_data, cropped_label
def show_img(img, title=''):
    """Show ``img`` in an OpenCV window and block until a key is pressed.

    All OpenCV windows are destroyed after the key press.

    Args:
        img: Image passed to ``cv2.imshow``.
        title: Window title (defaults to an empty string).
    """
    cv2.imshow(title, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


def show_imgs(images):
    """Show every image in ``images`` one after another via ``show_img``."""
    for index, img in enumerate(images):
        show_img(img)


# Preprocess for model - START:
# NOTE(review): img_path, steering_angle, range_x and range_y are not defined
# in the visible part of the file -- presumably set earlier; confirm.
image = utils.load_image('data', img_path)
image_flip, steering_angle = utils.random_flip(image, steering_angle)
image_translate, steering_angle = utils.random_translate(
    image_flip, steering_angle, range_x, range_y)
# Shadow and brightness steps are currently disabled, so the crop below
# works directly on the translated image.
# image_shadow = utils.random_shadow(image_translate)
# image_brightness = utils.random_brightness(image_shadow)
# image_crop = utils.crop(image_brightness)
image_crop = utils.crop(image_translate)
image_resize = utils.resize(image_crop)
image_rgb2yuv = utils.rgb2yuv(image_resize)
# Preprocess for model - END
# Intermediate results collected in pipeline order.
images = [
    image,
    image_flip,
    image_translate,
    # image_shadow,
def train_model(args, model, dataset_train, dataset_val):
    """Train ``model`` on randomly sampled, randomly augmented single images.

    Samples are drawn one at a time with replacement.  Gradients are
    accumulated over ``args.batch_size`` samples before each Adam update.
    The training loss is printed every 20 steps and logged every 100 steps.
    Every 5000 steps the model is validated on 400 samples and its state
    dict is saved to ``args.model_dir`` as ``model_<step>``.

    Args:
        args: Namespace providing ``batch_size``, ``nb_epoch``,
            ``samples_per_epoch`` and ``model_dir``.
        model: Network to optimise; expected to live on the GPU.
        dataset_train: Indexable training dataset.
        dataset_val: Dataset forwarded to ``eval_model``.
    """
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.MSELoss()
    step = 0
    imgs_per_batch = args.batch_size
    optimizer.zero_grad()

    for epoch in range(args.nb_epoch):
        sampler = RandomSampler(dataset_train,
                                replacement=True,
                                num_samples=args.samples_per_epoch)
        for i, sample_id in enumerate(sampler):
            data = dataset_train[sample_id]
            label = data['steering_angle']
            img_pth, label = utils.choose_image(label)

            img = cv2.imread(data[img_pth])
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = utils.preprocess(img)
            img, label = utils.random_flip(img, label)
            img, label = utils.random_translate(img, label, 100, 10)
            img = utils.random_shadow(img)
            img = utils.random_brightness(img)

            # batch of one: (1, H, W, C) -> (1, C, H, W)
            img = Variable(torch.cuda.FloatTensor([img]))
            label = np.array([label]).astype(float)
            label = Variable(torch.cuda.FloatTensor(label))
            img = img.permute(0,3,1,2)

            out_vec = model(img)
            loss = criterion(out_vec,label)
            loss.backward()

            # Update once a full group of `imgs_per_batch` samples has been
            # accumulated.  Testing `step % imgs_per_batch == 0` would fire
            # at step 0, i.e. after a single sample.
            if (step + 1) % imgs_per_batch == 0:
                optimizer.step()
                optimizer.zero_grad()

            if step%20==0:
                log_str = \
                    'Epoch: {} | Iter: {} | Step: {} | ' + \
                    'Train Loss: {:.8f} |'
                log_str = log_str.format(
                    epoch,
                    i,
                    step,
                    loss.item())
                print(log_str)

            if step%100==0:
                log_value('train_loss',loss.item(),step)

            if step%5000==0:
                # Validation switches the model to eval mode; switch back.
                val_loss = eval_model(model,dataset_val, num_samples=400)
                log_value('val_loss',val_loss,step)
                log_str = \
                    'Epoch: {} | Iter: {} | Step: {} | Val Loss: {:.8f}'
                log_str = log_str.format(
                    epoch,
                    i,
                    step,
                    val_loss)
                print(log_str)
                model.train()

                # Checkpoint on the same schedule as validation.
                if not os.path.exists(args.model_dir):
                    os.makedirs(args.model_dir)

                reflex_pth = os.path.join(
                    args.model_dir,
                    'model_{}'.format(step))
                torch.save(
                    model.state_dict(),
                    reflex_pth)

            step += 1
def train_transformation(data, label):
    """Flip, normalise and axis-permute one training sample.

    ``random_flip`` is applied to the data/label pair, the data is then
    normalised by ``img_norm`` using ``cfg.rgb_mean`` / ``cfg.rgb_std`` and
    its axes are permuted with ``(2, 0, 1)``.

    Args:
        data: Image array accepted by ``random_flip``.
        label: Target transformed together with ``data`` by the flip.

    Returns:
        Tuple ``(data, label)``.
    """
    flipped, label = random_flip(data, label)
    normalized = img_norm(flipped, cfg.rgb_mean, cfg.rgb_std)
    # move the last axis to the front (presumably HWC -> CHW; confirm)
    return nd.transpose(normalized, (2, 0, 1)), label