def val_transform(self, rgb, depth):
    s = self.getFocalScale()
    # This used to be the last step; not sure if it goes here?
    depth = np.asfarray(depth, dtype='float32')

    if self.augArgs.varScale:  # variable global scale simulation
        scale = self.getDepthGroup()
        depth_np = depth * scale
    else:
        depth_np = depth

    if self.augArgs.varFocus:  # variable focal length simulation
        transform = transforms.Compose([
            transforms.Crop(130, 10, 240, 1200),
            transforms.Resize(s),  # resize both images without correcting the depth values
            transforms.CenterCrop(self.output_size),
        ])
    else:
        transform = transforms.Compose([
            transforms.Crop(130, 10, 240, 1200),
            transforms.CenterCrop(self.output_size),
        ])

    rgb_np = transform(rgb)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = transform(depth_np)

    return rgb_np, depth_np
def train_transform(self, im, gt):
    im = np.array(im).astype(np.float32)
    gt = np.array(gt).astype(np.float32)

    s = np.random.uniform(1.0, 1.5)              # random scaling
    angle = np.random.uniform(-5.0, 5.0)         # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
    color_jitter = my_transforms.ColorJitter(0.4, 0.4, 0.4)

    transform = my_transforms.Compose([
        my_transforms.Crop(130, 10, 240, 1200),
        my_transforms.Resize(460 / 240, interpolation='bilinear'),
        my_transforms.Rotate(angle),
        my_transforms.Resize(s),
        my_transforms.CenterCrop(self.size),
        my_transforms.HorizontalFlip(do_flip)
    ])

    im_ = transform(im)
    im_ = color_jitter(im_)
    gt_ = transform(gt)

    im_ = np.array(im_).astype(np.float32)
    gt_ = np.array(gt_).astype(np.float32)
    im_ /= 255.0
    gt_ /= 100.0 * s

    im_ = to_tensor(im_)
    gt_ = to_tensor(gt_)
    gt_ = gt_.unsqueeze(0)

    return im_, gt_
def train_transform(self, rgb, depth):
    # s = np.random.uniform(1.0, 1.5)  # random scaling
    # depth_np = depth / s
    s = self.getFocalScale()

    if self.augArgs.varFocus:  # variable focal length simulation
        depth_np = depth
    else:
        depth_np = depth / s   # correct for focal length

    if self.augArgs.varScale:  # variable global scale simulation
        scale = self.getDepthGroup()
        depth_np = depth_np * scale

    angle = np.random.uniform(-5.0, 5.0)         # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st step of data augmentation
    transform = transforms.Compose([
        transforms.Crop(130, 10, 240, 1200),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(do_flip)
    ])

    rgb_np = transform(rgb)
    rgb_np = self.color_jitter(rgb_np)  # random color jittering
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255

    # Scipy affine_transform produced RuntimeError when the depth map was
    # given as a 'numpy.ndarray'
    depth_np = np.asfarray(depth_np, dtype='float32')
    depth_np = transform(depth_np)

    return rgb_np, depth_np
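# NOTE: the two helpers below are not part of the snippets above. They are a
# minimal sketch of what the undefined self.getFocalScale() and
# self.getDepthGroup() might look like, assuming the focal scale follows the
# same uniform [1.0, 1.5] draw used by the plain train_transform further down
# and the depth group is a small discrete set of global scale factors.
# The method names exist in the original code; the bodies and constants are
# assumptions for illustration only.
def getFocalScale(self):
    # hypothetical: per-sample focal/resize scale factor
    return np.random.uniform(1.0, 1.5)

def getDepthGroup(self):
    # hypothetical: pick one of a few global depth-scale groups
    return np.random.choice([0.5, 1.0, 2.0])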
def val_transform(self, rgb, depth):
    depth_np = depth / self.depth_divider

    transform = transforms.Compose([
        transforms.Crop(130, 10, 240, 1200),
        transforms.CenterCrop(self.output_size),
    ])

    rgb_np = transform(rgb)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = np.asfarray(depth_np, dtype='float32')
    depth_np = transform(depth_np)

    return rgb_np, depth_np
def val_transform(self, rgb, depth):
    depth_np = depth

    transform = transforms.Compose([
        transforms.Crop(130, 10, 220, 1200),
        transforms.CenterCrop(self.output_size)
    ])

    rgb_np = transform(rgb)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255  # normalize RGB from [0, 255] to [0, 1]
    depth_np = np.asfarray(depth_np, dtype='float32')
    depth_np = transform(depth_np)

    return rgb_np, depth_np
def val_transform(self, rgb, depth):
    depth_np = depth

    transform = transforms.Compose([
        transforms.Crop(0, 20, 750, 2000),
        transforms.Resize(500 / 750),
        transforms.CenterCrop(self.output_size),
    ])

    rgb_np = transform(rgb)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = np.asfarray(depth_np, dtype='float32')
    depth_np = transform(depth_np)

    return rgb_np, depth_np
def val_transform(self, rgb, depth):
    depth_np = depth

    transform = transforms.Compose([
        # transforms.Resize(250.0 / iheight),
        transforms.Crop(130, 10, 240, 1200),
        transforms.CenterCrop(self.output_size),
        transforms.Resize(self.output_size),
    ])

    rgb_np = transform(rgb)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = np.asfarray(depth_np, dtype='float32')
    depth_np = transform(depth_np)

    return rgb_np, depth_np
def _val_transform(self, rgb, sparse_depth, depth_gt):
    transform = transforms.Compose([
        transforms.Crop(*self._road_crop),
        transforms.CenterCrop(self.output_size),
    ])

    rgb = transform(rgb)
    rgb = np.asfarray(rgb, dtype='float') / 255

    sparse_depth = np.asfarray(sparse_depth, dtype='float32')
    sparse_depth = transform(sparse_depth)

    depth_gt = np.asfarray(depth_gt, dtype='float32')
    depth_gt = transform(depth_gt)

    return rgb, sparse_depth, depth_gt
def val_transform(self, im, gt):
    im = np.array(im).astype(np.float32)
    gt = np.array(gt).astype(np.float32)

    transform = my_transforms.Compose([
        my_transforms.Crop(130, 10, 240, 1200),
        my_transforms.Resize(460 / 240, interpolation='bilinear'),
        my_transforms.CenterCrop(self.size)
    ])

    im_ = transform(im)
    gt_ = transform(gt)

    im_ = np.array(im_).astype(np.float32)
    gt_ = np.array(gt_).astype(np.float32)
    im_ /= 255.0
    gt_ /= 100.0

    im_ = to_tensor(im_)
    gt_ = to_tensor(gt_)
    gt_ = gt_.unsqueeze(0)

    return im_, gt_
def train_transform(self, rgb, depth):
    s = np.random.uniform(1.0, 1.5)  # random scaling
    depth_np = depth / (s * self.depth_divider)
    angle = np.random.uniform(-5.0, 5.0)         # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st step of data augmentation
    transform = transforms.Compose([
        transforms.Crop(130, 10, 240, 1200),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(do_flip)
    ])

    rgb_np = transform(rgb)
    rgb_np = self.color_jitter(rgb_np)  # random color jittering
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255

    # Scipy affine_transform produced RuntimeError when the depth map was
    # given as a 'numpy.ndarray'
    depth_np = np.asfarray(depth_np, dtype='float32')
    depth_np = transform(depth_np)

    return rgb_np, depth_np
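# NOTE: usage sketch, not part of the original code. It shows how a train/val
# transform pair like the ones above is typically wired into a
# torch.utils.data.Dataset; the class name, split attribute and __getraw__
# loader are assumptions for illustration.
class DepthDataset(torch.utils.data.Dataset):
    def __getitem__(self, index):
        rgb, depth = self.__getraw__(index)  # hypothetical raw image/depth loader
        if self.split == 'train':
            rgb_np, depth_np = self.train_transform(rgb, depth)
        else:
            rgb_np, depth_np = self.val_transform(rgb, depth)
        input_tensor = to_tensor(rgb_np)                  # HWC float -> CHW tensor
        target_tensor = to_tensor(depth_np).unsqueeze(0)  # add channel dimension
        return input_tensor, target_tensor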
def _train_transform(self, rgb, sparse_depth, depth_gt):
    s = np.random.uniform(1.0, 1.5)  # random scaling
    depth_gt = depth_gt / s
    # TODO critical: why is the input not scaled in the original implementation?
    sparse_depth = sparse_depth / s  # TODO adapt and refactor
    angle = np.random.uniform(-5.0, 5.0)         # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st step of data augmentation
    # TODO critical: adjust sizes
    transform = transforms.Compose([
        transforms.Crop(*self._road_crop),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(do_flip)
    ])

    rgb = transform(rgb)
    sparse_depth = transform(sparse_depth)  # TODO needed?

    # Scipy affine_transform produced RuntimeError when the depth map was
    # given as a 'numpy.ndarray'
    depth_gt = np.asfarray(depth_gt, dtype='float32')
    depth_gt = transform(depth_gt)

    rgb = self._color_jitter(rgb)  # random color jittering
    # convert color [0, 255] -> [0.0, 1.0] floats
    rgb = np.asfarray(rgb, dtype='float') / 255

    return rgb, sparse_depth, depth_gt
def main(cfg, gpus):
    # Network builders
    torch.cuda.set_device(gpus[0])

    print('###### Create model ######')
    net_objectness = ModelBuilder.build_objectness(
        arch=cfg.MODEL.arch_objectness,
        weights=cfg.MODEL.weights_enc_query,
        fix_encoder=cfg.TRAIN.fix_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        input_dim=cfg.MODEL.decoder_dim,
        fc_dim=cfg.MODEL.fc_dim,
        ppm_dim=cfg.MODEL.ppm_dim,
        num_class=2,
        weights=cfg.MODEL.weights_decoder,
        dropout_rate=cfg.MODEL.dropout_rate,
        use_dropout=cfg.MODEL.use_dropout)
    crit = nn.NLLLoss(ignore_index=255)

    print('###### Load data ######')
    data_name = cfg.DATASET.name
    if data_name == 'VOC':
        from dataloaders.customized_objectness import voc_fewshot
        make_data = voc_fewshot
        max_label = 20
    elif data_name == 'COCO':
        from dataloaders.customized_objectness import coco_fewshot
        make_data = coco_fewshot
        max_label = 80
    else:
        raise ValueError('Wrong config for dataset!')

    labels = CLASS_LABELS[data_name][cfg.TASK.fold_idx]
    labels_val = CLASS_LABELS[data_name]['all'] - CLASS_LABELS[data_name][cfg.TASK.fold_idx]
    exclude_labels = labels_val

    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]

    train_transform = [
        transforms.ToNumpy(),
        transforms.RandScale([0.9, 1.1]),
        transforms.RandRotate([-10, 10], padding=mean, ignore_label=0),
        transforms.RandomGaussianBlur(),
        transforms.RandomHorizontalFlip(),
        transforms.Crop([cfg.DATASET.input_size[0], cfg.DATASET.input_size[1]],
                        crop_type='rand', padding=mean, ignore_label=0)
    ]
    train_transform = Compose(train_transform)

    val_transform = Compose([
        transforms.ToNumpy(),
        transforms.Resize_pad(size=cfg.DATASET.input_size[0])
    ])

    dataset = make_data(base_dir=cfg.DATASET.data_dir,
                        split=cfg.DATASET.data_split,
                        transforms=train_transform,
                        to_tensor=transforms.ToTensorNormalize_noresize(),
                        labels=labels,
                        max_iters=cfg.TRAIN.n_iters * cfg.TRAIN.n_batch,
                        n_ways=cfg.TASK.n_ways,
                        n_shots=cfg.TASK.n_shots,
                        n_queries=cfg.TASK.n_queries,
                        permute=cfg.TRAIN.permute_labels,
                        exclude_labels=exclude_labels)
    trainloader = DataLoader(dataset,
                             batch_size=cfg.TRAIN.n_batch,
                             shuffle=True,
                             num_workers=4,
                             pin_memory=True,
                             drop_last=True)

    # segmentation_module = nn.DataParallel(segmentation_module, device_ids=gpus)
    net_objectness.cuda()
    net_decoder.cuda()

    # Set up optimizers
    nets = (net_objectness, net_decoder, crit)
    optimizers = create_optimizers(nets, cfg)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    ave_total_loss = AverageMeter()
    ave_acc = AverageMeter()
    history = {'train': {'iter': [], 'loss': [], 'acc': []}}

    net_objectness.train(not cfg.TRAIN.fix_bn)
    net_decoder.train(not cfg.TRAIN.fix_bn)

    best_iou = 0
    # main loop
    tic = time.time()

    print('###### Training ######')
    for i_iter, sample_batched in enumerate(trainloader):
        # Prepare input
        feed_dict = data_preprocess(sample_batched, cfg)
        data_time.update(time.time() - tic)
        net_objectness.zero_grad()
        net_decoder.zero_grad()

        # adjust learning rate
        adjust_learning_rate(optimizers, i_iter, cfg)

        # forward pass
        feat = net_objectness(feed_dict['img_data'], return_feature_maps=True)
        pred = net_decoder(feat)
        loss = crit(pred, feed_dict['seg_label'])
        acc = pixel_acc(pred, feed_dict['seg_label'])
        loss = loss.mean()
        acc = acc.mean()

        # Backward
        loss.backward()
        for optimizer in optimizers:
            if optimizer:
                optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # update average loss and acc
        ave_total_loss.update(loss.data.item())
        ave_acc.update(acc.data.item() * 100)

        # calculate accuracy, and display
        if i_iter % cfg.TRAIN.disp_iter == 0:
            print('Iter: [{}][{}/{}], Time: {:.2f}, Data: {:.2f}, '
                  'lr_encoder: {:.6f}, lr_decoder: {:.6f}, '
                  'Accuracy: {:4.2f}, Loss: {:.6f}'.format(
                      i_iter, i_iter, cfg.TRAIN.n_iters,
                      batch_time.average(), data_time.average(),
                      cfg.TRAIN.running_lr_encoder, cfg.TRAIN.running_lr_decoder,
                      ave_acc.average(), ave_total_loss.average()))

            history['train']['iter'].append(i_iter)
            history['train']['loss'].append(loss.data.item())
            history['train']['acc'].append(acc.data.item())

        if (i_iter + 1) % cfg.TRAIN.save_freq == 0:
            checkpoint(nets, history, cfg, i_iter + 1)

        if (i_iter + 1) % cfg.TRAIN.eval_freq == 0:
            metric = Metric(max_label=max_label, n_runs=cfg.VAL.n_runs)
            with torch.no_grad():
                print('----Evaluation----')
                net_objectness.eval()
                net_decoder.eval()
                net_decoder.use_softmax = True
                for run in range(cfg.VAL.n_runs):
                    print(f'### Run {run + 1} ###')
                    set_seed(cfg.VAL.seed + run)

                    print('### Load validation data ###')
                    dataset_val = make_data(
                        base_dir=cfg.DATASET.data_dir,
                        split='val',
                        transforms=val_transform,
                        to_tensor=transforms.ToTensorNormalize_noresize(),
                        labels=labels_val,
                        max_iters=cfg.VAL.n_iters * cfg.VAL.n_batch,
                        n_ways=cfg.TASK.n_ways,
                        n_shots=cfg.TASK.n_shots,
                        n_queries=cfg.TASK.n_queries,
                        permute=cfg.VAL.permute_labels,
                        exclude_labels=[])
                    if data_name == 'COCO':
                        coco_cls_ids = dataset_val.datasets[0].dataset.coco.getCatIds()
                    testloader = DataLoader(dataset_val,
                                            batch_size=cfg.VAL.n_batch,
                                            shuffle=False,
                                            num_workers=1,
                                            pin_memory=True,
                                            drop_last=False)
                    print(f"Total # of validation data: {len(dataset_val)}")

                    # for sample_batched in tqdm.tqdm(testloader):
                    for sample_batched in testloader:
                        feed_dict = data_preprocess(sample_batched, cfg, is_val=True)
                        if data_name == 'COCO':
                            label_ids = [
                                coco_cls_ids.index(x) + 1
                                for x in sample_batched['class_ids']
                            ]
                        else:
                            label_ids = list(sample_batched['class_ids'])

                        feat = net_objectness(feed_dict['img_data'], return_feature_maps=True)
                        query_pred = net_decoder(feat, segSize=cfg.DATASET.input_size)

                        metric.record(
                            np.array(query_pred.argmax(dim=1)[0].cpu()),
                            np.array(feed_dict['seg_label'][0].cpu()),
                            labels=label_ids,
                            n_run=run)

                    classIoU, meanIoU = metric.get_mIoU(labels=sorted(labels_val), n_run=run)
                    classIoU_binary, meanIoU_binary = metric.get_mIoU_binary(n_run=run)

                classIoU, classIoU_std, meanIoU, meanIoU_std = metric.get_mIoU(
                    labels=sorted(labels_val))
                classIoU_binary, classIoU_std_binary, meanIoU_binary, meanIoU_std_binary = \
                    metric.get_mIoU_binary()

                print('----- Evaluation Result -----')
                print(f'best meanIoU_binary: {best_iou}')
                print(f'meanIoU mean: {meanIoU}')
                print(f'meanIoU std: {meanIoU_std}')
                print(f'meanIoU_binary mean: {meanIoU_binary}')
                print(f'meanIoU_binary std: {meanIoU_std_binary}')

                if meanIoU_binary > best_iou:
                    best_iou = meanIoU_binary
                    checkpoint(nets, history, cfg, 'best')

            net_objectness.train(not cfg.TRAIN.fix_bn)
            net_decoder.train(not cfg.TRAIN.fix_bn)
            net_decoder.use_softmax = False

    print('Training Done!')
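# NOTE: sketch, not part of the original code. adjust_learning_rate() is called
# in the training loop above but not shown here; this is one plausible "poly"
# schedule that would populate the cfg.TRAIN.running_lr_* fields used in the
# log line. The lr_encoder/lr_decoder/lr_pow config names and the assumption
# that optimizers[0]/optimizers[1] are the encoder/decoder optimizers are
# illustrative only.
def adjust_learning_rate(optimizers, cur_iter, cfg):
    scale = (1 - cur_iter / cfg.TRAIN.n_iters) ** cfg.TRAIN.lr_pow
    cfg.TRAIN.running_lr_encoder = cfg.TRAIN.lr_encoder * scale
    cfg.TRAIN.running_lr_decoder = cfg.TRAIN.lr_decoder * scale

    optimizer_encoder, optimizer_decoder = optimizers[0], optimizers[1]
    if optimizer_encoder is not None:
        for param_group in optimizer_encoder.param_groups:
            param_group['lr'] = cfg.TRAIN.running_lr_encoder
    if optimizer_decoder is not None:
        for param_group in optimizer_decoder.param_groups:
            param_group['lr'] = cfg.TRAIN.running_lr_decoder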