def wandb_logging(d, step, use_wandb=True, prefix="training:"):
    """Log a dict of scalar metrics to wandb (optionally) and to the text logger.

    d: mapping of metric name -> float value (formatted with 4 decimals).
    step: global step forwarded to wandb and printed in the text line.
    """
    if use_wandb:
        wandb.log(d, step=step)
    # build the human-readable line in one pass, then emit it once
    pieces = ["{} step={}\t".format(prefix, step)]
    pieces.extend("{k}={v:.4f}\t".format(k=name, v=value) for name, value in d.items())
    logger.info("".join(pieces))
def do_test(logger, tdcnn_demo, dataloader_test):
    """Evaluate the best saved checkpoint of `tdcnn_demo` on the test split.

    Loads ``best_model.pth`` from the logger directory, remaps its keys to fit
    the DataParallel-wrapped model, runs `test_net`, and logs the final
    (mAP, per-class AP) result. Leaves the model in train mode.
    """
    logger.info('do test')
    logger.info(args.test_nms)
    if torch.cuda.is_available():
        tdcnn_demo = tdcnn_demo.cuda()
    # normalize args.gpus to a list so DataParallel accepts it
    if isinstance(args.gpus, int):
        args.gpus = [args.gpus]
    tdcnn_demo = nn.parallel.DataParallel(tdcnn_demo, device_ids=args.gpus)
    # the checkpoint stores the weights under the 'model' key
    state_dict = torch.load(
        os.path.join(logger.get_logger_dir(), "best_model.pth"))['model']
    logger.info("best_model.pth loaded!")
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        # checkpoints saved without DataParallel lack the 'module.' prefix
        if 'module' not in k:
            k = 'module.{}'.format(k)
        # 'modules_focal' appears to be a renamed submodule — TODO confirm
        # against the current model definition
        if 'modules_focal' in k:
            k = k.replace('modules_focal', '_fusion_modules')
        new_state_dict[k] = v
    tdcnn_demo.load_state_dict(new_state_dict)
    tdcnn_demo.eval()
    test_mAP, test_ap = test_net(tdcnn_demo,
                                 dataloader=dataloader_test,
                                 args=args,
                                 split='test',
                                 max_per_video=args.max_per_video,
                                 thresh=args.thresh)
    tdcnn_demo.train()
    logger.info("final test set result: {}".format((test_mAP, test_ap)))
    logger.info("Congrats~")
def optimize_epoch(self, optimizer, loader, epoch, validation=False):
    """Run one training epoch of the self-labelling optimization.

    Sets the scheduled LR, periodically re-optimizes the pseudo-labels
    ``self.L`` (whenever the processed-sample count crosses the next entry of
    ``self.optimize_times``), and trains the CNN with cross-entropy against
    the current pseudo-labels.

    Returns a dict with the epoch-average loss under key 'loss'.
    """
    logger.warning(f"Starting epoch {epoch}, validation: {validation} " + "="*30)
    loss_value = util.AverageMeter()
    # house keeping
    self.model.train()
    # checkpoint right before an LR drop so pre-drop weights are preserved
    if self.lr_schedule(epoch + 1) != self.lr_schedule(epoch):
        files.save_checkpoint_all(self.checkpoint_dir, self.model, args.arch,
                                  optimizer, self.L, epoch,
                                  lowest=False, save_str='pre-lr-drop')
    lr = self.lr_schedule(epoch)
    for pg in optimizer.param_groups:
        pg['lr'] = lr
    XE = torch.nn.CrossEntropyLoss()
    for iter, (data, label, selected) in tqdm(enumerate(loader),
                                              desc="epoch={}/{}".format(epoch, args.epochs)):
        now = time.time()
        niter = epoch * len(loader) + iter
        if niter * args.batch_size >= self.optimize_times[-1]:
            ############ optimize labels #########################################
            self.model.headcount = 1
            logger.warning('Optimizaton starting')
            with torch.no_grad():
                _ = self.optimize_times.pop()
                self.optimize_labels(niter)
        data = data.to(self.dev)
        mass = data.size(0)
        final = self.model(data)
        #################### train CNN ####################################################
        if self.hc == 1:
            loss = XE(final, self.L[0, selected])
        else:
            # average the per-head cross-entropies
            loss = torch.mean(torch.stack(
                [XE(final[h], self.L[h, selected]) for h in range(self.hc)]))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_value.update(loss.item(), mass)
        data = 0  # drop the batch reference early
        # some logging stuff ##############################################################
        if iter % args.log_iter == 0:
            if self.writer:
                self.writer.add_scalar('lr', self.lr_schedule(epoch), niter)
            # FIX: logger.info was called as logger.info(niter, "..."), i.e.
            # two positional args — the logging machinery %-formats the first
            # arg with the rest and fails; build a single message instead.
            logger.info("{} Loss: {:.3f}".format(niter, loss.item()))
            logger.info("{} Freq: {:.2f}".format(niter, mass / (time.time() - now)))
            # FIX: original tested bare `writer` (NameError) — use self.writer.
            if self.writer:
                self.writer.add_scalar('Loss', loss.item(), niter)
                if iter > 0:
                    self.writer.add_scalar('Freq(Hz)', mass / (time.time() - now), niter)
    # end of epoch logging ################################################################
    if self.writer and (epoch % args.log_intv == 0):
        util.write_conv(self.writer, self.model, epoch=epoch)
    files.save_checkpoint_all(self.checkpoint_dir, self.model, args.arch,
                              optimizer, self.L, epoch, lowest=False)
    return {'loss': loss_value.avg}
def __init__(self, input_features, nf, operator='J2', activation='softmax', ratio=[2, 2, 1, 1], num_operators=1, drop=False):
    """Edge-weight ("W") computation module: a stack of 1x1 conv + BN layers.

    input_features: channels of the input tensor; nf: base feature width.
    ratio scales the widths of the four intermediate conv layers.
    """
    super(Wcompute, self).__init__()
    logger.info("Wcompute activation type: {}".format(activation))
    self.num_features = nf
    self.operator = operator
    # pre-compute the four intermediate channel widths
    w1 = int(nf * ratio[0])
    w2 = int(nf * ratio[1])
    w3 = nf * ratio[2]
    w4 = nf * ratio[3]
    # NOTE: attribute-assignment order is preserved so submodule registration
    # order (and hence .parameters() order) matches the original.
    self.conv2d_1 = nn.Conv2d(input_features, w1, 1, stride=1)
    self.bn_1 = nn.BatchNorm2d(w1)
    self.drop = drop
    if self.drop:
        self.dropout = nn.Dropout(0.3)
    self.conv2d_2 = nn.Conv2d(w1, w2, 1, stride=1)
    self.bn_2 = nn.BatchNorm2d(w2)
    self.conv2d_3 = nn.Conv2d(w2, w3, 1, stride=1)
    self.bn_3 = nn.BatchNorm2d(w3)
    self.conv2d_4 = nn.Conv2d(w3, w4, 1, stride=1)
    self.bn_4 = nn.BatchNorm2d(w4)
    self.conv2d_last = nn.Conv2d(nf, num_operators, 1, stride=1)
    self.activation = activation
def opt_sk(model, selflabels_in, epoch):
    """Recompute pseudo-labels via Sinkhorn-Knopp optimization.

    Single-head case (args.hc == 1): collects the full softmax matrix over the
    training set and optimizes it directly. Multi-head case: collects raw
    features once, then updates only head ``epoch % args.hc`` in
    ``selflabels_in``. Returns the updated labels.
    """
    if args.hc == 1:
        # (num_samples, num_clusters) probability matrix
        PS = np.zeros((len(trainloader.dataset), args.ncl))
    else:
        # raw features; the per-head softmax is applied after the pass
        PS_pre = np.zeros((len(trainloader.dataset), knn_dim))
    for batch_idx, (data, _, _selected) in enumerate(trainloader):
        data = data.cuda()
        if args.hc == 1:
            p = nn.functional.softmax(model(data), 1)
            PS[_selected, :] = p.detach().cpu().numpy()
        else:
            p = model(data)
            PS_pre[_selected, :] = p.detach().cpu().numpy()
    if args.hc == 1:
        # NOTE(review): another optimize_L_sk in this codebase returns
        # (cost, labels) — confirm which variant is in scope here.
        selflabels = optimize_L_sk(PS)
    else:
        _nmis = np.zeros(args.hc)
        nh = epoch % args.hc  # np.random.randint(args.hc)
        logger.info("computing head {}".format(nh))
        tl = getattr(model, "top_layer{}".format(nh))
        # do the forward pass:
        PS = (PS_pre @ tl.weight.cpu().numpy().T + tl.bias.cpu().numpy())
        PS = py_softmax(PS, 1)
        selflabels_ = optimize_L_sk(PS)
        selflabels_in[nh] = selflabels_
        selflabels = selflabels_in
    return selflabels
def validate(self):
    """
    Function to validate a training model on the val split.

    Computes per-batch loss and mIoU over the whole val loader, always saves
    the latest checkpoint, and additionally copies it to model_best when the
    mean IoU improves.
    """
    logger.info("start validation....")
    val_loss = 0
    label_trues, label_preds = [], []
    # Evaluation
    for batch_idx, (data, target) in tqdm.tqdm(
            enumerate(self.val_loader),
            total=len(self.val_loader),
            desc='Validation iteration = {},epoch={}'.format(
                self.iteration, self.epoch),
            leave=False):
        if self.cuda:
            data, target = data.cuda(), target.cuda()
        # NOTE(review): Variable(volatile=True) and loss.data[0] are pre-0.4
        # PyTorch idioms; modern equivalents are torch.no_grad()/loss.item().
        data, target = Variable(data, volatile=True), Variable(target)
        score = self.model(data)
        loss = CrossEntropyLoss2d_Seg(score, target,
                                      size_average=self.size_average)
        if np.isnan(float(loss.data[0])):
            raise ValueError('loss is nan while validating')
        val_loss += float(loss.data[0]) / len(data)
        # per-pixel argmax over the class dimension
        lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
        lbl_true = target.data.cpu().numpy()
        label_trues.append(lbl_true)
        label_preds.append(lbl_pred)
    # Computing the metrics
    acc, acc_cls, mean_iu, _ = torchfcn.utils.label_accuracy_score(
        label_trues, label_preds, self.n_class)
    val_loss /= len(self.val_loader)
    logger.info("iteration={},epoch={},validation mIoU = {}".format(
        self.iteration, self.epoch, mean_iu))
    is_best = mean_iu > self.best_mean_iu
    if is_best:
        self.best_mean_iu = mean_iu
    # always persist the latest checkpoint; copy as best when improved
    torch.save(
        {
            'epoch': self.epoch,
            'iteration': self.iteration,
            'arch': self.model.__class__.__name__,
            'optim_state_dict': self.optim.state_dict(),
            'model_state_dict': self.model.state_dict(),
            'best_mean_iu': self.best_mean_iu,
        }, osp.join(logger.get_logger_dir(), 'checkpoint.pth.tar'))
    if is_best:
        shutil.copy(
            osp.join(logger.get_logger_dir(), 'checkpoint.pth.tar'),
            osp.join(logger.get_logger_dir(), 'model_best.pth.tar'))
def get_validation_miou(model):
    """Run `model` over the validation loader and return the mean IoU score."""
    model.eval()
    for idx, (images_val, labels_val) in tqdm(enumerate(valloader),
                                              total=len(valloader),
                                              desc="validation"):
        # debug modes evaluate only a handful of batches
        if idx > 5 and is_debug == 1:
            break
        if idx > 200 and is_debug == 2:
            break
        logits = model(Variable(images_val, volatile=True).cuda())
        # per-pixel argmax over the class dimension
        pred = logits.data.max(1)[1].cpu().numpy()
        running_metrics.update(labels_val.numpy(), pred)
    score, class_iou = running_metrics.get_scores()
    for metric_name, metric_value in score.items():
        logger.info("{}: {}".format(metric_name, metric_value))
    running_metrics.reset()
    return score['Mean IoU : \t']
def print_args():
    """Pretty-print the global ARGS namespace as a table and return it."""
    global ARGS
    rows = [[name, value] for name, value in vars(ARGS).items()]
    table = tabulate(rows, headers=["name", "value"])
    logger.info(colored("Args details", "cyan") + table)
    return ARGS
def Res_Deeplab(NoLabels=21, pretrained = False, output_all = False):
    """Build an MS_Deeplab model, optionally restoring downloaded pretrained weights."""
    model = MS_Deeplab(Bottleneck, NoLabels, output_all)
    if not pretrained:
        return model
    logger.info("initializing pretrained deeplabv2 model....")
    weights_path = MS_Deeplab.download()
    model.load_state_dict(torch.load(weights_path))
    return model
def _decompress_(self):
    """Unzip the mini-imagenet image archive found under `self.root`."""
    logger.info("\nDecompressing Images...")
    archive = '%s/compressed/mini_imagenet/images.zip' % self.root
    if not os.path.isfile(archive):
        raise Exception('Missing %s' % archive)
    # NOTE(review): shell-based unzip with an unquoted path — breaks on
    # paths containing spaces/shell metacharacters.
    os.system('unzip %s -d %s/mini_imagenet/' % (archive, self.root))
    logger.info("Decompressed")
def train(epoch, selflabels):
    """Train one epoch, re-optimizing self-labels when the processed-sample
    count crosses the next entry of the global ``optimize_times`` schedule.

    Returns the (possibly updated) selflabels tensor.
    """
    logger.info('Epoch: %d' % epoch)
    adjust_learning_rate(optimizer, epoch)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()
    # switch to train mode
    model.train()
    end = time.time()
    for batch_idx, (inputs, targets, indexes) in enumerate(trainloader):
        niter = epoch * len(trainloader) + batch_idx
        pytorchgo_args.get_args().step += 1
        if args.debug and batch_idx >= 20: break
        if niter * trainloader.batch_size >= optimize_times[-1]:
            # time to re-optimize the pseudo-labels
            with torch.no_grad():
                _ = optimize_times.pop()
                if args.hc > 1:
                    # multi-head labelling works on raw features
                    feature_return_switch(model, True)
                selflabels = opt_sk(model, selflabels, epoch)
                if args.hc > 1:
                    feature_return_switch(model, False)
        data_time.update(time.time() - end)
        inputs, targets, indexes = inputs.to(device), targets.to(device), indexes.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        if args.hc == 1:
            loss = criterion(outputs, selflabels[indexes])
        else:
            # average the per-head losses
            loss = torch.mean(torch.stack(
                [criterion(outputs[h], selflabels[h, indexes]) for h in range(args.hc)]))
        loss.backward()
        optimizer.step()
        train_loss.update(loss.item(), inputs.size(0))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if batch_idx % 10 == 0:
            logger.info('Epoch: [{}/{}][{}/{}]'
                        'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f}), best_knn_acc={best_knn_acc}'.format(
                            epoch, args.epochs, batch_idx, len(trainloader),
                            batch_time=batch_time, data_time=data_time,
                            train_loss=train_loss, best_knn_acc=best_acc))
            wandb_logging(
                d=dict(loss1e4=loss.item() * 1e4,
                       group0_lr=optimizer.state_dict()['param_groups'][0]['lr']),
                step=pytorchgo_args.get_args().step,
                use_wandb=pytorchgo_args.get_args().wandb,
                prefix="training epoch {}/{}: ".format(epoch, pytorchgo_args.get_args().epochs))
    return selflabels
def load_model(model_name, args):
    """Load 'checkpoints/<exp_name>/models/<model_name>.t7' if it exists.

    Returns the loaded model, or None when loading fails (a fresh network
    should then be initialized by the caller).
    """
    try:
        # keep the try body minimal: only the load can legitimately fail
        model = torch.load('checkpoints/%s/models/%s.t7' % (args.exp_name, model_name))
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrow to Exception.
        logger.info('Initiallize new Network Weights for %s' % model_name)
        return None
    logger.info('Loading Parameters from the last trained %s Model' % model_name)
    return model
def validate(val_loader, model, criterion, iter, log):
    """Evaluate `model` on `val_loader`; logs progress every args.print_freq
    batches, writes a summary line to `log`, and returns the average top-1.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to evaluate mode
    model.eval()
    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        # FIX: `target.cuda(async=True)` is a SyntaxError on Python 3.7+
        # (`async` became a keyword); PyTorch renamed the kwarg to
        # `non_blocking` in 0.4.
        target = target.cuda(non_blocking=True)
        # NOTE(review): Variable(volatile=True) / loss.data[0] are pre-0.4
        # PyTorch idioms kept for consistency with the rest of this codebase.
        input_var = torch.autograd.Variable(input, volatile=True)
        target_var = torch.autograd.Variable(target, volatile=True)
        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data[0], input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            output = ('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5))
            logger.info(output)
            log.write(output + '\n')
            log.flush()
    output = ('Testing Results: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
              .format(top1=top1, top5=top5, loss=losses))
    print(output)
    output_best = '\nBest Prec@1: %.3f' % (best_prec1)
    print(output_best)
    log.write(output + ' ' + output_best + '\n')
    log.flush()
    return top1.avg
def Res_Deeplab(NoLabels=21, pretrained=False):
    """Build MS_Deeplab; optionally restore COCO-pretrained deeplabv2 weights.

    When NoLabels differs from the pretrained 21 classes, the classifier head
    ('layer5') weights are skipped during restoration.
    """
    model = MS_Deeplab(Bottleneck, NoLabels)
    if not pretrained:
        return model
    logger.info("initializing pretrained deeplabv2 model....")
    #model_file = MS_Deeplab.download()
    saved_state_dict = torch.load(
        '/home/hutao/data/models/pytorch/MS_DeepLab_resnet_pretrained_COCO_init.pth'
    )
    logger.info("deeplabv2 weight keys: {}".format(saved_state_dict.keys()))
    new_params = model.state_dict().copy()
    for key in saved_state_dict:
        # key looks like: Scale.layer5.conv2d_list.3.weight
        parts = key.split('.')
        if NoLabels != 21 and parts[1] == 'layer5':
            logger.info("skip weight: {}".format(key))
            continue
        new_params[key] = saved_state_dict[key]
        logger.info("recover weight: {}".format(key))
    model.load_state_dict(new_params)
    return model
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = args.lr
    if args.restart:
        if epoch == args.epochs // 2:
            # NOTE(review): this rebinds only the *local* name `optimizer`;
            # the caller's optimizer is unchanged (preserved from the
            # original code — likely a latent bug, confirm intent).
            optimizer = optim.SGD(model.parameters(), lr=args.lr,
                                  momentum=args.momentum, weight_decay=1e-6,
                                  nesterov=False)
    # FIX: four copy-pasted branches collapsed into one table:
    # total epochs -> (decay start, decay interval); lr decays 10x per
    # interval once `epoch` reaches the start.
    schedules = {200: (80, 40),    # i.e. 120, 160
                 400: (160, 80),   # i.e. 240, 320
                 800: (320, 160),  # i.e. 480, 640
                 1600: (640, 320)}
    sched = schedules.get(args.epochs)
    if sched is not None:
        start, interval = sched
        if epoch >= start:
            lr = args.lr * (0.1 ** ((epoch - start) // interval))
        logger.info(lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
def train(self):
    """Training driver: derives max_epoch from max_iter and the shorter
    loader, runs train_epoch per epoch, and steps the LR every 8 epochs.
    """
    max_epoch = int(math.ceil(self.max_iter / min(len(self.train_loader),
                                                  len(self.target_loader))))
    self.max_epoch = max_epoch
    logger.info("max epoch: {}".format(max_epoch))
    for cur_epoch in tqdm.tqdm(range(self.epoch, max_epoch),
                               desc='Train {}/{}'.format(self.epoch, max_epoch)):
        self.epoch = cur_epoch
        # anneal the learning rate every 8 epochs (skipping epoch 0)
        if self.epoch > 0 and self.epoch % 8 == 0:
            logger.info("change learning rate!!!")
            self.optim = step_scheduler(self.optim, self.epoch)
        self.train_epoch()
        if self.iteration >= self.max_iter:
            break
def proceed_test(model_g, model_f1, model_f2, quick_test=1e10):
    """Evaluate the G + F1 (+ optional F2) segmentation pipeline on the
    cityscapes-16 val set and log mIoU / mean accuracy / accuracy.

    `quick_test` caps the number of evaluated images (default: effectively
    unlimited). Models are restored to train mode before returning.
    """
    logger.info("proceed test on cityscapes val set...")
    model_g.eval()
    model_f1.eval()
    model_f2.eval()
    # (width, height) of the test-time input
    test_img_shape = (2048, 1024)
    val_img_transform = Compose([
        Scale(test_img_shape, Image.BILINEAR),
        ToTensor(),
        Normalize([.485, .456, .406], [.229, .224, .225]),
    ])
    val_label_transform = Compose([
        Scale(test_img_shape, Image.BILINEAR),
        # ToTensor()
    ])
    target_loader = data.DataLoader(get_dataset(dataset_name="city16",
                                                split="val",
                                                img_transform=val_img_transform,
                                                label_transform=val_label_transform,
                                                test=True,
                                                input_ch=3),
                                    batch_size=1,
                                    pin_memory=True)
    from tensorpack.utils.stats import MIoUStatistics
    stat = MIoUStatistics(args.n_class)
    for index, (origin_imgs, labels, paths) in tqdm(enumerate(target_loader)):
        if index > quick_test:
            break
        path = paths[0]
        # if index > 10: break
        imgs = Variable(origin_imgs.cuda(), volatile=True)
        feature = model_g(imgs)
        outputs = model_f1(feature)
        # optionally ensemble the second classifier head
        if args.use_f2:
            outputs += model_f2(feature)
        # per-pixel argmax over the class dimension
        pred = outputs[0, :].data.max(0)[1].cpu()
        feed_predict = np.squeeze(np.uint8(pred.numpy()))
        feed_label = np.squeeze(np.asarray(labels.numpy()))
        stat.feed(feed_predict, feed_label)
    logger.info("tensorpack mIoU: {}".format(stat.mIoU))
    logger.info("tensorpack mean_accuracy: {}".format(stat.mean_accuracy))
    logger.info("tensorpack accuracy: {}".format(stat.accuracy))
    model_g.train()
    model_f1.train()
    model_f2.train()
def optimize(self):
    """Perform full optimization.

    Builds the exponentially-spread label-optimization schedule, optionally
    resumes from a checkpoint, initializes shuffled pseudo-labels for a fresh
    run, then trains epoch by epoch, checkpointing on every new lowest loss.
    Returns the trained model (also saved as model_final.pth.tar).
    """
    first_epoch = 0
    self.model = self.model.to(self.dev)
    N = len(self.pseudo_loader.dataset)
    # optimization times (spread exponentially), can also just be linear in practice (i.e. every n-th epoch)
    self.optimize_times = [(self.num_epochs + 2) * N] + \
        ((self.num_epochs + 1.01) * N * (np.linspace(0, 1, args.nopts) ** 2)[::-1]).tolist()
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, self.model.parameters()),
                                weight_decay=self.weight_decay,
                                momentum=self.momentum,
                                lr=self.lr)
    if self.checkpoint_dir is not None and self.resume:
        self.L, first_epoch = files.load_checkpoint_all(self.checkpoint_dir, self.model, optimizer)
        # FIX: logger.warning was called with two positional args
        # (message, value) — the logging machinery %-formats the message
        # and fails/garbles; format explicitly instead.
        logger.warning('found first epoch to be {}'.format(first_epoch))
        # drop schedule entries that belong to already-completed epochs
        include = [(qq / N >= first_epoch) for qq in self.optimize_times]
        self.optimize_times = (np.array(self.optimize_times)[include]).tolist()
    logger.warning('We will optimize L at epochs: {}'.format(
        [np.round(1.0 * t / N, 2) for t in self.optimize_times]))
    if first_epoch == 0:
        # initiate labels as shuffled.
        self.L = np.zeros((self.hc, N), dtype=np.int32)
        for nh in range(self.hc):
            for _i in range(N):
                self.L[nh, _i] = _i % self.outs[nh]
            self.L[nh] = np.random.permutation(self.L[nh])
        self.L = torch.LongTensor(self.L).to(self.dev)
    # Perform optmization ###############################################################
    lowest_loss = 1e9
    epoch = first_epoch
    while epoch < (self.num_epochs + 1):
        m = self.optimize_epoch(optimizer, self.train_loader, epoch, validation=False)
        if m['loss'] < lowest_loss:
            lowest_loss = m['loss']
            files.save_checkpoint_all(self.checkpoint_dir, self.model, args.arch,
                                      optimizer, self.L, epoch, lowest=True)
        epoch += 1
    logger.info(f"optimization completed. Saving model to {os.path.join(self.checkpoint_dir, 'model_final.pth.tar')}")
    torch.save(self.model, os.path.join(self.checkpoint_dir, 'model_final.pth.tar'))
    return self.model
def _train_it(self, it, batch, idx_minor=None, mixrates=None, strategy=None, manilayer_batch=0): self.model.train() if self.lr_scheduler is not None: self.lr_scheduler.step(it) if self.bnm_scheduler is not None: self.bnm_scheduler.step(it) self.optimizer.zero_grad() _, loss, eval_res = self.model_fn(self.model, batch, idx_minor=idx_minor, mixrates=mixrates, strategy=strategy, manilayer_batch=manilayer_batch) if it%50 == 0: logger.info("loss={}".format(loss.item())) loss.backward() self.optimizer.step() return eval_res
def train_net(tdcnn_demo, dataloader, optimizer, args):
    """Train the few-shot temporal detection model for one epoch.

    Sums RPN + RCNN classification/regression losses, backpropagates per
    batch, and logs windowed loss statistics every args.disp_interval steps.
    """
    # setting to train mode
    tdcnn_demo.train()
    loss_temp = 0
    for step, (support_data, video_data, gt_twins, num_gt) in tqdm(
            enumerate(dataloader),
            desc="training epoch {}/{}".format(args.epoch, args.max_epochs)):
        if is_debug and step > debug_small_iter:
            break
        video_data = video_data.cuda()
        # move each of the `shot` support clips to the GPU
        for i in range(args.shot):
            support_data[i] = support_data[i].cuda()
        gt_twins = gt_twins.cuda()
        tdcnn_demo.zero_grad()
        rois, cls_prob, twin_pred, rpn_loss_cls, rpn_loss_twin, \
            RCNN_loss_cls, RCNN_loss_twin, rois_label = tdcnn_demo(video_data, gt_twins, support_data)
        # .mean() folds per-GPU losses when the model is DataParallel-wrapped
        loss = rpn_loss_cls.mean() + rpn_loss_twin.mean() + RCNN_loss_cls.mean(
        ) + RCNN_loss_twin.mean()
        loss_temp += loss.item()
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if step % args.disp_interval == 0:
            # average the accumulated loss over the display window
            if step > 0:
                loss_temp /= args.disp_interval
            loss_rpn_cls = rpn_loss_cls.mean().item()
            loss_rpn_twin = rpn_loss_twin.mean().item()
            loss_rcnn_cls = RCNN_loss_cls.mean().item()
            loss_rcnn_twin = RCNN_loss_twin.mean().item()
            # foreground/background ROI counts for this batch
            fg_cnt = torch.sum(rois_label.data.ne(0))
            bg_cnt = rois_label.data.numel() - fg_cnt
            gt_cnt = num_gt.sum().item()
            logger.info("[epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e, best_mAP: %.4f" \
                % (args.epoch, step + 1, len(dataloader), loss_temp, args.lr, args.best_result))
            logger.info("fg/bg=(%d/%d), gt_twins: %d" % (
                fg_cnt,
                bg_cnt,
                gt_cnt,
            ))
            logger.info("rpn_cls: %.4f, rpn_twin: %.4f, rcnn_cls: %.4f, rcnn_twin %.4f" \
                % (loss_rpn_cls, loss_rpn_twin, loss_rcnn_cls, loss_rcnn_twin))
            # track the best (lowest) windowed loss on the shared args object
            if args.best_loss > loss_temp:
                args.best_loss = loss_temp
                logger.info("best_loss: %.4f" % (loss_temp))
            loss_temp = 0
def train(self):
    """Adversarial training driver: per epoch, step both optimizers' LR
    schedules, run one training epoch, then validate.
    """
    logger.info("train_loader length: {}".format(len(self.train_loader)))
    logger.info("target_loader length: {}".format(len(self.target_loader)))
    shorter = min(len(self.target_loader), len(self.train_loader))
    # trim to a whole number of batches, minus one safety iteration
    self.iters_per_epoch = shorter - (shorter % self.batch_size) - 1
    logger.info("iters_per_epoch :{}".format(self.iters_per_epoch))
    self.max_epoch = args.max_epoch
    for cur_epoch in tqdm.trange(self.epoch, args.max_epoch, desc='Train'):
        self.epoch = cur_epoch
        self.optim = step_scheduler(self.optim, self.epoch, base_lr_schedule, "base model")
        self.optimD = step_scheduler(self.optimD, self.epoch, dis_lr_schedule, "discriminater model")
        self.model.train()
        self.netD.train()
        self.train_epoch()
        self.model.eval()
        self.validate()
        self.model.train()  # return to training mode
def optimizer_summary(optim_list):
    """Log one table per optimizer describing every param group's tensors
    (shape, lr, weight decay, requires_grad) plus the total parameter count.
    """
    if not isinstance(optim_list, list):
        optim_list = [optim_list]
    from operator import mul
    for optim in optim_list:
        assert isinstance(
            optim, torch.optim.Optimizer), ValueError("must be an Optimizer instance")
        rows = []
        total_params = 0
        for group_id, group in enumerate(optim.param_groups):
            lr = group["lr"]
            weight_decay = group["weight_decay"]
            for param_id, param in enumerate(group["params"]):
                shape = list(param.data.size())
                total_params += reduce(mul, shape, 1)
                rows.append([group_id, param_id, shape, lr, weight_decay,
                             param.requires_grad])
        table = tabulate(
            rows,
            headers=["group", "id", "shape", "lr", "weight_decay", "requires_grad"])
        logger.info(
            colored(
                "Optimizer Summary, Optimzer Parameters: #param={} \n".format(
                    total_params),
                "cyan",
            ) + table)
def get_validation_miou(model_g, model_f1, model_f2, quick_test=1e10):
    """Compute mIoU of the G + F1 (+ optional F2) pipeline on cityscapes-16 val.

    Logits are upsampled back to full cityscapes resolution before scoring.
    `quick_test` caps the number of images (forced to 2 when is_debug == 1).
    Returns the mean IoU; models are put back into train mode.
    """
    if is_debug == 1:
        quick_test = 2
    logger.info("proceed test on cityscapes val set...")
    model_g.eval()
    model_f1.eval()
    model_f2.eval()
    val_img_transform = Compose([
        Scale(train_img_shape, Image.BILINEAR),
        ToTensor(),
        Normalize([.485, .456, .406], [.229, .224, .225]),
    ])
    val_label_transform = Compose([
        Scale(cityscapes_image_shape, Image.NEAREST),
        # ToTensor()
    ])
    # notice here, training, validation size difference, this is very tricky.
    target_loader = data.DataLoader(get_dataset(dataset_name="city16",
                                                split="val",
                                                img_transform=val_img_transform,
                                                label_transform=val_label_transform,
                                                test=True,
                                                input_ch=3),
                                    batch_size=1,
                                    pin_memory=True)
    from tensorpack.utils.stats import MIoUStatistics
    stat = MIoUStatistics(args.n_class)
    # upsample logits to the full-size label resolution; shape is (W, H)
    # so Upsample takes (H, W) = (shape[1], shape[0])
    interp = torch.nn.Upsample(size=(cityscapes_image_shape[1], cityscapes_image_shape[0]),
                               mode='bilinear')
    for index, (origin_imgs, labels, paths) in tqdm(enumerate(target_loader)):
        if index > quick_test:
            break
        path = paths[0]
        imgs = Variable(origin_imgs.cuda(), volatile=True)
        feature = model_g(imgs)
        outputs = model_f1(feature)
        # optionally ensemble the second classifier head
        if args.use_f2:
            outputs += model_f2(feature)
        # per-pixel argmax over the class dimension, at full resolution
        pred = interp(outputs)[0, :].data.max(0)[1].cpu()
        feed_predict = np.squeeze(np.uint8(pred.numpy()))
        feed_label = np.squeeze(np.asarray(labels.numpy()))
        stat.feed(feed_predict, feed_label)
    logger.info("tensorpack IoU16: {}".format(stat.mIoU_beautify))
    logger.info("tensorpack mIoU16: {}".format(stat.mIoU))
    model_g.train()
    model_f1.train()
    model_f2.train()
    return stat.mIoU
def optimize_L_sk(PS):
    """Sinkhorn-Knopp optimization of the pseudo-label assignment.

    PS: (N, K) array of class probabilities (N samples, K clusters).
    Iteratively rescales rows/columns toward uniform marginals, takes the
    per-sample argmax as the new labels, and computes the assignment cost.
    Returns (cost, selflabels) with selflabels a CUDA LongTensor of size N.

    NOTE: the in-place scaling/unscaling of PS below is order-sensitive —
    do not reorder these statements.
    """
    N, K = PS.shape
    tt = time.time()
    PS = PS.T  # now it is K x N
    # uniform target marginals over clusters (r) and samples (c)
    r = np.ones((K, 1)) / K
    c = np.ones((N, 1)) / N
    PS **= args.lamb  # K x N
    inv_K = 1. / K
    inv_N = 1. / N
    err = 1e3
    _counter = 0
    while err > 1e-2:
        r = inv_K / (PS @ c)  # (KxN)@(N,1) = K x 1
        c_new = inv_N / (r.T @ PS).T  # ((1,K)@(KxN)).t() = N x 1
        # check convergence only every 10 iterations (the err update is cheap
        # to skip; the loop exits once the relative change is small)
        if _counter % 10 == 0:
            err = np.nansum(np.abs(c / c_new - 1))
        c = c_new
        _counter += 1
        print("error: ", err, 'step ', _counter, flush=True)  # " nonneg: ", sum(I), flush=True)
    # inplace calculations.
    PS *= np.squeeze(c)
    PS = PS.T
    PS *= np.squeeze(r)
    PS = PS.T
    argmaxes = np.nanargmax(PS, 0)  # size N
    newL = torch.LongTensor(argmaxes)
    selflabels = newL.cuda()
    # undo the scaling so the cost is computed on the unscaled matrix
    PS = PS.T
    PS /= np.squeeze(r)
    PS = PS.T
    PS /= np.squeeze(c)
    sol = PS[argmaxes, np.arange(N)]
    np.log(sol, sol)  # in-place log
    cost = -(1. / args.lamb) * np.nansum(sol) / N
    logger.info('cost: {}'.format(cost))
    logger.info('opt took {0:.2f}min, {1:4d}iters'.format(((time.time() - tt) / 60.), _counter))
    return cost, selflabels
def proceed_test(model, input_size, quick_test=1e10):
    """Evaluate `model` on the cityscapes val split.

    Upsamples logits back to the full cityscapes resolution, feeds argmax
    predictions into MIoUStatistics, and returns the IoU averaged over 16
    classes. Restores the model to train mode before returning.
    """
    logger.info("proceed test on cityscapes val set...")
    model.eval()
    model.cuda()
    testloader = data.DataLoader(cityscapesDataSet(crop_size=input_size,
                                                   mean=IMG_MEAN,
                                                   scale=False,
                                                   mirror=False,
                                                   set="val"),
                                 batch_size=1,
                                 shuffle=False,
                                 pin_memory=True)
    # size is (W, H), Upsample expects (H, W)
    interp = nn.Upsample(size=(cityscape_image_size[1], cityscape_image_size[0]),
                         mode='bilinear')
    from tensorpack.utils.stats import MIoUStatistics
    stat = MIoUStatistics(NUM_CLASSES)
    for index, batch in tqdm(enumerate(testloader), desc="validation"):
        if index > quick_test:
            break
        image, label, _, name = batch
        image, label = Variable(image, volatile=True), Variable(label)
        output2 = model(image.cuda())  # (1,19,129,257)
        output = interp(output2).cpu().data[0].numpy()
        # CHW -> HWC, then per-pixel argmax over classes
        output = output.transpose(1, 2, 0)
        output = np.asarray(np.argmax(output, axis=2), dtype=np.uint8)
        stat.feed(output, label.data.cpu().numpy().squeeze())
    # average IoU over the 16 evaluated classes (hard-coded divisor)
    miou16 = np.sum(stat.IoU) / 16
    print("tensorpack class16 IoU with: {}".format(miou16))
    model.train()
    return miou16
def load_dataset(self, partition, size=(84, 84)):
    """Load the compacted mini-imagenet pickles for `partition`.

    'train_val' merges the train and val pickles into one dict. Each image is
    resized to `size`, converted to CHW float32, mean-subtracted per channel
    and scaled to roughly [-1, 1]. Returns (data, label_encoder) where data
    maps class -> list of preprocessed image arrays.
    """
    logger.info("Loading dataset")
    if partition == 'train_val':
        with open(
                os.path.join(self.root, 'compacted_datasets',
                             'mini_imagenet_%s.pickle' % 'train'),
                'rb') as handle:
            data = pickle.load(handle)
        with open(
                os.path.join(self.root, 'compacted_datasets',
                             'mini_imagenet_%s.pickle' % 'val'),
                'rb') as handle:
            data_val = pickle.load(handle)
        data.update(data_val)
        del data_val
    else:
        with open(
                os.path.join(self.root, 'compacted_datasets',
                             'mini_imagenet_%s.pickle' % partition),
                'rb') as handle:
            data = pickle.load(handle)
    with open(
            os.path.join(self.root, 'compacted_datasets',
                         'mini_imagenet_label_encoder.pickle'),
            'rb') as handle:
        label_encoder = pickle.load(handle)
    # Resize images and normalize
    for class_ in data:
        for i in range(len(data[class_])):
            image2resize = pil_image.fromarray(np.uint8(data[class_][i]))
            # PIL resize takes (width, height) = (size[1], size[0])
            image_resized = image2resize.resize((size[1], size[0]))
            image_resized = np.array(image_resized, dtype='float32')
            # Normalize
            image_resized = np.transpose(image_resized, (2, 0, 1))
            image_resized[0, :, :] -= 120.45  # R
            image_resized[1, :, :] -= 115.74  # G
            image_resized[2, :, :] -= 104.65  # B
            image_resized /= 127.5
            data[class_][i] = image_resized
    logger.info("Num classes " + str(len(data)))
    num_images = 0
    for class_ in data:
        num_images += len(data[class_])
    logger.info("Num images " + str(num_images))
    return data, label_encoder
def train(self):
    """Training driver: derives max_epoch from max_iter and the trimmed
    per-epoch iteration count, stepping the LR every 8 epochs.
    """
    logger.info("train_loader length: {}".format(len(self.train_loader)))
    logger.info("target_loader length: {}".format(len(self.target_loader)))
    shorter = min(len(self.target_loader), len(self.train_loader))
    # trim to a whole number of batches, minus one safety iteration
    self.iters_per_epoch = shorter - (shorter % self.batch_size) - 1
    logger.info("iters_per_epoch :{}".format(self.iters_per_epoch))
    max_epoch = int(math.ceil(self.max_iter / self.iters_per_epoch))
    self.max_epoch = max_epoch
    for cur_epoch in tqdm.trange(self.epoch, max_epoch, desc='Train'):
        self.epoch = cur_epoch
        # anneal the learning rate every 8 epochs (skipping epoch 0)
        if self.epoch > 0 and self.epoch % 8 == 0:
            self.optim = step_scheduler(self.optim, self.epoch)
        self.train_epoch()
        if self.iteration >= self.max_iter:
            break
def model_summary(model_list):
    """Log a parameter table (name/shape/grad/dtype/device/...) per model.

    Returns (all_param_num, trainable_param_num) of the LAST model in the
    list (matching the original behavior).
    """
    if not isinstance(model_list, list):
        model_list = [model_list]
    from operator import mul
    for model in model_list:
        rows = []
        trainable_param_num = 0
        all_param_num = 0
        for param_name, tensor in model.named_parameters():
            shape = list(tensor.size())
            rows.append([param_name, shape, tensor.requires_grad, tensor.dtype,
                         tensor.device, tensor.is_leaf, str(tensor.grad_fn)])
            count = reduce(mul, shape, 1)
            all_param_num += count
            if tensor.requires_grad:
                trainable_param_num += count
        table = tabulate(rows,
                         headers=["name", "shape", "requires_grad", "dtype",
                                  "device", "is_leaf", "grad_fn"])
        logger.warning(
            " Arg Parameters: #param={}, #param(trainable) = {}".format(
                all_param_num, trainable_param_num))
        logger.info(colored(
            "Model Summary",
            "cyan",
        ))
        logger.info("\n\n" + table)
        logger.info(model)
    return all_param_num, trainable_param_num
def main():
    """Entry point: parse CLI args, build SYNTHIA->CityScapes data loaders,
    construct the segmentation model (+ frozen teacher copy), the domain
    discriminator and both optimizers, then launch MyTrainer_ROAD training.
    """
    logger.auto_set_dir()
    global args
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataroot', default='/home/hutao/lab/pytorchgo/example/ROAD/data', help='Path to source dataset')
    parser.add_argument('--batchSize', type=int, default=1, help='input batch size')
    parser.add_argument('--max_epoch', type=int, default=max_epoch, help='Number of training iterations')
    parser.add_argument('--optimizer', type=str, default='Adam', help='Optimizer to use | SGD, Adam')
    parser.add_argument('--lr', type=float, default=base_lr, help='learning rate')
    parser.add_argument('--momentum', type=float, default=0.99, help='Momentum for SGD')
    parser.add_argument('--beta1', type=float, default=0.9, help='beta1 for adam. default=0.5')
    parser.add_argument('--weight_decay', type=float, default=0.0005, help='Weight decay')
    parser.add_argument('--model', type=str, default='vgg16')
    parser.add_argument('--gpu', type=int, default=1)
    args = parser.parse_args()
    print(args)
    gpu = args.gpu
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
    cuda = torch.cuda.is_available()
    torch.manual_seed(1337)
    if cuda:
        logger.info("random seed 1337")
        torch.cuda.manual_seed(1337)
    # Defining data loaders
    kwargs = {
        'num_workers': 4,
        'pin_memory': True,
        'drop_last': True
    } if cuda else {}
    train_loader = torch.utils.data.DataLoader(torchfcn.datasets.SYNTHIA(
        'SYNTHIA', args.dataroot, split='train', transform=True,
        image_size=image_size),
        batch_size=args.batchSize, shuffle=True, **kwargs)
    val_loader = torch.utils.data.DataLoader(torchfcn.datasets.CityScapes(
        'cityscapes', args.dataroot, split='val', transform=True,
        image_size=image_size),
        batch_size=1, shuffle=False)
    target_loader = torch.utils.data.DataLoader(torchfcn.datasets.CityScapes(
        'cityscapes', args.dataroot, split='train', transform=True,
        image_size=image_size),
        batch_size=args.batchSize, shuffle=True)
    if cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    if args.model == "vgg16":
        model = origin_model = torchfcn.models.Seg_model(n_class=class_num)
        vgg16 = torchfcn.models.VGG16(pretrained=True)
        model.copy_params_from_vgg16(vgg16)
        # model_fix is a frozen copy acting as the distillation teacher
        model_fix = torchfcn.models.Seg_model(n_class=class_num)
        model_fix.copy_params_from_vgg16(vgg16)
        for param in model_fix.parameters():
            param.requires_grad = False
    elif args.model == "deeplabv2":  # TODO may have problem!
        model = origin_model = torchfcn.models.Res_Deeplab(
            num_classes=class_num, image_size=image_size)
        saved_state_dict = model_zoo.load_url(Deeplabv2_restore_from)
        new_params = model.state_dict().copy()
        for i in saved_state_dict:
            # Scale.layer5.conv2d_list.3.weight
            i_parts = i.split('.')
            # print i_parts
            # skip the classifier head only when class_num == 19
            if not class_num == 19 or not i_parts[1] == 'layer5':
                new_params['.'.join(i_parts[1:])] = saved_state_dict[i]
                # print i_parts
        model.load_state_dict(new_params)
        model_fix = torchfcn.models.Res_Deeplab(num_classes=class_num,
                                                image_size=image_size)
        model_fix.load_state_dict(new_params)
    else:
        raise ValueError("only support vgg16, deeplabv2!")
    netD = torchfcn.models.Domain_classifer(reverse=True)
    netD.apply(weights_init)
    model_summary([model, netD])
    if cuda:
        model = model.cuda()
        netD = netD.cuda()
    # Defining optimizer
    if args.optimizer == 'SGD':
        raise ValueError("SGD is not prepared well..")
        # NOTE(review): unreachable — the raise above always fires first.
        optim = torch.optim.SGD([
            {
                'params': get_parameters(model, bias=False)
            },
            {
                'params': get_parameters(model, bias=True),
                'lr': args.lr * 2,
                'weight_decay': args.weight_decay
            },
        ], lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    elif args.optimizer == 'Adam':
        if args.model == "vgg16":
            # bias params get 2x learning rate
            optim = torch.optim.Adam([
                {
                    'params': get_parameters(model, bias=False),
                    'weight_decay': args.weight_decay
                },
                {
                    'params': get_parameters(model, bias=True),
                    'lr': args.lr * 2,
                    'weight_decay': args.weight_decay
                },
            ], lr=args.lr, betas=(args.beta1, 0.999))
        elif args.model == "deeplabv2":
            optim = torch.optim.Adam(origin_model.optim_parameters(args.lr),
                                     lr=args.lr,
                                     betas=(args.beta1, 0.999),
                                     weight_decay=args.weight_decay)
        else:
            raise
    else:
        raise ValueError('Invalid optmizer argument. Has to be SGD or Adam')
    optimD = torch.optim.Adam(netD.parameters(), lr=dis_lr,
                              weight_decay=args.weight_decay,
                              betas=(0.7, 0.999))
    optimizer_summary([optim, optimD])
    trainer = MyTrainer_ROAD(cuda=cuda,
                             model=model,
                             model_fix=model_fix,
                             netD=netD,
                             optimizer=optim,
                             optimizerD=optimD,
                             train_loader=train_loader,
                             target_loader=target_loader,
                             val_loader=val_loader,
                             batch_size=args.batchSize,
                             image_size=image_size,
                             loss_print_interval=LOSS_PRINT_INTERVAL)
    trainer.epoch = 0
    trainer.iteration = 0
    trainer.train()
def train_epoch(self):
    """
    Function to train the model for one epoch

    Per batch: computes the source segmentation loss plus a distillation loss
    against the frozen model_fix on target data, adds the (gradient-reversed)
    domain-discriminator loss, and steps both optimizers on the joint loss.
    """
    def set_requires_grad(seg, dis):
        # toggle which sub-network receives gradients
        # (currently unused — the calls below are commented out)
        for param in self.model.parameters():
            param.requires_grad = seg
        for param in self.netD.parameters():
            param.requires_grad = dis
    # NOTE(review): itertools.izip is Python 2 only; zip() on Python 3.
    for batch_idx, (datas, datat) in tqdm.tqdm(
            enumerate(itertools.izip(self.train_loader, self.target_loader)),
            total=self.iters_per_epoch,
            desc='Train epoch = {}/{}'.format(self.epoch, self.max_epoch)):
        self.iteration = batch_idx + self.epoch * self.iters_per_epoch
        source_data, source_labels = datas
        target_data, __ = datat
        self.optim.zero_grad()
        self.optimD.zero_grad()
        # domain labels: 1 = source, 0 = target
        src_dis_label = 1
        target_dis_label = 0
        if self.cuda:
            source_data, source_labels = source_data.cuda(
            ), source_labels.cuda()
            target_data = target_data.cuda()
        source_data, source_labels = Variable(source_data), Variable(
            source_labels)
        target_data = Variable(target_data)
        ############train G, item1
        #set_requires_grad(seg=True, dis=False)
        # Source domain
        score = self.model(source_data)
        l_seg = CrossEntropyLoss2d_Seg(score, source_labels,
                                       class_num=class_num,
                                       size_average=self.size_average)
        # target domain
        seg_target_score = self.model(target_data)
        modelfix_target_score = self.model_fix(target_data)
        diff2d = Diff2d()
        distill_loss = diff2d(seg_target_score, modelfix_target_score)
        # distillation weighted 10x relative to the segmentation loss
        seg_loss = l_seg + 10 * distill_loss
        #seg_loss.backward(retain_graph=True)
        #######train G, item 2
        """
        bce_loss = torch.nn.BCEWithLogitsLoss()
        src_discriminate_result = self.netD(score)
        target_discriminate_result = self.netD(seg_target_score)
        src_dis_loss = bce_loss(src_discriminate_result,
            Variable(torch.FloatTensor(src_discriminate_result.data.size()).fill_(
            src_dis_label)).cuda())
        target_dis_loss = bce_loss(target_discriminate_result, Variable(
            torch.FloatTensor(target_discriminate_result.data.size()).fill_(
            target_dis_label)).cuda(), )
        dis_loss = src_dis_loss + target_dis_loss
        dis_loss.backward(retain_graph=True)
        """
        #######################train D
        #set_requires_grad(seg=False, dis=True)
        bce_loss = torch.nn.BCEWithLogitsLoss()
        # detach so discriminator gradients do not flow into the segmenter
        src_discriminate_result = self.netD(score.detach())
        target_discriminate_result = self.netD(seg_target_score.detach())
        src_dis_loss = bce_loss(
            src_discriminate_result,
            Variable(
                torch.FloatTensor(
                    src_discriminate_result.data.size()).fill_(
                        src_dis_label)).cuda())
        target_dis_loss = bce_loss(
            target_discriminate_result,
            Variable(
                torch.FloatTensor(
                    target_discriminate_result.data.size()).fill_(
                        target_dis_label)).cuda(), )
        dis_loss = src_dis_loss + target_dis_loss
        # this loss has been inversed!!
        total_loss = dis_loss + seg_loss
        total_loss.backward()
        self.optim.step()
        self.optimD.step()
        # NOTE(review): loss.data[0] is a pre-0.4 PyTorch idiom (loss.item()).
        if np.isnan(float(dis_loss.data[0])):
            raise ValueError('dis_loss is nan while training')
        if np.isnan(float(seg_loss.data[0])):
            raise ValueError('total_loss is nan while training')
        if self.iteration % self.loss_print_interval == 0:
            logger.info(
                "L_SEG={}, Distill_LOSS={}, Discriminater loss={}".format(
                    l_seg.data[0], distill_loss.data[0], dis_loss.data[0]))