'loss_rpn_box': loss_rpn_box, 'loss_rcnn_cls': loss_rcnn_cls, 'loss_rcnn_box': loss_rcnn_box } for tag, value in info.items(): logger.scalar_summary(tag, value, step) loss_temp = 0 start = time.time() if args.mGPUs: save_name = os.path.join(output_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step)) save_checkpoint({ 'session': args.session, 'epoch': epoch + 1, 'model': model.module.state_dict(), 'optimizer': optimizer.state_dict(), 'pooling_mode': cfg.POOLING_MODE, 'class_agnostic': args.class_agnostic, }, save_name) else: save_name = os.path.join(output_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step)) save_checkpoint({ 'session': args.session, 'epoch': epoch + 1, 'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'pooling_mode': cfg.POOLING_MODE, 'class_agnostic': args.class_agnostic, }, save_name) print('save model: {}'.format(save_name))
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target, val_datasets, device, net, optimizer, num_workers, lr, batch_size, start_epoch, max_epochs, lr_decay_gamma, lr_decay_step, resume, load_name, eta, gamma, ef, class_agnostic, lc, gc, LA_ATT, MID_ATT, debug, _run): args = Args(dataset=dataset_source, dataset_t=dataset_target, imdb_name_target=[], cfg_file=cfg_file, net=net) args = set_dataset_args(args) args_val = Args(dataset=dataset_source, dataset_t=val_datasets, imdb_name_target=[], cfg_file=cfg_file, net=net) args_val = set_dataset_args(args_val, test=True) logger = LoggerForSacred(None, ex, True) if cfg_file is not None: cfg_from_file(cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) np.random.seed(cfg.RNG_SEED) cfg.TRAIN.USE_FLIPPED = True cfg.USE_GPU_NMS = True if device == 'cuda' else False device = torch.device(device) output_dir = output_dir + "_{}".format(_run._id) if not os.path.exists(output_dir): os.makedirs(output_dir) dataloader_s, dataloader_t, imdb = init_dataloaders_1s_mixed_mt(args, batch_size, num_workers) val_dataloader_ts, val_imdb_ts = init_val_dataloaders_mt(args_val, 1, num_workers) session = 1 fasterRCNN, lr, optimizer, session, start_epoch, _ = init_htcn_model_optimizer(lr, LA_ATT, MID_ATT, class_agnostic, device, gc, imdb, lc, load_name, net, optimizer, resume, session, start_epoch) if torch.cuda.device_count() > 1: fasterRCNN = nn.DataParallel(fasterRCNN) iters_per_epoch = int(10000 / batch_size) if ef: FL = EFocalLoss(class_num=2, gamma=gamma) else: FL = FocalLoss(class_num=2, gamma=gamma) total_step = 0 for epoch in range(start_epoch, max_epochs + 1): # setting to train mode fasterRCNN.train() if epoch - 1 in lr_decay_step: adjust_learning_rate(optimizer, lr_decay_gamma) lr *= lr_decay_gamma total_step = frcnn_utils.train_htcn_one_epoch(args, FL, total_step, dataloader_s, dataloader_t, iters_per_epoch, fasterRCNN, optimizer, device, eta, logger) save_name = os.path.join(output_dir, 'target_{}_eta_{}_local_{}_global_{}_gamma_{}_session_{}_epoch_{}_total_step_{}.pth'.format( args.dataset_t, args.eta, lc, gc, gamma, session, epoch, total_step)) save_checkpoint({ 'session': session, 'epoch': epoch + 1, 'model': fasterRCNN.module.state_dict() if torch.cuda.device_count() > 1 else fasterRCNN.state_dict(), 'optimizer': optimizer.state_dict(), 'pooling_mode': cfg.POOLING_MODE, 'class_agnostic': class_agnostic, }, save_name) return 0
logger.add_scalars("logs_s_{}/losses".format(args.session), info, (epoch - 1) * iters_per_epoch + step) loss_temp = 0 start = time.time() save_name = os.path.join( output_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step)) save_checkpoint( { 'session': args.session, 'epoch': epoch + 1, 'model': fasterRCNN.module.state_dict() if args.mGPUs else fasterRCNN.state_dict(), 'optimizer': optimizer.state_dict(), 'pooling_mode': cfg.POOLING_MODE, 'class_agnostic': args.class_agnostic, }, save_name) print('save model: {}'.format(save_name)) if args.use_tfboard: logger.close()
def training_fusion(): # initilize the tensor holder here. im_data1 = torch.FloatTensor(1) im_data2 = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) # ship to cuda if args.cuda: im_data1 = im_data1.cuda() im_data2 = im_data2.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable im_data1 = Variable(im_data1) im_data2 = Variable(im_data2) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda: cfg.CUDA = True # initilize the network here. if args.net == 'vgg16f': fasterRCNN = vgg16f(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic, fusion_mode=args.fusion_mode) elif args.net == 'vgg16c': fasterRCNN = vgg16c(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() fasterRCNN.create_architecture() lr = cfg.TRAIN.LEARNING_RATE lr = args.lr # tr_momentum = cfg.TRAIN.MOMENTUM # tr_momentum = args.momentum params = [] for key, value in dict(fasterRCNN.named_parameters()).items(): if value.requires_grad: if 'bias' in key: params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \ 'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}] else: params += [{ 'params': [value], 'lr': lr, 'weight_decay': cfg.TRAIN.WEIGHT_DECAY }] if args.cuda: fasterRCNN.cuda() if args.optimizer == "adam": lr = lr * 0.1 optimizer = torch.optim.Adam(params) elif args.optimizer == "sgd": optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM) if args.resume: load_name = os.path.join( output_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint)) print("loading checkpoint %s" % (load_name)) checkpoint = torch.load(load_name) args.session = checkpoint['session'] args.start_epoch = checkpoint['epoch'] fasterRCNN.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr = optimizer.param_groups[0]['lr'] if 'pooling_mode' in checkpoint.keys(): cfg.POOLING_MODE = checkpoint['pooling_mode'] print("loaded checkpoint %s" % (load_name)) if args.mGPUs: fasterRCNN = nn.DataParallel(fasterRCNN) iters_per_epoch = int(train_size / args.batch_size) # tfboard if args.use_tfboard: from tensorboardX import SummaryWriter logger = SummaryWriter("logs") #TODO #logger.add_graph(fasterRCNN, (im_data1, im_data2, im_info, gt_boxes, num_boxes)) #TODO if args.net == 'vgg16f': for epoch in range(args.start_epoch, args.max_epochs + 1): # setting to train mode fasterRCNN.train() loss_temp = 0 start = time.time() if epoch % (args.lr_decay_step + 1) == 0: adjust_learning_rate(optimizer, args.lr_decay_gamma) lr *= args.lr_decay_gamma data_iter = iter(dataloader) for step in range(iters_per_epoch): data = next(data_iter) im_data1.resize_(data[0].size()).copy_(data[0]) im_data2.resize_(data[1].size()).copy_(data[1]) im_info.resize_(data[2].size()).copy_(data[2]) gt_boxes.resize_(data[3].size()).copy_(data[3]) num_boxes.resize_(data[4].size()).copy_(data[4]) fasterRCNN.zero_grad() rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = fasterRCNN(im_data1, im_data2, im_info, gt_boxes, num_boxes) loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \ + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean() loss_temp += loss.item() # backward optimizer.zero_grad() loss.backward() if args.net == "vgg16": clip_gradient(fasterRCNN, 10.) optimizer.step() if step % args.disp_interval == 0: end = time.time() if step > 0: loss_temp /= (args.disp_interval + 1) if args.mGPUs: loss_rpn_cls = rpn_loss_cls.mean().item() loss_rpn_box = rpn_loss_box.mean().item() loss_rcnn_cls = RCNN_loss_cls.mean().item() loss_rcnn_box = RCNN_loss_bbox.mean().item() fg_cnt = torch.sum(rois_label.data.ne(0)) bg_cnt = rois_label.data.numel() - fg_cnt else: loss_rpn_cls = rpn_loss_cls.item() loss_rpn_box = rpn_loss_box.item() loss_rcnn_cls = RCNN_loss_cls.item() loss_rcnn_box = RCNN_loss_bbox.item() fg_cnt = torch.sum(rois_label.data.ne(0)) bg_cnt = rois_label.data.numel() - fg_cnt Log.info("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \ % (args.session, epoch, step, iters_per_epoch, loss_temp, lr)) Log.info("\t\t\tfg/bg=(%d/%d), time cost: %f" % (fg_cnt, bg_cnt, end - start)) Log.info("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \ % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box)) if args.use_tfboard: info = { 'loss': loss_temp, 'loss_rpn_cls': loss_rpn_cls, 'loss_rpn_box': loss_rpn_box, 'loss_rcnn_cls': loss_rcnn_cls, 'loss_rcnn_box': loss_rcnn_box } logger.add_scalars( "logs_s_{}/losses".format(args.session), info, (epoch - 1) * iters_per_epoch + step) loss_temp = 0 start = time.time() save_name = os.path.join( output_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step)) save_checkpoint( { 'session': args.session, 'epoch': epoch + 1, 'model': fasterRCNN.module.state_dict() if args.mGPUs else fasterRCNN.state_dict(), 'optimizer': optimizer.state_dict(), 'pooling_mode': cfg.POOLING_MODE, 'class_agnostic': args.class_agnostic, }, save_name) print('save model: {}'.format(save_name)) elif args.net == 'vgg16c': for epoch in range(args.start_epoch, args.max_epochs + 1): # setting to train mode fasterRCNN.train() loss_temp = 0 start = time.time() if epoch % (args.lr_decay_step + 1) == 0: adjust_learning_rate(optimizer, args.lr_decay_gamma) lr *= args.lr_decay_gamma data_iter = iter(dataloader) for step in range(iters_per_epoch): data = next(data_iter) im_data1.resize_(data[0].size()).copy_(data[0]) im_data2.resize_(data[1].size()).copy_(data[1]) im_info.resize_(data[2].size()).copy_(data[2]) gt_boxes.resize_(data[3].size()).copy_(data[3]) num_boxes.resize_(data[4].size()).copy_(data[4]) fasterRCNN.zero_grad() rois, cls_prob, bbox_pred, \ rpn_loss_cls1, rpn_loss_box1, \ rpn_loss_cls2, rpn_loss_box2, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = fasterRCNN(im_data1, im_data2, im_info, gt_boxes, num_boxes) loss = rpn_loss_cls1.mean() + rpn_loss_box1.mean() \ + rpn_loss_cls2.mean() + rpn_loss_box2.mean() \ + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean() loss_temp += loss.item() # backward optimizer.zero_grad() loss.backward() if args.net == "vgg16": clip_gradient(fasterRCNN, 10.) optimizer.step() if step % args.disp_interval == 0: end = time.time() if step > 0: loss_temp /= (args.disp_interval + 1) if args.mGPUs: loss_rpn_cls1 = rpn_loss_cls1.mean().item() loss_rpn_cls2 = rpn_loss_cls2.mean().item() loss_rpn_box1 = rpn_loss_box1.mean().item() loss_rpn_box2 = rpn_loss_box2.mean().item() loss_rcnn_cls = RCNN_loss_cls.mean().item() loss_rcnn_box = RCNN_loss_bbox.mean().item() fg_cnt = torch.sum(rois_label.data.ne(0)) bg_cnt = rois_label.data.numel() - fg_cnt else: loss_rpn_cls1 = rpn_loss_cls1.item() loss_rpn_cls2 = rpn_loss_cls2.item() loss_rpn_box1 = rpn_loss_box1.item() loss_rpn_box2 = rpn_loss_box2.item() loss_rcnn_cls = RCNN_loss_cls.item() loss_rcnn_box = RCNN_loss_bbox.item() fg_cnt = torch.sum(rois_label.data.ne(0)) bg_cnt = rois_label.data.numel() - fg_cnt Log.info("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \ % (args.session, epoch, step, iters_per_epoch, loss_temp, lr)) Log.info("\t\t\tfg/bg=(%d/%d), time cost: %f" % (fg_cnt, bg_cnt, end - start)) Log.info("\t\t\trpn_cls1: %.4f,rpn_cls2: %.4f, rpn_box1: %.4f, rpn_box2: %.4f,rcnn_cls: %.4f, rcnn_box %.4f" \ % (loss_rpn_cls1, loss_rpn_cls2,loss_rpn_box1,loss_rpn_box2, loss_rcnn_cls, loss_rcnn_box)) if args.use_tfboard: info = { 'loss': loss_temp, 'loss_rpn_cls1': loss_rpn_cls1, 'loss_rpn_cls2': loss_rpn_cls2, 'loss_rpn_box1': loss_rpn_box1, 'loss_rpn_box2': loss_rpn_box2, 'loss_rcnn_cls': loss_rcnn_cls, 'loss_rcnn_box': loss_rcnn_box } logger.add_scalars( "logs_s_{}/losses".format(args.session), info, (epoch - 1) * iters_per_epoch + step) loss_temp = 0 start = time.time() save_name = os.path.join( output_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step)) save_checkpoint( { 'session': args.session, 'epoch': epoch + 1, 'model': fasterRCNN.module.state_dict() if args.mGPUs else fasterRCNN.state_dict(), 'optimizer': optimizer.state_dict(), 'pooling_mode': cfg.POOLING_MODE, 'class_agnostic': args.class_agnostic, }, save_name) print('save model: {}'.format(save_name)) #logger.add_graph(fasterRCNN, (im_data1, im_data2, im_info, gt_boxes, num_boxes)) if args.use_tfboard: logger.close()