'weight_decay': 0.0005 }] lr = lr * 0.1 optimizer = torch.optim.Adam(params) epochs = 40 n_devs = torch.cuda.device_count() for epoch in range(epochs): print(' ============\n| Epoch {:0>2}/{:0>2} |\n ============'.format( epoch + 1, epochs)) if (epoch + 1) % (lr_decay_step) == 0: adjust_learning_rate(optimizer, lr_decay_gamma) lr *= lr_decay_gamma print('adjust learning rate {}...'.format(lr)) act_model, loss = training(epoch, device, act_model, train_data_loader, mode=4) if (epoch + 1) % 5 == 0: torch.save( act_model.state_dict(), "action_net_model_steady_anchors_roi_align_ok.pwf".format( epoch + 1))
def train_net():
    """Evaluate or train the mapping-wrapped ResNet18-8s network on real LINEMOD images.

    Behaviour is driven by module-level ``args``, ``cfg`` and ``train_cfg``:

    * ``args.test_model`` set: load the checkpoint for ``args.load_epoch`` and,
      when ``args.normal`` is set, run ``val`` once over the concatenated
      test + validation real sets.  The occluded and truncated evaluations
      that exist in sibling variants of this function are disabled here.
    * otherwise: optionally resume from the latest checkpoint, then for every
      remaining epoch adjust the learning rate, train, validate and save.

    Only ``image_db.train_real_set`` is used for training in this variant
    (no rendered or fused images).
    """

    def build_loader(dataset, sampler_type, batch_size, workers, **batch_kwargs):
        # Every loader in this function is an ImageSizeBatchSampler over a
        # plain sampler; the third positional argument is always False.
        batches = ImageSizeBatchSampler(sampler_type(dataset), batch_size,
                                        False, **batch_kwargs)
        return DataLoader(dataset, batch_sampler=batches, num_workers=workers)

    model = Resnet18_8s_modified(ver_dim=vote_num * 2, seg_dim=2)
    model = DataParallel(MappingNetWrapper(model)).cuda()
    optimizer = optim.Adam(model.parameters(), lr=train_cfg['lr'])
    checkpoint_dir = os.path.join(cfg.MODEL_DIR, train_cfg['model_name'])
    motion_model = train_cfg['motion_model']
    print('motion state {}'.format(motion_model))

    # ---- evaluation-only mode ------------------------------------------
    if args.test_model:
        start_epoch = load_model(model.module.net, optimizer, checkpoint_dir,
                                 args.load_epoch)
        if not args.normal:
            return
        print('testing normal linemod ...')
        image_db = LineModImageDB(args.linemod_cls,
                                  has_render_set=False,
                                  has_fuse_set=False)
        eval_records = image_db.test_real_set + image_db.val_real_set
        eval_set = LineModDatasetAug(eval_records,
                                     cfg.LINEMOD,
                                     vote_type,
                                     augment=False,
                                     use_motion=motion_model)
        eval_loader = build_loader(eval_set, SequentialSampler,
                                   train_cfg['test_batch_size'], 0)
        if args.use_test_set:
            prefix = 'test'
        else:
            prefix = 'val'
        val(model, eval_loader, start_epoch, prefix, use_motion=motion_model)
        return

    # ---- training mode ---------------------------------------------------
    start_epoch = 0
    if train_cfg['resume']:
        start_epoch = load_model(model.module.net, optimizer, checkpoint_dir)

    image_db = LineModImageDB(args.linemod_cls,
                              has_fuse_set=False,
                              has_render_set=False)
    aug_cfg = train_cfg['aug_cfg']

    train_set = LineModDatasetAug(image_db.train_real_set,
                                  cfg.LINEMOD,
                                  vote_type,
                                  augment=True,
                                  cfg=aug_cfg,
                                  use_motion=motion_model)
    train_loader = build_loader(train_set, RandomSampler,
                                train_cfg['train_batch_size'], 12,
                                cfg=aug_cfg)

    val_set = LineModDatasetAug(image_db.val_real_set,
                                cfg.LINEMOD,
                                vote_type,
                                augment=False,
                                cfg=aug_cfg,
                                use_motion=motion_model)
    val_loader = build_loader(val_set, SequentialSampler,
                              train_cfg['test_batch_size'], 12,
                              cfg=aug_cfg)

    epoch = start_epoch
    last_epoch = train_cfg['epoch_num']
    while epoch < last_epoch:
        adjust_learning_rate(optimizer, epoch, train_cfg['lr_decay_rate'],
                             train_cfg['lr_decay_epoch'])
        train(model, optimizer, train_loader, epoch)
        val(model, val_loader, epoch, use_motion=motion_model)
        # Checkpoint the unwrapped network after every epoch.
        save_model(model.module.net, optimizer, epoch, checkpoint_dir)
        epoch += 1
def train_net():
    """Evaluate or train the ResNet18-8s keypoint-voting network on LINEMOD.

    Behaviour is driven by module-level ``args``, ``cfg`` and ``train_cfg``:

    * ``args.test_model`` set: load the checkpoint for ``args.load_epoch`` and
      run ``val`` on whichever of the normal / occluded / truncated test sets
      is enabled through ``args``.
    * otherwise: optionally resume (``train_cfg['resume']``), build the
      training, validation and (for classes listed in
      ``cfg.occ_linemod_cls_names``) occlusion-validation loaders, then for
      every remaining epoch adjust the learning rate, train, validate and
      save a checkpoint.

    The stray per-epoch ``print("xx")`` debug statement of the previous
    revision has been removed.
    """

    def make_loader(dataset, sampler_cls, batch_size, workers, **batch_kwargs):
        # Shared wiring: sampler -> ImageSizeBatchSampler -> DataLoader.
        batch_sampler = ImageSizeBatchSampler(sampler_cls(dataset), batch_size,
                                              False, **batch_kwargs)
        return DataLoader(dataset,
                          batch_sampler=batch_sampler,
                          num_workers=workers)

    # ver_dim is vote_num * 2 and seg_dim is 2.
    # NOTE(review): the original author left an open question here about why
    # these sizes are used (presumably one x and one y channel per keypoint
    # for ver_dim) -- confirm against the network definition.
    net = Resnet18_8s(ver_dim=vote_num * 2, seg_dim=2)
    # NetWrapper: per the original note, bundles forward pass, loss
    # definition and evaluation -- TODO confirm against NetWrapper.
    net = NetWrapper(net)
    # Multi-GPU wrapper, currently pinned to device 0 only
    # (the two-GPU form was DataParallel(net, device_ids=[0, 1])).
    net = DataParallel(net, device_ids=[0]).cuda()
    # Optimisation method.
    optimizer = optim.Adam(net.parameters(), lr=train_cfg['lr'])
    model_dir = os.path.join(cfg.MODEL_DIR, train_cfg['model_name'])
    motion_model = train_cfg['motion_model']
    print('motion state {}'.format(motion_model))

    if args.test_model:
        begin_epoch = load_model(net.module.net, optimizer, model_dir,
                                 args.load_epoch)
        test_batch_size = train_cfg['test_batch_size']

        if args.normal:
            print('testing normal linemod ...')
            image_db = LineModImageDB(args.linemod_cls,
                                      has_render_set=False,
                                      has_fuse_set=False)
            test_db = image_db.test_real_set + image_db.val_real_set
            test_set = LineModDatasetRealAug(test_db,
                                             cfg.LINEMOD,
                                             vote_type,
                                             augment=False,
                                             use_motion=motion_model)
            test_loader = make_loader(test_set, SequentialSampler,
                                      test_batch_size, 0)
            prefix = 'test' if args.use_test_set else 'val'
            val(net, test_loader, begin_epoch, prefix,
                use_motion=motion_model)

        if args.occluded and args.linemod_cls in cfg.occ_linemod_cls_names:
            print('testing occluded linemod ...')
            occ_image_db = OcclusionLineModImageDB(args.linemod_cls)
            occ_test_set = LineModDatasetRealAug(occ_image_db.test_real_set,
                                                 cfg.OCCLUSION_LINEMOD,
                                                 vote_type,
                                                 augment=False,
                                                 use_motion=motion_model)
            occ_test_loader = make_loader(occ_test_set, SequentialSampler,
                                          test_batch_size, 0)
            prefix = 'occ_test' if args.use_test_set else 'occ_val'
            val(net, occ_test_loader, begin_epoch, prefix,
                use_motion=motion_model)

        if args.truncated:
            print('testing truncated linemod ...')
            trun_image_db = TruncatedLineModImageDB(args.linemod_cls)
            print(len(trun_image_db.set))
            trun_image_set = LineModDatasetRealAug(trun_image_db.set,
                                                   cfg.LINEMOD,
                                                   vote_type,
                                                   augment=False,
                                                   use_intrinsic=True,
                                                   use_motion=motion_model)
            trun_test_loader = make_loader(trun_image_set, SequentialSampler,
                                           test_batch_size, 0)
            prefix = 'trun_test'
            val(net, trun_test_loader, begin_epoch, prefix, True,
                use_motion=motion_model)
    else:
        # train_cfg['resume'] selects between continuing from a saved
        # checkpoint and training from epoch 0.  (The original note says the
        # separate "finetune" config entry is not used.)
        begin_epoch = 0
        if train_cfg['resume']:
            begin_epoch = load_model(net.module.net, optimizer, model_dir)

        # has_render_set was True in an earlier revision.
        # NOTE(review): render_set is still concatenated below although
        # has_render_set=False -- confirm it is simply empty in that case.
        image_db = LineModImageDB(args.linemod_cls,
                                  has_fuse_set=train_cfg['use_fuse'],
                                  has_render_set=False)

        # Training data is assembled from up to three sources: rendered,
        # real and fused images.  (Open question left by the original
        # author: truncated and occluded data are not part of training.)
        train_db = []
        train_db += image_db.render_set
        if train_cfg['use_real_train']:
            train_db += image_db.train_real_set
        if train_cfg['use_fuse']:
            train_db += image_db.fuse_set

        aug_cfg = train_cfg['aug_cfg']
        train_set = LineModDatasetRealAug(train_db,
                                          cfg.LINEMOD,
                                          vote_type,
                                          augment=True,
                                          cfg=aug_cfg,
                                          use_motion=motion_model)
        train_loader = make_loader(train_set, RandomSampler,
                                   train_cfg['train_batch_size'], 12,
                                   cfg=aug_cfg)

        val_set = LineModDatasetRealAug(image_db.val_real_set,
                                        cfg.LINEMOD,
                                        vote_type,
                                        augment=False,
                                        cfg=aug_cfg,
                                        use_motion=motion_model)
        val_loader = make_loader(val_set, SequentialSampler,
                                 train_cfg['test_batch_size'], 12,
                                 cfg=aug_cfg)

        has_occ_val = args.linemod_cls in cfg.occ_linemod_cls_names
        if has_occ_val:
            occ_image_db = OcclusionLineModImageDB(args.linemod_cls)
            # Only the first half of the occlusion test split is used for
            # validation during training.
            occ_val_db = occ_image_db.test_real_set[:len(
                occ_image_db.test_real_set) // 2]
            occ_val_set = LineModDatasetRealAug(occ_val_db,
                                                cfg.OCCLUSION_LINEMOD,
                                                vote_type,
                                                augment=False,
                                                cfg=aug_cfg,
                                                use_motion=motion_model)
            occ_val_loader = make_loader(occ_val_set, SequentialSampler,
                                         train_cfg['test_batch_size'], 12,
                                         cfg=aug_cfg)

        for epoch in range(begin_epoch, train_cfg['epoch_num']):
            adjust_learning_rate(optimizer, epoch,
                                 train_cfg['lr_decay_rate'],
                                 train_cfg['lr_decay_epoch'])
            train(net, optimizer, train_loader, epoch)
            val(net, val_loader, epoch, use_motion=motion_model)
            if has_occ_val:
                val(net, occ_val_loader, epoch, 'occ_val',
                    use_motion=motion_model)
            save_model(net.module.net, optimizer, epoch, model_dir)
def train_net():
    """Evaluate or train the ResNet18-8s keypoint-voting network on the homemade dataset.

    Behaviour is driven by module-level ``args``, ``cfg`` and ``train_cfg``:

    * ``args.test_model`` set: load the checkpoint for ``args.load_epoch``,
      always validate on ``ValidationDatasetIntake.getval_dataset()``, then
      optionally on the occluded and truncated sets.
    * otherwise: optionally resume, build the training set from rendered
      (+ real, + fused) images, and for every remaining epoch adjust the
      learning rate, train, run occlusion validation when the class is listed
      in ``cfg.occ_homemade_cls_names``, and save a checkpoint.  The regular
      per-epoch validation call is intentionally disabled, although its
      loader is still built.

    Fix relative to the previous revision: the training-time occlusion
    validation set now uses ``cfg.OCCLUSION_HOMEMADE`` (as the test branch
    already does) instead of ``cfg.OCCLUSION_LINEMOD``.
    """

    def make_loader(dataset, sampler_cls, batch_size, workers, **batch_kwargs):
        # Shared wiring: sampler -> ImageSizeBatchSampler -> DataLoader.
        batch_sampler = ImageSizeBatchSampler(sampler_cls(dataset), batch_size,
                                              False, **batch_kwargs)
        return DataLoader(dataset,
                          batch_sampler=batch_sampler,
                          num_workers=workers)

    def make_val_loader(workers):
        # The validation records come from ValidationDatasetIntake rather
        # than from image_db.val_real_set.
        val_db = ValidationDatasetIntake.getval_dataset()
        print("val_db - ", len(val_db))
        val_set = HomemadeDataset(val_db,
                                  cfg.HOMEMADE,
                                  vote_type,
                                  augment=False,
                                  cfg=train_cfg['homemade_cfg'],
                                  use_motion=motion_model)
        print("val set!", val_set)
        return make_loader(val_set, SequentialSampler,
                           train_cfg['test_batch_size'], workers,
                           cfg=train_cfg['homemade_cfg'])

    net = Resnet18_8s(ver_dim=vote_num * 2, seg_dim=2)
    net = NetWrapper(net)
    net = DataParallel(net).cuda()
    optimizer = optim.Adam(net.parameters(), lr=train_cfg['lr'])
    model_dir = os.path.join(cfg.MODEL_DIR, train_cfg['model_name'])
    print(model_dir)
    motion_model = train_cfg['motion_model']
    print('motion state {}'.format(motion_model))

    if args.test_model:
        begin_epoch = load_model(net.module.net, optimizer, model_dir,
                                 args.load_epoch)
        print('testing ...')
        val_loader = make_val_loader(workers=0)
        val(net, val_loader, begin_epoch, use_motion=motion_model)

        if args.occluded and args.homemade_cls in cfg.occ_homemade_cls_names:
            print('testing occluded dataset ...')
            occ_image_db = OcclusionHomemadeImageDB(args.homemade_cls)
            occ_test_set = HomemadeDataset(occ_image_db.test_real_set,
                                           cfg.OCCLUSION_HOMEMADE,
                                           vote_type,
                                           augment=False,
                                           use_motion=motion_model)
            occ_test_loader = make_loader(occ_test_set, SequentialSampler,
                                          train_cfg['test_batch_size'], 0)
            prefix = 'occ_test' if args.use_test_set else 'occ_val'
            val(net, occ_test_loader, begin_epoch, prefix,
                use_motion=motion_model)

        if args.truncated:
            print('testing truncated dataset ...')
            trun_image_db = TruncatedHomemadeImageDB(args.homemade_cls)
            print(len(trun_image_db.set))
            trun_image_set = HomemadeDataset(trun_image_db.set,
                                             cfg.HOMEMADE,
                                             vote_type,
                                             augment=False,
                                             use_intrinsic=True,
                                             use_motion=motion_model)
            trun_test_loader = make_loader(trun_image_set, SequentialSampler,
                                           train_cfg['test_batch_size'], 0)
            prefix = 'trun_test'
            val(net, trun_test_loader, begin_epoch, prefix, True,
                use_motion=motion_model)
    else:
        begin_epoch = 0
        print("Train_cfg[resume] - ", train_cfg['resume'])
        if train_cfg['resume']:
            begin_epoch = load_model(net.module.net, optimizer, model_dir)

        print("class", args.homemade_cls, "use fuse: ", train_cfg['use_fuse'])
        image_db = HomemadeImageDB(args.homemade_cls,
                                   has_fuse_set=train_cfg['use_fuse'],
                                   has_render_set=True)

        train_db = []
        train_db += image_db.render_set
        print("train_db after render set: ", len(train_db))
        if train_cfg['use_real_train']:
            # Original note: "we have no real images".
            train_db += image_db.train_real_set
            print("DB After real train set: ", len(train_db))
        if train_cfg['use_fuse']:
            train_db += image_db.fuse_set
            print("DB After fuse set: ", len(train_db))

        train_set = HomemadeDataset(train_db,
                                    cfg.HOMEMADE,
                                    vote_type,
                                    augment=True,
                                    cfg=train_cfg['homemade_cfg'],
                                    use_motion=motion_model)
        train_loader = make_loader(train_set, RandomSampler,
                                   train_cfg['train_batch_size'], 12,
                                   cfg=train_cfg['homemade_cfg'])

        # Built (and logged) as before, but not consumed below: the
        # per-epoch val(net, val_loader, ...) call is disabled.
        val_loader = make_val_loader(workers=12)

        has_occ_val = args.homemade_cls in cfg.occ_homemade_cls_names
        if has_occ_val:
            occ_image_db = OcclusionHomemadeImageDB(args.homemade_cls)
            # Only the first half of the occlusion test split is used for
            # validation during training.
            occ_val_db = occ_image_db.test_real_set[:len(
                occ_image_db.test_real_set) // 2]
            # Was cfg.OCCLUSION_LINEMOD: the records come from
            # OcclusionHomemadeImageDB, and the test branch above resolves the
            # same records against cfg.OCCLUSION_HOMEMADE.
            occ_val_set = HomemadeDataset(occ_val_db,
                                          cfg.OCCLUSION_HOMEMADE,
                                          vote_type,
                                          augment=False,
                                          cfg=train_cfg['homemade_cfg'],
                                          use_motion=motion_model)
            occ_val_loader = make_loader(occ_val_set, SequentialSampler,
                                         train_cfg['test_batch_size'], 12,
                                         cfg=train_cfg['homemade_cfg'])

        for epoch in range(begin_epoch, train_cfg['epoch_num']):
            adjust_learning_rate(optimizer, epoch,
                                 train_cfg['lr_decay_rate'],
                                 train_cfg['lr_decay_epoch'])
            train(net, optimizer, train_loader, epoch)
            # val(net, val_loader, epoch, use_motion=motion_model)  # disabled
            if has_occ_val:
                val(net, occ_val_loader, epoch, 'occ_val',
                    use_motion=motion_model)
            save_model(net.module.net, optimizer, epoch, model_dir)
val_set = LineModDatasetRealAug(val_db, cfg.LINEMOD, vote_type, augment=False, cfg=train_cfg['aug_cfg'], use_motion=motion_model) val_sampler = SequentialSampler(val_set) val_batch_sampler = ImageSizeBatchSampler(val_sampler, train_cfg['test_batch_size'], False, cfg=train_cfg['aug_cfg']) val_loader = DataLoader(val_set, batch_sampler=val_batch_sampler, num_workers=12) if args.linemod_cls in cfg.occ_linemod_cls_names: occ_image_db=OcclusionLineModImageDB(args.linemod_cls) occ_val_db=occ_image_db.test_real_set[:len(occ_image_db.test_real_set)//2] occ_val_set = LineModDatasetRealAug(occ_val_db, cfg.OCCLUSION_LINEMOD, vote_type, augment=False, cfg=train_cfg['aug_cfg'], use_motion=motion_model) occ_val_sampler = SequentialSampler(occ_val_set) occ_val_batch_sampler = ImageSizeBatchSampler(occ_val_sampler, train_cfg['test_batch_size'], False, cfg=train_cfg['aug_cfg']) occ_val_loader = DataLoader(occ_val_set, batch_sampler=occ_val_batch_sampler, num_workers=12) #for epoch in range(begin_epoch, train_cfg['epoch_num']): epoch = 0 adjust_learning_rate(optimizer,epoch,train_cfg['lr_decay_rate'],train_cfg['lr_decay_epoch']) #train(net, optimizer, train_loader, epoch) dataloader = train_loader for rec in recs: rec.reset() data_time.reset() batch_time.reset() train_begin=time.time() net.train() size = len(dataloader) end=time.time() #for idx, data in enumerate(dataloader):
def train_net():
    """Evaluate or train the ResNet18-8s keypoint-voting network on LINEMOD.

    Reads module-level ``args``, ``cfg`` and ``train_cfg``.

    With ``args.test_model`` set, the checkpoint for ``args.load_epoch`` is
    loaded and ``val`` is run on each enabled test set (normal, occluded,
    truncated).  Otherwise the function optionally resumes from the latest
    checkpoint and then, per epoch, adjusts the learning rate, trains,
    validates (plus occlusion validation when ``args.use_occlussion`` is set
    and the class is listed in ``cfg.occ_linemod_cls_names``) and saves a
    checkpoint.
    """

    def wrap(dataset, sampler_type, batch_size, workers, **extra):
        # Sampler -> ImageSizeBatchSampler -> DataLoader, as used everywhere
        # below; the third positional argument is always False.
        return DataLoader(
            dataset,
            batch_sampler=ImageSizeBatchSampler(sampler_type(dataset),
                                                batch_size, False, **extra),
            num_workers=workers,
        )

    model = DataParallel(
        NetWrapper(Resnet18_8s(ver_dim=vote_num * 2, seg_dim=2))).cuda()
    optimizer = optim.Adam(model.parameters(), lr=train_cfg["lr"])
    checkpoint_dir = os.path.join(cfg.MODEL_DIR, train_cfg["model_name"])
    motion_model = train_cfg["motion_model"]
    print("motion state {}".format(motion_model))

    def run_evaluation():
        # Evaluation-only path: every enabled test set is scored once.
        loaded_epoch = load_model(model.module.net, optimizer, checkpoint_dir,
                                  args.load_epoch)

        if args.normal:
            print("testing normal linemod ...")
            image_db = LineModImageDB(args.linemod_cls,
                                      has_render_set=False,
                                      has_fuse_set=False)
            records = image_db.test_real_set + image_db.val_real_set
            dataset = LineModDatasetRealAug(records,
                                            cfg.LINEMOD,
                                            vote_type,
                                            augment=False,
                                            use_motion=motion_model)
            loader = wrap(dataset, SequentialSampler,
                          train_cfg["test_batch_size"], 0)
            tag = "test" if args.use_test_set else "val"
            val(model, loader, loaded_epoch, tag, use_motion=motion_model)

        if args.occluded and args.linemod_cls in cfg.occ_linemod_cls_names:
            print("testing occluded linemod ...")
            occ_db = OcclusionLineModImageDB(args.linemod_cls)
            dataset = LineModDatasetRealAug(occ_db.test_real_set,
                                            cfg.OCCLUSION_LINEMOD,
                                            vote_type,
                                            augment=False,
                                            use_motion=motion_model)
            loader = wrap(dataset, SequentialSampler,
                          train_cfg["test_batch_size"], 0)
            tag = "occ_test" if args.use_test_set else "occ_val"
            val(model, loader, loaded_epoch, tag, use_motion=motion_model)

        if args.truncated:
            print("testing truncated linemod ...")
            trun_db = TruncatedLineModImageDB(args.linemod_cls)
            print(len(trun_db.set))
            dataset = LineModDatasetRealAug(trun_db.set,
                                            cfg.LINEMOD,
                                            vote_type,
                                            augment=False,
                                            use_intrinsic=True,
                                            use_motion=motion_model)
            loader = wrap(dataset, SequentialSampler,
                          train_cfg["test_batch_size"], 0)
            val(model, loader, loaded_epoch, "trun_test", True,
                use_motion=motion_model)

    def run_training():
        # Training path: build loaders once, then loop over epochs.
        first_epoch = 0
        if train_cfg["resume"]:
            first_epoch = load_model(model.module.net, optimizer,
                                     checkpoint_dir)

        image_db = LineModImageDB(args.linemod_cls,
                                  has_fuse_set=train_cfg["use_fuse"],
                                  has_render_set=False)
        records = []
        records += image_db.render_set
        if train_cfg["use_real_train"]:
            records += image_db.train_real_set
        if train_cfg["use_fuse"]:
            records += image_db.fuse_set

        train_set = LineModDatasetRealAug(records,
                                          cfg.LINEMOD,
                                          vote_type,
                                          augment=True,
                                          cfg=train_cfg["aug_cfg"],
                                          use_motion=motion_model)
        train_loader = wrap(train_set, RandomSampler,
                            train_cfg["train_batch_size"], 12,
                            cfg=train_cfg["aug_cfg"])

        val_set = LineModDatasetRealAug(image_db.val_real_set,
                                        cfg.LINEMOD,
                                        vote_type,
                                        augment=False,
                                        cfg=train_cfg["aug_cfg"],
                                        use_motion=motion_model)
        val_loader = wrap(val_set, SequentialSampler,
                          train_cfg["test_batch_size"], 12,
                          cfg=train_cfg["aug_cfg"])

        # None means "no occlusion validation for this run".
        occ_val_loader = None
        if args.use_occlussion and args.linemod_cls in cfg.occ_linemod_cls_names:
            occ_db = OcclusionLineModImageDB(args.linemod_cls)
            half = len(occ_db.test_real_set) // 2
            occ_val_set = LineModDatasetRealAug(occ_db.test_real_set[:half],
                                                cfg.OCCLUSION_LINEMOD,
                                                vote_type,
                                                augment=False,
                                                cfg=train_cfg["aug_cfg"],
                                                use_motion=motion_model)
            occ_val_loader = wrap(occ_val_set, SequentialSampler,
                                  train_cfg["test_batch_size"], 12,
                                  cfg=train_cfg["aug_cfg"])

        for epoch in range(first_epoch, train_cfg["epoch_num"]):
            adjust_learning_rate(optimizer, epoch,
                                 train_cfg["lr_decay_rate"],
                                 train_cfg["lr_decay_epoch"])
            train(model, optimizer, train_loader, epoch)
            val(model, val_loader, epoch, use_motion=motion_model)
            if occ_val_loader is not None:
                val(model, occ_val_loader, epoch, "occ_val",
                    use_motion=motion_model)
            save_model(model.module.net, optimizer, epoch, checkpoint_dir)

    if args.test_model:
        run_evaluation()
    else:
        run_training()
def main():
    """Command-line entry point: train and evaluate a CrossNeuronNet classifier.

    Steps, in order:

    1. Parse CLI options and merge them into the module-level ``cfg``
       (after loading ``configs/<dataset>.yaml``).
    2. Build the data loaders and the model; print parameter count and, for
       datasets with a known input shape, the FLOPs/params report.
    3. Create the SGD optimizer and optionally wrap the model for multi-GPU.
    4. If ``--resume N`` is given, load checkpoint ``N`` (weights, learning
       rate and best accuracy) and evaluate once.
    5. Run the train/test loop, decaying the learning rate at the epochs in
       ``cfg.optimizer.lr_decay_schedule`` and saving a checkpoint every
       ``cfg.log.checkpoint_interval`` epochs.

    Changes relative to the previous revision:

    * an unknown ``--dataset`` no longer crashes with ``NameError`` on the
      FLOPs report; the report is skipped instead;
    * ``torch.cuda.device(0)`` is only entered when CUDA is available;
    * checkpoints are loaded with ``map_location=device`` so a GPU-saved
      checkpoint can be resumed on CPU;
    * resuming restores the ``lr`` and ``best_acc`` stored in the checkpoint,
      so decay steps applied before the checkpoint are not lost;
    * the checkpoint directory is created with ``exist_ok=True``.

    NOTE(review): optimizer state (e.g. momentum buffers) is not part of the
    checkpoint, so it still restarts from scratch on resume.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description='PyTorch Image Classification')
    parser.add_argument('--dataset', type=str, default='cifar100',
                        help='specify training dataset')
    # The default was the string '1'; argparse converted it through type=int,
    # so an int literal is equivalent.
    parser.add_argument('--session', type=int, default=1,
                        help='training session to recoder multiple runs')
    parser.add_argument('--arch', type=str, default='resnet110',
                        help='specify network architecture')
    parser.add_argument('--bs', dest="batch_size", type=int, default=128,
                        help='training batch size')
    parser.add_argument('--gpu0-bs', dest="gpu0_bs", type=int, default=0,
                        help='training batch size on gpu0')
    parser.add_argument('--add-ccn', type=str, default='no',
                        help='add cross neruon communication')
    parser.add_argument('--mgpus', type=str, default="no",
                        help='multi-gpu training')
    parser.add_argument('--resume', dest="resume", type=int, default=0,
                        help='resume epoch')
    args = parser.parse_args()

    cfg.merge_from_file(osp.join("configs", args.dataset + ".yaml"))
    cfg.dataset = args.dataset
    cfg.arch = args.arch
    cfg.add_cross_neuron = args.add_ccn == "yes"
    use_cuda = torch.cuda.is_available()
    cfg.use_cuda = use_cuda
    cfg.training.batch_size = args.batch_size
    cfg.mGPUs = args.mgpus == "yes"

    torch.manual_seed(cfg.initialize.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    train_loader, test_loader = create_data_loader(cfg)
    model = CrossNeuronNet(cfg)
    print("parameter numer: %d" % (count_parameters(model)))

    # Input shape (C, H, W) used for the complexity report, per dataset.
    complexity_inputs = {"cifar100": (3, 32, 32), "imagenet": (3, 224, 224)}
    complexity_input = complexity_inputs.get(args.dataset)

    def report_complexity():
        flops, params = get_model_complexity_info(model,
                                                  complexity_input,
                                                  as_strings=True,
                                                  print_per_layer_stat=True)
        print('Flops: {}'.format(flops))
        print('Params: {}'.format(params))

    if complexity_input is None:
        print("skip complexity report: no input shape known for dataset {}".
              format(args.dataset))
    elif use_cuda:
        with torch.cuda.device(0):
            report_complexity()
    else:
        report_complexity()

    model = model.to(device)
    optimizer = optim.SGD(model.parameters(),
                          lr=cfg.optimizer.lr,
                          momentum=cfg.optimizer.momentum,
                          weight_decay=cfg.optimizer.weight_decay)

    if cfg.mGPUs:
        if args.gpu0_bs > 0:
            model = BalancedDataParallel(args.gpu0_bs, model).to(device)
        else:
            model = nn.DataParallel(model).to(device)

    lr = cfg.optimizer.lr
    best_acc = 0
    variant = "ccn" if cfg.add_cross_neuron else "plain"

    # The directory name is spelled "checkponts" on purpose: it is kept
    # byte-identical so checkpoints written by earlier runs are still found.
    checkpoint_tag = osp.join("checkponts", args.dataset, args.arch)
    os.makedirs(checkpoint_tag, exist_ok=True)

    if args.resume > 0:
        ckpt_path = osp.join(
            checkpoint_tag,
            variant + "_{}_{}.pth".format(args.session, args.resume))
        print("resume model from {}".format(ckpt_path))
        ckpt = torch.load(ckpt_path, map_location=device)
        model.load_state_dict(ckpt["model"])
        # Restore the schedule state saved next to the weights; without this
        # the run restarts at the base learning rate and forgets best_acc.
        lr = ckpt.get("lr", lr)
        for param_group in optimizer.param_groups:
            param_group["lr"] = lr
        best_acc = ckpt.get("best_acc", best_acc)
        print("resume model succesfully")
        acc = test(cfg, model, device, test_loader)

    for epoch in range(args.resume + 1, cfg.optimizer.max_epoch + 1):
        if epoch in cfg.optimizer.lr_decay_schedule:
            adjust_learning_rate(optimizer, cfg.optimizer.lr_decay_gamma)
            # Mirror the optimizer's learning rate locally for logging and
            # checkpointing.
            lr *= cfg.optimizer.lr_decay_gamma
        print('Train Epoch: {} learning rate: {}'.format(epoch, lr))

        tic = time.time()
        train(cfg, model, device, train_loader, optimizer, epoch)
        acc = test(cfg, model, device, test_loader)
        time_cost = time.time() - tic

        if acc > best_acc:
            best_acc = acc
        print(
            '\nModel: {} Best Accuracy-Baseline: {}\tTime Cost per Epoch: {}\n'
            .format(checkpoint_tag + variant, best_acc, time_cost))

        if epoch % cfg.log.checkpoint_interval == 0:
            checkpoint = {
                "arch": cfg.arch,
                "model": model.state_dict(),
                "epoch": epoch,
                "lr": lr,
                "test_acc": acc,
                "best_acc": best_acc
            }
            torch.save(
                checkpoint,
                osp.join(checkpoint_tag,
                         variant + "_{}_{}.pth".format(args.session, epoch)))
def train_net():
    """Evaluate or train the ImageUNet/EstimateUNet pair against a frozen PVNet baseline.

    Reads module-level ``args``, ``cfg`` and ``train_cfg``.  A TensorBoard
    ``SummaryWriter`` is opened under ``./runs/<exp_name>`` and passed to
    every ``val`` call; it is now closed when the function exits, including
    on error.

    * ``args.test_model`` set: load the checkpoint for ``args.load_epoch`` and
      run ``val`` on the enabled test sets (normal, occluded, truncated).
    * otherwise: optionally resume (resetting the learning rate to
      ``train_cfg['lr']``), then per epoch adjust the learning rate, train,
      validate and save.  The last two values returned by ``val`` are
      collected for evaluation epochs >= 30 and summarised at the end.

    Fixes relative to the previous revision:

    * the truncated test set passed ``randomCropping`` in a different
      positional slot than every other ``LineModDatasetRealAug`` call in this
      function; it now uses the same order;
    * the final summary no longer raises ``ValueError`` from ``np.max`` when
      no statistics were collected;
    * accumulators and ``prefix`` assignments that were never read were
      removed.

    NOTE(review): the output directory and the PVNet baseline path are
    hard-coded absolute paths under /home/gerard; consider moving them to
    ``cfg``.
    """

    def make_loader(dataset, sampler_cls, batch_size, workers, **batch_kwargs):
        # Shared wiring: sampler -> ImageSizeBatchSampler -> DataLoader.
        batch_sampler = ImageSizeBatchSampler(sampler_cls(dataset), batch_size,
                                              False, **batch_kwargs)
        return DataLoader(dataset,
                          batch_sampler=batch_sampler,
                          num_workers=workers)

    def print_summary(label, values):
        # np.max raises on an empty sequence, so report that case explicitly.
        if values:
            print('{} perc decrease. mean: {} +/- {}, max: {}'.format(
                label, np.mean(values), np.std(values), np.max(values)))
        else:
            print('{} perc decrease. no evaluation epochs recorded'.format(
                label))

    def run(writer):
        Path("/home/gerard/myPvnet/pvnet/{}".format(
            train_cfg["exp_name"])).mkdir(parents=True, exist_ok=True)
        model_dir = os.path.join(cfg.MODEL_DIR, train_cfg['model_name'])

        imNet = ImageUNet(ver_dim=(vote_num * 2), seg_dim=2)
        estNet = EstimateUNet(ver_dim=(vote_num * 2), seg_dim=2)
        net = NetWrapper(imNet, estNet)
        net = DataParallel(net).cuda()

        # Baseline PVNet: weights loaded from disk, cast to half precision.
        PVModelDir = '/home/gerard/baseline_models/{}_baseline/199.pth'.format(
            train_cfg['object'])
        PVNet = PVnet(ver_dim=vote_num * 2, seg_dim=2)
        PVNet.load_state_dict(torch.load(PVModelDir)['net'])
        PVNet = PVNet.half()
        PVNet = DataParallel(PVNet).cuda()

        randomCropping = RandomScaleCrop()
        optimizer = optim.Adam(net.parameters(), lr=train_cfg['lr'])
        motion_model = train_cfg['motion_model']
        print('motion state {}'.format(motion_model))
        # Learning rate of the last parameter group; forwarded to val().
        lr = optimizer.param_groups[-1]['lr']

        image_db = LineModImageDB(args.linemod_cls,
                                  has_render_set=False,
                                  has_fuse_set=False)
        test_db = image_db.test_real_set + image_db.val_real_set
        test_set = LineModDatasetRealAug(test_db,
                                         randomCropping,
                                         cfg.LINEMOD,
                                         vote_type,
                                         augment=False,
                                         use_motion=motion_model)
        test_loader = make_loader(test_set, SequentialSampler,
                                  train_cfg['test_batch_size'], 0)

        if args.test_model:
            begin_epoch = load_model(net.module.imNet, net.module.estNet,
                                     optimizer, model_dir, args.load_epoch)

            if args.normal:
                print('testing normal linemod ...')
                val_db = image_db.test_real_set + image_db.val_real_set
                val_set = LineModDatasetRealAug(val_db,
                                                randomCropping,
                                                cfg.LINEMOD,
                                                vote_type,
                                                augment=False,
                                                cfg=train_cfg['aug_cfg'],
                                                use_motion=motion_model)
                val_loader = make_loader(val_set, SequentialSampler,
                                         train_cfg['test_batch_size'], 16,
                                         cfg=train_cfg['aug_cfg'])
                print('test with val loader')
                val(net, PVNet, val_loader, begin_epoch, lr, writer,
                    use_motion=motion_model)
                print('test with test_loader')
                val(net, PVNet, test_loader, begin_epoch, lr, writer,
                    use_motion=motion_model)

            if args.occluded and args.linemod_cls in cfg.occ_linemod_cls_names:
                print('testing occluded linemod ...')
                occ_image_db = OcclusionLineModImageDB(args.linemod_cls)
                occ_test_set = LineModDatasetRealAug(
                    occ_image_db.test_real_set,
                    randomCropping,
                    cfg.OCCLUSION_LINEMOD,
                    vote_type,
                    augment=False,
                    use_motion=motion_model)
                occ_test_loader = make_loader(occ_test_set, SequentialSampler,
                                              train_cfg['test_batch_size'], 0)
                prefix = 'occ_test' if args.use_test_set else 'occ_val'
                val(net, PVNet, occ_test_loader, begin_epoch, lr, writer,
                    prefix, use_motion=motion_model)

            if args.truncated:
                print('testing truncated linemod ...')
                trun_image_db = TruncatedLineModImageDB(args.linemod_cls)
                print(len(trun_image_db.set))
                # randomCropping goes second, matching every other
                # LineModDatasetRealAug call in this function (it was
                # previously passed fourth, after vote_type).
                trun_image_set = LineModDatasetRealAug(trun_image_db.set,
                                                       randomCropping,
                                                       cfg.LINEMOD,
                                                       vote_type,
                                                       augment=False,
                                                       use_intrinsic=True,
                                                       use_motion=motion_model)
                trun_test_loader = make_loader(trun_image_set,
                                               SequentialSampler,
                                               train_cfg['test_batch_size'],
                                               16)
                prefix = 'trun_test'
                val(net, PVNet, trun_test_loader, begin_epoch, lr, writer,
                    prefix, True, use_motion=motion_model)
            return

        # ---- training mode ---------------------------------------------
        begin_epoch = 0
        if train_cfg['resume']:
            begin_epoch = load_model(net.module.imNet, net.module.estNet,
                                     optimizer, model_dir)
            # Reset the learning rate to the configured base value after
            # load_model has been given the optimizer.
            for param_group in optimizer.param_groups:
                param_group['lr'] = train_cfg['lr']
            lr = train_cfg['lr']

        image_db = LineModImageDB(args.linemod_cls,
                                  has_fuse_set=train_cfg['use_fuse'],
                                  has_render_set=True)
        train_db = []
        train_db += image_db.render_set
        if train_cfg['use_real_train']:
            train_db += image_db.train_real_set
        if train_cfg['use_fuse']:
            train_db += image_db.fuse_set

        train_set = LineModDatasetRealAug(train_db,
                                          randomCropping,
                                          cfg.LINEMOD,
                                          vote_type,
                                          augment=True,
                                          cfg=train_cfg['aug_cfg'],
                                          use_motion=motion_model)
        train_loader = make_loader(train_set, RandomSampler,
                                   train_cfg['train_batch_size'], 16,
                                   cfg=train_cfg['aug_cfg'])

        val_db = image_db.test_real_set + image_db.val_real_set
        val_set = LineModDatasetRealAug(val_db,
                                        randomCropping,
                                        cfg.LINEMOD,
                                        vote_type,
                                        augment=False,
                                        cfg=train_cfg['aug_cfg'],
                                        use_motion=motion_model)
        val_loader = make_loader(val_set, SequentialSampler,
                                 train_cfg['test_batch_size'], 16,
                                 cfg=train_cfg['aug_cfg'])

        if args.linemod_cls in cfg.occ_linemod_cls_names:
            # Built as before; the per-epoch occlusion validation call that
            # would consume it is currently disabled.
            occ_image_db = OcclusionLineModImageDB(args.linemod_cls)
            occ_val_db = occ_image_db.test_real_set[:len(
                occ_image_db.test_real_set) // 2]
            occ_val_set = LineModDatasetRealAug(occ_val_db,
                                                randomCropping,
                                                cfg.OCCLUSION_LINEMOD,
                                                vote_type,
                                                augment=False,
                                                cfg=train_cfg['aug_cfg'],
                                                use_motion=motion_model)
            occ_val_loader = make_loader(occ_val_set, SequentialSampler,
                                         train_cfg['test_batch_size'], 16,
                                         cfg=train_cfg['aug_cfg'])

        # Statistics are only collected from this epoch onwards.
        stats_start_epoch = 30
        p_dec_v_list = []
        p_dec_q_list = []

        for epoch in range(begin_epoch, train_cfg['epoch_num']):
            adjust_learning_rate(optimizer, epoch,
                                 train_cfg['lr_decay_rate'],
                                 train_cfg['lr_decay_epoch'])
            lr = optimizer.param_groups[-1]['lr']
            train(net, PVNet, optimizer, train_loader, epoch)

            print('evaluate with val_loader')
            # val returns nine values; only the last two are aggregated.
            (_add_list, _first_a, _first_v, _largest_a, _smallest_v,
             _smallest_q, _p_inc_add, p_dec_v, p_dec_q) = val(
                 net, PVNet, val_loader, epoch, lr, writer,
                 use_motion=motion_model)

            is_eval_epoch = (train_cfg['eval_epoch']
                             and epoch % train_cfg['eval_inter'] == 0
                             and epoch >= train_cfg['eval_epoch_begin'])
            if is_eval_epoch and epoch >= stats_start_epoch:
                p_dec_v_list.append(p_dec_v)
                p_dec_q_list.append(p_dec_q)

            save_model(net.module.imNet, net.module.estNet, optimizer, epoch,
                       model_dir)

        print(train_cfg['exp_name'])
        print_summary('X-X^', p_dec_v_list)
        print_summary('q-q^', p_dec_q_list)

    tf_dir = './runs/' + train_cfg['exp_name']
    writer = SummaryWriter(log_dir=tf_dir)
    try:
        run(writer)
    finally:
        # Flush and release the event file even if evaluation/training fails.
        writer.close()