def train(args):
    """Train the TSN model on UCF data under PaddlePaddle dygraph mode.

    Expects ``args`` to provide: use_gpu, config, batch_size, pretrain,
    save_dir, epoch.  Prints running loss/accuracy every 100 batches and
    checkpoints the model to ``args.save_dir + '/tsn_model'``.
    """
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # Parse the experiment config and merge CLI overrides for 'train'.
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))

        # Build the user-defined network and switch it to training mode.
        train_model = TSN(args.batch_size, 32)
        train_model.train()
        opt = fluid.optimizer.Momentum(
            0.001, 0.9, parameter_list=train_model.parameters())

        if args.pretrain:
            # Resume from the previously saved checkpoint and keep training.
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/tsn_model')
            train_model.load_dict(model)

        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # Reader yields one batch per call; each sample is (imgs, label).
        train_reader = UCFReader('train', train_config).create_reader()

        epochs = args.epoch
        for i in range(epochs):
            for batch_id, data in enumerate(train_reader()):
                # imgs shape: (batch, seg_num*seg_len, 3, target, target)
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                # labels shape: (batch,)
                y_data = np.array([x[1] for x in data]).astype('int64')

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                # BUGFIX: use -1 instead of args.batch_size so the final,
                # possibly smaller batch does not break the reshape.
                label = fluid.layers.reshape(label, [-1, 1])
                # Labels are targets only; never back-propagate into them.
                label.stop_gradient = True

                out, acc = train_model(img, label)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)

                avg_loss.backward()
                opt.minimize(avg_loss)
                train_model.clear_gradients()

                if batch_id % 100 == 0:
                    print("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))

            # Checkpoint once per epoch.
            fluid.dygraph.save_dygraph(train_model.state_dict(),
                                       args.save_dir + '/tsn_model')
        print("Final loss: {}".format(avg_loss.numpy()))
def main():
    """Entry point: build the TSN model, data loaders and optimizer, then
    run the train/validate loop, checkpointing the best top-1 accuracy."""
    global args, best_prec1
    args = parser.parse_args()

    # Map the dataset name to its number of action classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Frames consumed per segment, dictated by the input modality.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        data_length = 5  # generate 5 displacement map, using 6 RGB images

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                new_length=data_length)
    model = model.to(device)

    # Pre-processing parameters are dictated by the chosen backbone.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()

    if device.type == 'cuda':
        model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            # BUGFIX: report the checkpoint actually loaded (args.resume),
            # not args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.  RGBDiff normalizes implicitly, so skip it there.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("",
                   args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality
                   in ["RGB", "RGBDiff", "CV"] else args.flow_prefix +
                   "{}_{:05d}.jpg",
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       # BNInception expects BGR channel order and 0-255
                       # inputs; other backbones take RGB scaled to 0-1.
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("",
                   args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality
                   in ["RGB", "RGBDiff", "CV"] else args.flow_prefix +
                   "{}_{:05d}.jpg",
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     args.lr_steps,
                                                     gamma=0.1)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(0, args.epochs):
        # NOTE(review): stepping the scheduler before training (and for the
        # skipped epochs below) fast-forwards the LR schedule on resume;
        # modern PyTorch expects scheduler.step() after optimizer.step().
        scheduler.step()
        if epoch < args.start_epoch:
            continue

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

    writer.close()
add_nodes(u[0]) if hasattr(var, 'saved_tensors'): for t in var.saved_tensors: dot.edge(str(id(t)), str(id(var))) add_nodes(t) add_nodes(var.grad_fn) return dot if __name__ == '__main__': net = TSN(8,3,'RGB') x = Variable(torch.randn(64,9, 224, 224)) y = net(x) g = make_dot(y) g.view() params = list(net.parameters()) k = 0 for i in params: l = 1 print("该层的结构:" + str(list(i.size()))) for j in i.size(): l *= j print("该层参数和:" + str(l)) k = k + l print("总参数数量和:" + str(k))
def main():
    """TRN entry point: transfer-learn a TSN from MIT weights, then run the
    train/validate loop and checkpoint the best top-1 accuracy."""
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    # 339 classes = the Moments-in-Time pretraining head; the final layer is
    # swapped to num_class after loading those weights below.
    model = TSN(339,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Freeze the CNN backbone (first named child) for transfer learning.
    _, cnn = list(model.named_children())[0]
    for p in cnn.parameters():
        p.requires_grad = False

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # remove if not transfer learning
    # NOTE(review): hard-coded machine-specific path; move to a CLI flag.
    checkpoint = torch.load('/home/ec2-user/mit_weights.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])
    # Replace the pretrained 339-way classifier heads with num_class ones.
    for module in list(
            list(model._modules['module'].children())
        [-1].children())[-1].children():
        module[-1] = nn.Linear(256, num_class)

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: log args.resume (the loaded file), not args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True
    model.cuda()

    # Data loading code.  RGBDiff normalizes implicitly, so skip it there.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                       ToTorchFormatTensor(
                           div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                       ToTorchFormatTensor(
                           div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                8,
                                                gamma=0.1,
                                                last_epoch=-1)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')

    for epoch in range(args.start_epoch, args.epochs):
        # NOTE(review): pre-1.1 step order; modern PyTorch expects
        # scheduler.step() after the epoch's optimizer steps.
        scheduler.step()

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training,
                             epoch)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

    summary_writer.close()
def main():
    """STModeling entry point: build TSN, loaders, optimizer; train and
    validate, recording per-epoch metrics for later plotting."""
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = \
        datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join(['STModeling', args.dataset, args.modality,
                                args.arch, args.consensus_type,
                                'segment%d' % args.num_segments])
    print('storing name: ' + args.store_name)

    model = TSN(num_class, args)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()
    policies = model.get_optim_policies()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            # best_prec1 = 0
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: log args.resume (the loaded file), not args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    print(model)
    cudnn.benchmark = True

    # Data loading code.
    # BUGFIX: the original `(a != 'RGBDiff') | (a != 'RGBFlow')` was always
    # True (a string cannot equal both), so IdentityTransform was never
    # selected.  The intent is: normalize unless the modality handles it.
    if args.modality not in ('RGBDiff', 'RGBFlow'):
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    elif args.modality == 'RGBFlow':
        data_length = args.num_motion

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.train_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   dataset=args.dataset,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                             isRGBFlow=(args.modality == 'RGBFlow')),
                       ToTorchFormatTensor(
                           div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=False)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path,
                   args.val_list,
                   num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   dataset=args.dataset,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                             isRGBFlow=(args.modality == 'RGBFlow')),
                       ToTorchFormatTensor(
                           div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=False)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.consensus_type == 'DNDF':
        # Deep neural decision forests train only the unfrozen parameters.
        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=1e-5)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')

    history = {
        'accuracy': [],
        'val_accuracy': [],
        'loss': [],
        'val_loss': []
    }
    model_details = {
        'backbone': args.arch,
        'transformer_arch': args.consensus_type,
        'lr': args.lr,
        'batch_size': args.batch_size
    }

    for epoch in range(args.start_epoch, args.epochs):
        if not args.consensus_type == 'DNDF':
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        acc, loss = train(train_loader, model, criterion, optimizer, epoch,
                          log_training)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, val_loss = validate(val_loader, model, criterion,
                                       (epoch + 1) * len(train_loader),
                                       log_training)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)

            # BUGFIX: only record validation metrics on epochs where
            # validate() actually ran; previously prec1/val_loss could be
            # unbound (NameError) whenever eval_freq > 1.
            history['val_accuracy'].append(prec1)
            history['val_loss'].append(val_loss)

        history['accuracy'].append(acc)
        history['loss'].append(loss)

    plot_utils.plot_statistics(history, model_details)
def main():
    """Entry point for either training ('train') or offline testing ('test')
    of the STModeling TSN, selected by args.run_for."""
    check_rootfolders()
    global best_prec1

    if args.run_for == 'train':
        categories, args.train_list, args.val_list, args.root_path, prefix = \
            datasets_video.return_dataset(args.dataset, args.modality)
    elif args.run_for == 'test':
        categories, args.test_list, args.root_path, prefix = \
            datasets_video.return_data(args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'STModeling', args.dataset, args.modality, args.arch,
        args.consensus_type, 'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class, args)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()
    policies = model.get_optim_policies()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            # best_prec1 = 0
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: log args.resume (the loaded file), not args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    #print(model)
    cudnn.benchmark = True

    # Data loading code.
    # BUGFIX: the original `(a != 'RGBDiff') | (a != 'RGBFlow')` was always
    # True, so IdentityTransform was unreachable.  Normalize only when the
    # modality does not handle normalization itself.
    if args.modality not in ('RGBDiff', 'RGBFlow'):
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    elif args.modality == 'RGBFlow':
        data_length = args.num_motion

    if args.run_for == 'train':
        train_loader = torch.utils.data.DataLoader(
            TSNDataSet(
                "/home/machine/PROJECTS/OTHER/DATASETS/kussaster/data",
                args.train_list,
                num_segments=args.num_segments,
                new_length=data_length,
                modality=args.modality,
                image_tmpl=prefix,
                dataset=args.dataset,
                transform=torchvision.transforms.Compose([
                    train_augmentation,
                    Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                          isRGBFlow=(args.modality == 'RGBFlow')),
                    ToTorchFormatTensor(
                        div=(args.arch not in ['BNInception', 'InceptionV3'])),
                    normalize,
                ])),
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=False)

        val_loader = torch.utils.data.DataLoader(
            TSNDataSet(
                "/home/machine/PROJECTS/OTHER/DATASETS/kussaster/data",
                args.val_list,
                num_segments=args.num_segments,
                new_length=data_length,
                modality=args.modality,
                image_tmpl=prefix,
                dataset=args.dataset,
                random_shift=False,
                transform=torchvision.transforms.Compose([
                    GroupScale(int(scale_size)),
                    GroupCenterCrop(crop_size),
                    Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                          isRGBFlow=(args.modality == 'RGBFlow')),
                    ToTorchFormatTensor(
                        div=(args.arch not in ['BNInception', 'InceptionV3'])),
                    normalize,
                ])),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=False)

        # define loss function (criterion) and optimizer
        if args.loss_type == 'nll':
            criterion = torch.nn.CrossEntropyLoss().cuda()
        else:
            raise ValueError("Unknown loss type")

        for group in policies:
            print(
                ('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                    group['name'], len(group['params']), group['lr_mult'],
                    group['decay_mult'])))

        optimizer = torch.optim.SGD(policies,
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        if args.consensus_type == 'DNDF':
            # Decision-forest consensus trains only unfrozen params via Adam.
            params = [p for p in model.parameters() if p.requires_grad]
            optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=1e-5)

        if args.evaluate:
            validate(val_loader, model, criterion, 0)
            return

        log_training = open(
            os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')

        for epoch in range(args.start_epoch, args.epochs):
            if not args.consensus_type == 'DNDF':
                adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 \
                    or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader),
                                 log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)

    elif args.run_for == 'test':
        print("=> loading checkpoint '{}'".format(args.root_weights))
        checkpoint = torch.load(args.root_weights)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        model.cuda().eval()
        print("=> loaded checkpoint ")

        test_loader = torch.utils.data.DataLoader(
            TSNDataSet(
                "/home/machine/PROJECTS/OTHER/DATASETS/kussaster/data",
                args.test_list,
                num_segments=args.num_segments,
                new_length=data_length,
                modality=args.modality,
                image_tmpl=prefix,
                dataset=args.dataset,
                random_shift=False,
                transform=torchvision.transforms.Compose([
                    GroupScale(int(scale_size)),
                    GroupCenterCrop(crop_size),
                    Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                          isRGBFlow=(args.modality == 'RGBFlow')),
                    ToTorchFormatTensor(
                        div=(args.arch not in ['BNInception', 'InceptionV3'])),
                    normalize,
                ])),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=False)

        # (dead commented-out webcam preview prototype removed)
        test(test_loader, model, categories)
def main():
    """Paddle dygraph entry point: optionally fine-tune TSN from pretrained
    weights, then train with LR decay triggered by validation saturation."""
    place = fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        global args
        args = parser.parse_args()

        # Map the dataset name to its number of action classes.
        if args.dataset == 'ucf101':
            args.num_class = 101
        elif args.dataset == 'hmdb51':
            args.num_class = 51
        elif args.dataset == 'kinetics':
            args.num_class = 400
        else:
            raise ValueError('Unknown dataset ' + args.dataset)

        model = TSN(args.num_class,
                    args.num_segments,
                    args.modality,
                    args.arch,
                    dropout=args.dropout)

        # Pre-processing parameters come from the backbone.
        args.short_size = model.scale_size
        args.target_size = model.crop_size
        args.input_mean = model.input_mean
        # NOTE(review): `* 3` replicates the std if it is a per-channel
        # list — presumably to cover 3 color channels; confirm intent.
        args.input_std = model.input_std * 3

        if args.pretrained_parts == 'finetune':
            print('***Finetune model with {}***'.format(args.pretrained_model))
            state_dict = fluid.dygraph.load_dygraph(args.pretrained_model)[0]
            model_dict = model.state_dict()
            # Report mismatches between the checkpoint and the model.
            print('extra keys: {}'.format(
                set(list(state_dict.keys())) - set(list(model_dict.keys()))))
            print('missing keys: {}'.format(
                set(list(model_dict.keys())) - set(list(state_dict.keys()))))
            # Copy everything except the classifier ('fc') layers, which are
            # re-trained for the new number of classes.
            for k, v in state_dict.items():
                if 'fc' not in k:
                    model_dict.update({k: v})
            model.set_dict(model_dict)

        optimizer = fluid.optimizer.Momentum(
            args.lr,
            args.momentum,
            model.parameters(),
            regularization=fluid.regularizer.L2Decay(args.weight_decay),
            grad_clip=fluid.clip.GradientClipByGlobalNorm(args.clip_gradient))

        train_reader = KineticsReader('train', args,
                                      args.train_list).create_reader()
        val_reader = KineticsReader('val', args,
                                    args.val_list).create_reader()

        saturate_cnt = 0
        best_prec1 = 0
        log = open(os.path.join(args.log_path, args.save_name + '_train.csv'),
                   'w')

        for epoch in range(args.epochs):
            # Decay the LR once validation accuracy has stalled for
            # num_saturate consecutive evaluations.
            if saturate_cnt == args.num_saturate:
                print('learning rate decay by 0.1.')
                log.write('learning rate decay by 0.1. \n')
                log.flush()
                adjust_learing_rate(optimizer)
                saturate_cnt = 0

            train(train_reader, model, optimizer, epoch, log)

            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_reader, model, epoch, log)

                is_best = prec1 > best_prec1
                if is_best:
                    saturate_cnt = 0
                else:
                    saturate_cnt = saturate_cnt + 1
                # BUGFIX: update the best accuracy BEFORE formatting the log
                # line, so a new best epoch reports its own value instead of
                # the stale previous best.
                best_prec1 = max(prec1, best_prec1)

                output = "- Validation Prec@1 saturates for {} epochs. Best acc{}".format(
                    saturate_cnt, best_prec1)
                print(output)
                log.write(output + '\n')
                log.flush()

                if is_best:
                    fluid.dygraph.save_dygraph(
                        model.state_dict(),
                        os.path.join(args.save_dir, args.save_name))
        log.close()