def get_model(cuda=True):
    """Build the network described by the module-level ``opt`` and load its weights.

    The checkpoint at ``opt.model_path`` is read as a bare state dict (no
    wrapping ``{"state_dict": ...}`` layer) and loaded with ``strict=True``.

    Args:
        cuda: When true, build the model for ``'cuda'`` and move it to the
            GPU after loading; otherwise use ``'cpu'``.

    Returns:
        The model in evaluation mode.
    """
    attr, _ = get_tasks(opt)
    device = 'cuda' if cuda else 'cpu'
    model, _, _ = get_m(opt.conv, device=device, classifier=opt.classifier, attr=attr)

    # Always deserialize onto the CPU first; the model is moved afterwards.
    state_dict = torch.load(opt.model_path, map_location='cpu')

    # Checkpoints written from an nn.DataParallel wrapper carry a "module."
    # prefix on every key.  Strip it only where it is actually present so
    # that both prefixed and plain checkpoints load under strict=True.
    prefix = 'module.'
    for key in list(state_dict.keys()):
        if key.startswith(prefix):
            state_dict[key[len(prefix):]] = state_dict.pop(key)

    model.load_state_dict(state_dict, strict=True)
    if cuda:
        model = model.cuda()
    model.eval()
    return model
def jit_trace(args, model_path, output_name='ap22'):
    """Trace a trained model with TorchScript and save it as ``<output_name>.pt``.

    Args:
        args: Parsed options; ``conv``, ``classifier`` and ``dropout`` are
            read here, and the whole object is passed to ``get_tasks``.
        model_path: Path of the state-dict checkpoint to load.
        output_name: Stem of the saved TorchScript file.  Defaults to
            ``'ap22'``, the name that was previously hard-coded.
    """
    attr, _ = get_tasks(args)
    model, _, _ = get_model(args.conv, classifier=args.classifier,
                            dropout=args.dropout, attr=attr)

    # The model is traced on the CPU, so deserialize the weights there too;
    # without map_location a GPU-saved checkpoint cannot be read on a
    # CPU-only machine.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.cpu()

    # Tracing records the ops executed for this single 1x3x224x224 input.
    example = torch.rand(1, 3, 224, 224)
    traced = torch.jit.trace(model.eval(), example)
    traced.save('{}.pt'.format(output_name))
    print('transform succeed')
def get_model(cuda=True):
    """Build a ``CubeNet`` from the module-level ``opt`` and load its weights.

    The checkpoint at ``opt.model_path`` must be a mapping with a
    ``"state_dict"`` entry.  Keys may or may not carry the ``"module."``
    prefix that ``nn.DataParallel`` adds; the prefix is removed only where
    it is present.

    Args:
        cuda: When true, move the loaded model to the GPU.

    Returns:
        The model in evaluation mode.
    """
    attr, _ = get_tasks(opt)
    model = CubeNet(opt.train, opt.conv, attr, pretrained=False,
                    img_size=opt.person_size, attention=opt.attention,
                    dropout=opt.dropout, at=opt.at, at_loss=opt.at_loss)

    state_dict = torch.load(opt.model_path, map_location='cpu')["state_dict"]

    # Slicing off seven characters unconditionally would corrupt the keys of
    # a checkpoint saved from the unwrapped module, so check for the prefix.
    prefix = 'module.'
    for key in list(state_dict.keys()):
        if key.startswith(prefix):
            state_dict[key[len(prefix):]] = state_dict.pop(key)

    model.load_state_dict(state_dict, strict=True)
    if cuda:
        model = model.cuda()
    model.eval()
    return model
# Fragment with two parts, collapsed onto one physical line:
#   1. Three logging statements that write the validation header, the summary
#      table and the last 'ap' value.  They read `epoch`, `optimizer` and
#      `metrics_info`, none of which are defined in the visible text, so they
#      presumably close an enclosing function that is not shown.
#   2. Script-level setup for a distillation run: parse options, create a
#      combined logger and a file-only logger, resolve the attribute tasks,
#      build losses/metrics and the data loaders, obtain the teacher/student
#      pair from `get_pair_model`, pick the distillation wrapper according to
#      `args.direct_connect`, and wrap the student in DataParallel on CUDA.
# NOTE(review): `parser` is assigned but the options come from `parse_opts()`;
# confirm whether `parser` is used further down.
# NOTE(review): because the line breaks are lost, everything after the inline
# "# Load dataset" marker is currently parsed as comment text; restore the
# original line structure before running this code.
logger_file("val: Validation Results - Epoch: {} - LR: {}".format(epoch, optimizer.optimizer.param_groups[0]['lr'])) print_summar_table(logger_file, attr_name, metrics_info['logger']) logger_file('AP:%0.3f' % metrics_info['logger']['attr']['ap'][-1]) parser = argparse.ArgumentParser(description='PyTorch my data Training') args = parse_opts() max_epoch = args.max_epoch - args.distill_epoch device = 'cuda' if torch.cuda.is_available() else 'cpu' log = Logger('both', filename=os.path.join(args.log_dir, args.log_file + '_all'), level='debug', mode='both') logger = log.logger.info log_config(args, logger) log_file = Logger('file', filename=os.path.join(args.log_dir, args.log_file), level='debug', mode='file') logger_file = log_file.logger.info attr, attr_name = get_tasks(args) criterion_CE, metrics = get_losses_metrics(attr, args.categorical_loss) # Load dataset, net, evaluator, Saver trainloader, testloader = get_data(args, attr, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) t_net, s_net, channel_t, channel_s, layer_t, layer_s, index, filter_list = \ get_pair_model(args.size, frm='my', name_t=args.name_t, name_s=args.name_s, load_BN=args.load_BN, logger=logger, bucket=args.bucket, classifier=args.classifier) if args.direct_connect: distill_net = AB_distill_Mobilenetl2MobilenetsNoConnect(t_net, s_net, args.batch_size, args.DTL, args.AB_loss_multiplier, args.DTL_loss_multiplier, channel_t, channel_s, layer_t, layer_s, criterion_CE, index, args.DTL_loss) else: distill_net = AB_distill_Mobilenetl2Mobilenets(t_net, s_net, args.batch_size, args.DTL, args.AB_loss_multiplier, args.DTL_loss_multiplier, channel_t, channel_s, layer_t, layer_s, criterion_CE, args.stage1, args.DTL_loss) if device == 'cuda': s_net = torch.nn.DataParallel(s_net).cuda()
def _build_optimizer(opt, parameters):
    """Create the optimizer selected by ``opt.optimizer`` ('sgd' or 'adam')."""
    if opt.optimizer == 'sgd':
        return SGD(parameters, lr=opt.lr, momentum=opt.momentum,
                   weight_decay=opt.weight_decay, nesterov=opt.nesterov)
    if opt.optimizer == 'adam':
        return Adam(parameters, lr=opt.lr, betas=opt.betas)
    raise Exception("Not supported")


def _load_checkpoint(opt, model, optimizer, logger):
    """Restore epoch, model weights and optimizer state from ``opt.checkpoint``.

    ``model`` is the ``nn.DataParallel`` wrapper.  Checkpoints written by this
    module store the *unwrapped* module's weights, so the weights are loaded
    into ``model.module``; a "module." key prefix is removed where present so
    that checkpoints saved from the wrapper load as well.
    """
    logger('loading checkpoint {}'.format(opt.checkpoint))
    # Deserialize on the CPU; load_state_dict copies into the live tensors.
    checkpoint = torch.load(opt.checkpoint, map_location='cpu')
    opt.begin_epoch = checkpoint['epoch']

    state_dict = checkpoint['state_dict']
    prefix = 'module.'
    for key in list(state_dict.keys()):
        if key.startswith(prefix):
            state_dict[key[len(prefix):]] = state_dict.pop(key)
    model.module.load_state_dict(state_dict)
    optimizer.load_state_dict(checkpoint['optimizer'])


def _run_training(opt, logger, attr, attr_name, model, parameters,
                  train_loader, val_loader, writer):
    """Wire up optimizer, engines and event handlers, then run the trainer."""
    # Wrapping happens only after the summary writer was created from the
    # bare model (the writer has to come first).
    model = nn.DataParallel(model, device_ids=None)

    # Learning configuration
    optimizer = _build_optimizer(opt, parameters)
    scheduler = lr_scheduler.ReduceLROnPlateau(
        optimizer, 'max', patience=opt.lr_patience, factor=opt.factor,
        min_lr=1e-6)

    if opt.checkpoint:
        _load_checkpoint(opt, model, optimizer, logger)

    # Use the GPU when there is one instead of assuming it exists.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    loss_fns, metrics = get_losses_metrics(
        attr, opt.categorical_loss, opt.at, opt.at_loss)
    trainer = my_trainer(
        model, optimizer,
        lambda pred, target, epoch: multitask_loss(
            pred, target, loss_fns, len(attr_name), opt.at_coe, epoch),
        device=device)
    train_evaluator = create_supervised_evaluator(
        model,
        metrics={'multitask': MultiAttributeMetric(metrics, attr_name)},
        device=device)
    val_evaluator = create_supervised_evaluator(
        model,
        metrics={'multitask': MultiAttributeMetric(metrics, attr_name)},
        device=device)

    # Training timer handlers: model_timer runs between ITERATION_STARTED and
    # ITERATION_COMPLETED, data_timer runs during the gap between iterations.
    model_timer, data_timer = Timer(average=True), Timer(average=True)
    model_timer.attach(trainer,
                       start=Events.EPOCH_STARTED,
                       resume=Events.ITERATION_STARTED,
                       pause=Events.ITERATION_COMPLETED,
                       step=Events.ITERATION_COMPLETED)
    data_timer.attach(trainer,
                      start=Events.EPOCH_STARTED,
                      resume=Events.ITERATION_COMPLETED,
                      pause=Events.ITERATION_STARTED,
                      step=Events.ITERATION_STARTED)

    # Training log/plot handlers
    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        # 1-based position of this iteration inside the current epoch.
        iter_num = (engine.state.iteration - 1) % len(train_loader) + 1
        if iter_num % opt.log_interval == 0:
            logger(
                "Epoch[{}] Iteration[{}/{}] Sum Loss: {:.2f} Cls Loss: {:.2f} At Loss: {:.2f} "
                "Coe: {:.2f} Model Process: {:.3f}s/batch Data Preparation: {:.3f}s/batch"
                .format(engine.state.epoch, iter_num, len(train_loader),
                        engine.state.output['sum'],
                        engine.state.output['cls'],
                        engine.state.output['at'],
                        engine.state.output['coe'],
                        model_timer.value(), data_timer.value()))
            writer.add_scalar("training/loss", engine.state.output['sum'],
                              engine.state.iteration)

    # Log/Plot Learning rate
    @trainer.on(Events.EPOCH_STARTED)
    def log_learning_rate(engine):
        lr = optimizer.param_groups[-1]['lr']
        logger('Epoch[{}] Starts with lr={}'.format(engine.state.epoch, lr))
        writer.add_scalar("learning_rate", lr, engine.state.epoch)

    # Checkpointing
    @trainer.on(Events.EPOCH_COMPLETED)
    def save_checkpoint(engine):
        if engine.state.epoch % opt.save_interval == 0:
            save_file_path = os.path.join(
                opt.log_dir, 'save_{}.pth'.format(engine.state.epoch))
            states = {
                'epoch': engine.state.epoch,
                'arch': opt.model,
                # Weights of the unwrapped module: keys carry no "module."
                'state_dict': model.module.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(states, save_file_path)

    # Evaluation handlers
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        runs = (('train', train_loader, train_evaluator),
                ('val', val_loader, val_evaluator))
        for name, data, evl in runs:
            evl.run(data)
            metrics_info = evl.state.metrics["multitask"]
            for m, val in metrics_info['metrics'].items():
                writer.add_scalar(name + '_metrics/{}'.format(m), val,
                                  engine.state.epoch)
            for m, val in metrics_info['summaries'].items():
                writer.add_scalar(name + '_summary/{}'.format(m), val,
                                  engine.state.epoch)
            logger(
                name + ": Validation Results - Epoch: {}".format(engine.state.epoch))
            print_summar_table(logger, attr_name, metrics_info['logger'])

            # Update the learning rate from the last 'ap' value of the
            # training pass.
            if name == 'train':
                scheduler.step(metrics_info['logger']['attr']['ap'][-1])

    # kick everything off
    logger('Start training')
    trainer.run(train_loader, max_epochs=opt.n_epochs)


def run(opt):
    """Train a multi-attribute model as configured by ``opt``.

    Sets up logging, resolves the attribute tasks, builds the model and data
    loaders, then trains for ``opt.n_epochs`` epochs.  During training it
    logs the loss every ``opt.log_interval`` iterations, writes a checkpoint
    to ``opt.log_dir`` every ``opt.save_interval`` epochs, and evaluates on
    both loaders after each epoch.  If ``opt.checkpoint`` is set, model and
    optimizer state are restored from it first.

    Args:
        opt: Parsed options object; mutated when a checkpoint is loaded
            (``opt.begin_epoch`` is overwritten).

    Raises:
        Exception: If ``opt.optimizer`` is neither ``'sgd'`` nor ``'adam'``.
    """
    log = Logger(filename=os.path.join(opt.log_dir, opt.log_file), level='debug')
    logger = log.logger.info

    # Decide what attrs to train
    attr, attr_name = get_tasks(opt)

    # Generate model based on tasks
    logger('Loading models')
    model, parameters, mean, std = generate_model(opt, attr)

    logger('Loading dataset')
    train_loader, val_loader = get_data(opt, attr, mean, std)

    writer = create_summary_writer(model, train_loader, opt.log_dir)
    try:
        _run_training(opt, logger, attr, attr_name, model, parameters,
                      train_loader, val_loader, writer)
    finally:
        # Close the writer even when setup or training raises.
        writer.close()
input_graph_def = graph.as_graph_def() if clear_devices: for node in input_graph_def.node: node.device = "" frozen_graph = convert_variables_to_constants(session, input_graph_def, output_names, freeze_var_names) return frozen_graph opt = parse_opts() input_np = np.random.uniform(0, 1, (1, 3, opt.person_size, opt.person_size)) input_var = Variable(torch.FloatTensor(input_np)) attr, _ = get_tasks(opt) model = CubeNet(opt.train, opt.conv, attr, pretrained=False, img_size=opt.person_size, attention=opt.attention, dropout=opt.dropout, at=opt.at, at_loss=opt.at_loss) path = "CubeModel/pretrained/save_60.pth" state_dict = torch.load(path, map_location='cpu')["state_dict"] for k in list(state_dict.keys()): k_new = k[7:] state_dict[k_new] = state_dict[k]