def main(args):
    """Evaluate a checkpointed CombinedNet16s model on the test split."""
    # create configuration
    config = Config(is_history=args.hist, hist_net=args.hist_net)

    # set seed (both torch and numpy for reproducibility)
    torch.manual_seed(config.seed)
    np.random.seed(config.seed)

    # prepare dataloaders — only the test split is needed here
    print("===> Creating the test dataloader...")
    _, _, test_loader = get_loader(config)

    # create model
    print("===> Creating the model (is_history={})".format(str(config.is_history)))
    model = CombinedNet16s(is_history=config.is_history,
                           is_dec_affine=config.is_dec_affine,
                           is_insnorm_layer=config.is_insnorm_layer,
                           hist_net_type=config.hist_net).cuda()
    print(model)

    # restore trained weights, then run the evaluation loop
    print('===> Loading the checkpoint:{}'.format(args.model))
    load_checkpoint(model, args.model)
    test(model, test_loader)
def main():
    """Evaluate a trained classifier on the test split and save metrics to JSON."""
    # Set the random seed for reproducible experiments
    torch.manual_seed(230)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', help="Directory containing the dataset")
    parser.add_argument('--model_dir', help="Directory containing params.json")
    parser.add_argument('--params', help='Directory containing params.json')
    parser.add_argument('--restore_file', default='best',
                        help="name of the file in --model_dir \
                        containing weights to load")
    # BUG FIX: the arguments were never parsed, so every `args.*` access below
    # raised NameError.
    args = parser.parse_args()

    params = utils.Params(args.params)

    # Get the logger
    # BUG FIX: model_dir is a CLI argument, not a Params field.
    utils.set_logger(os.path.join(args.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")
    test_dataset = dataset(file_path=params.metadata_file, split="Test",
                           classes=params.classes)
    # NOTE(review): shuffle=True on a test loader is unusual, but aggregate
    # metrics are order-independent, so it is kept as-is.
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=params.batch_size,
                             shuffle=True,
                             num_workers=8)
    logging.info("- done.")

    # Define the model and optimizer
    # BUG FIX: `model` was compared before assignment; the model *name* lives
    # in params.model.
    if params.model != "Inception":
        net = importlib.import_module("features.models.{}".format(params.model))
        model = net.Net()
        inception = False
    else:
        model = models.inception_v3(pretrained=False)
        # BUG FIX: `num_classes` was undefined; derive it from the configured
        # class list. TODO(review): confirm this matches the training head.
        model.fc = nn.Linear(2048, len(params.classes))
        model.AuxLogits.fc = nn.Linear(768, 1)
        inception = True
    model.cuda()

    metrics_save = metrics_code.metrics_save
    logging.info("Starting evaluation")

    # Reload weights from the saved file
    utils.load_checkpoint(
        os.path.join(args.model_dir, args.restore_file + '.pth.tar'), model)

    # Evaluate
    # NOTE(review): `experiment` is not defined in this function — presumably a
    # module-level tracking object; confirm it exists before running.
    test_metrics = evaluate(model, test_loader, metrics_save, experiment, inception)
    # BUG FIX: bare `model_dir`/`restore_file` were NameErrors; use the parsed args.
    save_path = os.path.join(args.model_dir,
                             "metrics_test_{}.json".format(args.restore_file))
    utils.save_dict_to_json(test_metrics, save_path)
def train():
    """Train WaveRNN, maintaining an exponential-moving-average (EMA) copy of the weights."""
    torch.cuda.set_device(0)
    iteration = 0
    model = WaveRNN(HPARAMS)
    model = model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=HPARAMS.lr)
    if ARGS.checkpoint:
        # Resolve the EMA checkpoint paired with the given vanilla checkpoint
        # (EMA files are named 'ema_model_<name>' alongside the vanilla one).
        if os.path.basename(ARGS.checkpoint).startswith('ema_model'):
            ema_checkpoint = ARGS.checkpoint
        else:
            ema_checkpoint = 'ema_model_' + os.path.basename(ARGS.checkpoint)
            ema_checkpoint = os.path.join(os.path.dirname(ARGS.checkpoint),
                                          ema_checkpoint)
        # Initialise EMA from the ema checkpoint.
        logging.info('Initialising ema model {}'.format(ema_checkpoint))
        ema_model = WaveRNN(HPARAMS).cuda()
        # NOTE(review): load_checkpoint is unpacked into 2 values here and 3 below —
        # presumably its return arity depends on whether an optimizer is passed; confirm.
        ema_base_model, _ = load_checkpoint(ema_checkpoint, ema_model)
        ema = init_ema(ema_base_model, HPARAMS.ema_rate)
        # Initialise vanilla model
        logging.info('Loading checkpoint {}'.format(ARGS.checkpoint))
        model, iteration, optimizer = load_checkpoint(ARGS.checkpoint, model,
                                                      optimizer)
    else:
        # Initialise EMA from scratch.
        ema = init_ema(model, HPARAMS.ema_rate)
    criterion = nn.NLLLoss(reduction='sum').cuda()
    train_loader, test_loader = get_loader(ARGS.data, 'train', HPARAMS), get_loader(ARGS.data, 'valid', HPARAMS)
    whole_loader = get_loader(ARGS.data, 'valid', HPARAMS, whole=True)
    model = nn.DataParallel(model)
    # Skip the epochs already covered by the restored iteration counter.
    epoch_offset = max(0, int(iteration / len(train_loader)))
    for _ in range(epoch_offset, ARGS.epochs):
        iteration = train_step(
            train_loader, test_loader, whole_loader, model, optimizer, criterion,
            iteration, ema=ema
        )
        averaged_model = clone_as_averaged_model(model, ema)
        # Persist both the raw and the EMA-averaged weights after every epoch.
        save_checkpoint(
            {
                'state_dict': model.module.state_dict(),
                'iteration': iteration,
                'dataset': ARGS.data,
                'optimizer': optimizer.state_dict(),
            },
            iteration,
            'checkpoints/{}/lastmodel.pth'.format(ARGS.expName),
            ARGS.expName,
        )
        save_checkpoint(
            {
                'state_dict': averaged_model.state_dict(),
                'iteration': iteration,
                'dataset': ARGS.data,
                'optimizer': optimizer.state_dict(),
            },
            iteration,
            'checkpoints/{}/ema_model_lastmodel.pth'.format(ARGS.expName),
            ARGS.expName,
        )
def vanilla_policy_gradient(args):
    """This is where the VPG magic happens"""
    # NOTE(review): `experiment_name` is not defined or passed in — presumably a
    # module-level global; confirm it exists before running.
    writer = TBWrapper(experiment_name)
    device = get_device(args.device)
    # Policy network over flattened pre-processed frames.
    policy = KarpathyPongPolicy(device, PRE_PROCESS_OUTPUT_DIM**2,
                                args.num_actions)
    optimizer = torch.optim.RMSprop(policy.parameters(),
                                    lr=args.policy_lr,
                                    alpha=args.rms_decay_rate,
                                    centered=True)
    optimizer.zero_grad()
    if args.resume:
        load_checkpoint(policy, optimizer, args.resume)
    policy.train()
    env = gym.make("Pong-v0")
    encode_action.n_actions = args.num_actions
    # NOTE(review): `reward_eam_factor` looks like a typo of "ema", but the name
    # must match the argparse definition elsewhere — do not rename here alone.
    reward_ema = ExponentialMovingAvg(args.reward_eam_factor)
    episode_number = 0
    while True:
        # Run one full episode, with a progress bar tied to its number.
        with GameProgressBar(episode_number) as progress_bar:
            experience, episode_reward = collect_experience(
                env, policy, args.render, progress_bar, args.debug_diagram_steps)
        episode_number += 1
        # Gradients accumulate across episodes until update_policy() is called.
        policy_loss, H, lt_entropy = compute_and_accumulate_policy_gradients(
            experience)
        if args.debug_diagram_steps is not None:
            # Draw the backward graph and exit
            torchviz.make_dot(policy_loss, params=dict(
                policy.named_parameters())).render(
                    "pg_loss_backward", format="png")
            exit()
        reward_ema.update(episode_reward)
        writer.log_kvdict(
            {
                'episode_reward': episode_reward,
                'running_return': reward_ema.value,
                'policy_loss': policy_loss.item(),
                'ep_action_entropy': H.item(),
                'lt_action_entropy': lt_entropy.item()
            }, episode_number)
        print(' '.join(['reward=%d', 'running mean=%.2f', 'loss=%f']) %
              (episode_reward, reward_ema.value, policy_loss.item()),
              end='')
        # Update policy parameters every batch_size episodes
        if episode_number % args.batch_size == 0:
            update_policy(policy, optimizer, episode_number, writer,
                          args.log_params)
        # Under profiling, stop after the second episode.
        if args.profile and episode_number > 1:
            return
def create_model(model_name,
                 input_size=None,
                 num_classes=None,
                 bench_task='',
                 pretrained=False,
                 checkpoint_path='',
                 checkpoint_ema=False,
                 **kwargs):
    """Build an EfficientDet model, optionally wrapped in a train/predict bench."""
    config = get_efficientdet_config(model_name)

    # Optional overrides of the canned configuration.
    if num_classes is not None:
        config.num_classes = num_classes
    if input_size is not None:
        config.image_size = input_size

    pretrained_backbone_path = kwargs.pop('pretrained_backbone_path', '')
    if pretrained or checkpoint_path:
        pretrained_backbone_path = ''  # no point in loading backbone weights

    strict_load = kwargs.pop('strict_load', True)
    redundant_bias = kwargs.pop('redundant_bias', None)
    if redundant_bias is not None:
        # override config if set to something
        config.redundant_bias = redundant_bias

    soft_nms = kwargs.pop('soft_nms', False)
    config.label_smoothing = kwargs.pop('label_smoothing', 0.1)
    remove_params = kwargs.pop('remove_params', [])
    freeze_layers = kwargs.pop('freeze_layers', [])
    config.fused_focal_loss = kwargs.pop('fused_focal_loss', False)

    model = EfficientDet(config,
                         pretrained_backbone_path=pretrained_backbone_path,
                         **kwargs)

    # FIXME handle different head classes / anchors and re-init of necessary layers w/ pretrained load
    if checkpoint_path:
        load_checkpoint(model, checkpoint_path, use_ema=checkpoint_ema,
                        strict=strict_load, remove_params=remove_params)
    if len(freeze_layers) > 0:
        freeze_layers_fn(model, freeze_layers=freeze_layers)

    # wrap model in task specific bench if set
    if bench_task == 'train':
        model = DetBenchTrain(model, config)
    elif bench_task == 'predict':
        model = DetBenchPredict(model, config, soft_nms)
    return model
def test_only(model, train_dataloader, val_dataloader, optimizer, loss_fn,
              metrics, params, model_dir, logger, restore_file=None):
    """Evaluate the model on the train and validation sets without training."""
    # reload weights from restore_file if specified
    if restore_file is not None:
        logging.info("Restoring parameters from {}".format(restore_file))
        checkpoint = utils.load_checkpoint(restore_file, model, optimizer)
        best_val_acc = checkpoint['best_val_acc']
        params.current_epoch = checkpoint['epoch']
        print('best_val_acc=', best_val_acc, flush=True)
        print(optimizer.state_dict()['param_groups'][0]['lr'],
              checkpoint['epoch'], flush=True)

    # Evaluate on both splits; results are discarded (side effects live in
    # evaluate/logger).
    model.eval()
    train_metrics = evaluate(model, loss_fn, train_dataloader, metrics,
                             params, logger)
    model.eval()
    val_metrics = evaluate(model, loss_fn, val_dataloader, metrics,
                           params, logger)
    pass
def main(opt: Options):
    """End-to-end training driver, configured entirely from *opt*."""
    model = models.create_model(opt)
    print("Training with network:")
    print(model)
    writer = SummaryWriter(opt.checkpoints_dir)
    train_dl = data.create_data_loader(opt, "train")
    test_dl = data.create_data_loader(opt, "test")
    # Criterion/optimizer/scheduler classes are looked up by name from opt and
    # instantiated with positional arguments supplied as sequences in the options.
    criterion = getattr(common, opt.criterion)(*opt.criterion_args)
    temperature = TemperatureScheduler(*opt.temperature)
    optimizer = getattr(torch.optim, opt.optimizer)(model.parameters(),
                                                    *opt.optimizer_args)
    scheduler = getattr(torch.optim.lr_scheduler, opt.scheduler)(optimizer,
                                                                 *opt.scheduler_args)
    device = torch.device(opt.device)
    # Resume state; `epoch` is the last completed epoch (training restarts at epoch + 1).
    model, epoch, optimizer, scheduler = load_checkpoint(opt.checkpoint_path,
                                                         model, optimizer,
                                                         scheduler, device)
    if opt.is_classification:
        test_metric = test_accuracy
        metric_name = 'Accuracy'
    else:  # segmentation
        test_metric = partial(test_segmentation, n_classes=opt.n_classes)
        metric_name = 'mIoU'
    print('Setting up complete, starting training')
    for ep in range(epoch + 1, opt.max_epoch+1):
        train_epoch(ep, model, criterion, temperature, optimizer, train_dl,
                    device, writer, batch_average=opt.batch_average)
        test_score = test_metric(model, temperature.get(ep), test_dl, device)
        # The scalar's x-axis is in units of samples seen, not epochs.
        writer.add_scalar(f"{metric_name}/test", test_score,
                          ep * len(test_dl.dataset))
        print(f"Test {metric_name} after {ep} epochs = {test_score}")
        scheduler.step()
        if ep % opt.save_freq == 0:
            save_checkpoint(model, optimizer, scheduler, ep, opt)
def valid(val_loader, val_cls_list, val_attrs, model_dir):
    """Validate attribute-based retrieval: nearest attribute vector → class.

    Returns the number of correctly classified validation samples and prints
    the accuracy fraction.
    """
    # model = resnet50(pretrained=False, cut_at_pooling=False, num_features=1024, norm=False, dropout=0, num_classes=30)
    # model = get_network(num_classes=30, depth=50)
    model = ResNet50M(num_classes=30)
    model = nn.DataParallel(model).cuda()
    checkpoint = load_checkpoint(
        osp.join(model_dir, 'checkpoint_64_33.pth.tar'))
    model.module.load_state_dict(checkpoint['state_dict'])
    model.eval()

    # Extract features for every validation image.
    feat, label = [], []
    for i, d in enumerate(val_loader):
        imgs, _, l = d
        inputs = Variable(imgs)
        _, outputs = model(inputs)
        feat.append(outputs.data.cpu().numpy())
        label.extend(l)
    feat = np.vstack(feat)
    # name = name.hstack(name)

    # Nearest attribute vector under cosine distance gives the predicted class.
    dist = compute_dist(feat, val_attrs, 'cosine')
    result = []
    for i, v in enumerate(dist):
        # BUG FIX (idiom): the original shadowed the builtin `max` and did a
        # manual list scan; np.argmax returns the first index of the maximum,
        # which matches list(v).index(v.max()) exactly.
        index = int(np.argmax(v))
        result.append((int(label[i]), val_cls_list[index]))

    n = 0
    for tar, pre in result:
        if pre == tar:
            n = n + 1
    print('the acc is {}/{}'.format(n, len(result)))
    return n
def __init__(self, image_width, image_height, n_channels, n_classes, checkpoint_path):
    """Record image geometry and resolve the checkpoint for *checkpoint_path*."""
    self._image_height = image_height
    self._image_width = image_width
    self._n_channels = n_channels
    self._n_classes = n_classes
    # NOTE(review): load_checkpoint's return value is stored as the "path" —
    # presumably it resolves/validates the location; confirm against its definition.
    self._checkpoint_path = load_checkpoint(checkpoint_path)
def predict(**cfg):
    """Classify a single image and print the predicted class label."""
    # Preprocessing pipeline matching an ImageNet-normalised backbone.
    preprocess = transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225]),
    ])

    # Load the image as a batch-of-one float tensor.
    img = Image.open(cfg["predict"]["image_path"]).convert("RGB")
    image = preprocess(img).float().unsqueeze(0)

    # Load Model
    model = load_checkpoint(**cfg)

    # Check GPU availability
    train_gpu, _ = check_gpu()
    if train_gpu:
        image = image.cuda()

    # Get model prediction
    output = model(image)
    _, pred = torch.max(output, 1)

    # Check prediction - Normal or Pneumonia
    print("Prediction: " + str(model.idx_to_class[pred]))
def test(test_loader, test_cls_list, test_attrs, model_dir):
    """Score test images against attribute vectors and print the accuracy.

    The ground-truth class id is taken from the first three characters of each
    filename.
    """
    model = resnet50(pretrained=False, cut_at_pooling=False,
                     num_features=1024, norm=False, dropout=0, num_classes=30)
    model = nn.DataParallel(model).cuda()
    checkpoint = load_checkpoint(osp.join(model_dir, 'checkpoint.pth.tar'))
    model.module.load_state_dict(checkpoint['state_dict'])
    model.eval()

    # Extract features for every test image, remembering the filenames.
    feat, name = [], []
    for i, d in enumerate(test_loader):
        imgs, fnames, _ = d
        inputs = Variable(imgs)
        _, outputs = model(inputs)
        # outputs = F.sigmoid(outputs)
        feat.append(outputs.data.cpu().numpy())
        name.extend(fnames)
    feat = np.vstack(feat)
    # name = name.hstack(name)

    # Nearest attribute vector under cosine distance gives the predicted class.
    dist = compute_dist(feat, test_attrs, 'cosine')
    result = []
    for i, v in enumerate(dist):
        # BUG FIX (idiom): the original shadowed the builtin `max` and did a
        # manual list scan; np.argmax returns the first index of the maximum,
        # which matches list(v).index(v.max()) exactly.
        index = int(np.argmax(v))
        result.append((int(name[i][:3]), test_cls_list[index]))

    n = 0
    for pre, tar in result:
        if pre == tar:
            n = n+1
    print('the acc is {}/{}'.format(n, len(result)))
def test_only(model, train_dataloader, val_dataloader, optimizer, loss_fn,
              metrics, params, model_dir, logger, restore_file=None):
    """Evaluate on train and validation sets and push confusion matrices to Visdom."""
    # reload weights from restore_file if specified
    if restore_file is not None:
        logging.info("Restoring parameters from {}".format(restore_file))
        checkpoint = utils.load_checkpoint(restore_file, model, optimizer)
        best_val_acc = checkpoint['best_val_acc']
        params.current_epoch = checkpoint['epoch']
        print('best_val_acc=', best_val_acc)
        print(optimizer.state_dict()['param_groups'][0]['lr'],
              checkpoint['epoch'])
    # NOTE(review): `port`, `columnnames` and `rownames` are not defined in this
    # function — presumably module-level globals; confirm before running.
    train_confusion_logger = VisdomLogger(
        'heatmap', port=port,
        opts={'title': params.experiment_path + 'train_Confusion matrix',
              'columnnames': columnnames,
              'rownames': rownames}, env='Test')
    test_confusion_logger = VisdomLogger(
        'heatmap', port=port,
        opts={'title': params.experiment_path + 'test_Confusion matrix',
              'columnnames': columnnames,
              'rownames': rownames}, env='Test')
    diff_confusion_logger = VisdomLogger(
        'heatmap', port=port,
        opts={'title': params.experiment_path + 'diff_Confusion matrix',
              'columnnames': columnnames,
              'rownames': rownames}, env='Test')
    # Evaluate for one epoch on validation set
    # model.train()
    model.eval()
    train_metrics, train_confusion_meter = evaluate(
        model, loss_fn, train_dataloader, metrics, params, logger)
    train_confusion_logger.log(train_confusion_meter.value())
    model.eval()
    val_metrics, test_confusion_meter = evaluate(
        model, loss_fn, val_dataloader, metrics, params, logger)
    test_confusion_logger.log(test_confusion_meter.value())
    # Heatmap of the element-wise difference between train and test confusions.
    diff_confusion_meter = train_confusion_meter.value() - test_confusion_meter.value()
    diff_confusion_logger.log(diff_confusion_meter)
    pass
def test(args, data, test_csv, hdf5):
    """Run the trained TGIF model over the test CSV and report accuracy."""
    # Build the test dataset/loader from the raw CSV rows.
    data_all = test_csv.values
    test_len = data_all.shape[0]
    test_set = TrainSet(data_all, hdf5, data.word_matrix, data.word2idx,
                        data.ans2idx)
    test_loader = DataLoader(test_set, batch_size=args.batch_size,
                             shuffle=False, pin_memory=True)

    # Restore the best checkpoint into a data-parallel model.
    model = TgifModel()
    model = nn.DataParallel(model).cuda()
    checkpoint = load_checkpoint(
        osp.join(args.model_dir, 'checkpoint_best.pth.tar'))
    model.module.load_state_dict(checkpoint['state_dict'])
    model.eval()

    acc_all = 0
    for j, d in enumerate(test_loader):
        video_features, question_embeds, ql, ans_labels = d
        imgs = Variable(video_features.cuda())
        # question_embeds, ql = pack_paded_questions(question_embeds, ql)
        question_embeds = torch.stack(question_embeds, 1).cuda()
        questions_embed = Variable(question_embeds)
        ans_labels = Variable(ans_labels.cuda())
        ans_scores = model(imgs, questions_embed, ql)
        _, preds = torch.max(ans_scores, 1)
        acc = torch.sum((preds == ans_labels).data)
        acc_all += acc
        if j % args.print_freq == 0:
            print('test img {} acc is : {}'.format(j, acc))
    print('test acc is : {:06f}'.format(int(acc_all) / int(test_len)))
    return acc_all
def vanilla_policy_gradient(args, reward_ema=None, writer=None):
    """This is where the VPG magic happens"""
    device = get_device(args.device)
    policy = KarpathyPongPolicy(device, PRE_PROCESS_OUTPUT_DIM**2,
                                args.num_actions)
    optimizer = torch.optim.RMSprop(policy.parameters(), lr=args.policy_lr,
                                    alpha=args.rms_decay_rate, centered=True)
    if args.resume:
        load_checkpoint(policy, optimizer, args.resume)
    policy.train()
    env = gym.make("Pong-v0")
    encode_action.n_actions = args.num_actions
    # Each rank starts at its own episode index and advances by world_size so
    # episode numbers stay globally unique across workers.
    rank = dist.get_rank()
    episode_number = rank
    progress_bar = None
    world_size = dist.get_world_size()
    while True:
        optimizer.zero_grad()
        # Each worker collects its share of the global batch of episodes.
        for _ in range(args.batch_size // world_size):
            experience, episode_reward = collect_experience(env, policy,
                                                            args.render,
                                                            progress_bar)
            # optimizer.zero_grad()
            policy_loss, H, lt_entropy = compute_and_accumulate_policy_gradients(experience)
            reward_ema.update(episode_reward)
            if writer is not None:
                writer.log_kvdict({'episode_reward': episode_reward,
                                   'running_return': reward_ema.value_mt(),
                                   'policy_loss': policy_loss.item(),
                                   'ep_action_entropy': H.item(),
                                   'lt_action_entropy': lt_entropy.item()},
                                  episode_number)
            print(f'episode={episode_number} '
                  f'reward={episode_reward} running mean={reward_ema.value_mt():.2f} '
                  f'loss={policy_loss.item():.5f}', end='\n')
            episode_number += world_size
        # Update policy parameters every batch_size episodes
        reduce_gradients(policy, args.gradient_reduce, args.batch_size)
        if rank == 0:
            # Emit a log only for the Master process
            print('\nUpdating policy...')
        optimizer.step()
def init_evaluation_container(self):
    """Build the evaluation network, load its weights and move it to the GPU."""
    # Define network
    self.model = generate_net(self.args.models)
    # # Resuming checkpoint — only the state dict is used, optimizer state is discarded.
    state_dict, _, _, _ = load_checkpoint(
        checkpoint_path=self.args.evaluation.resume_eval)
    load_pretrained_model(self.model, state_dict)
    self.model = self.model.cuda()
def load_checkpoint(self, best=False):
    """Restore epoch, best precision, model and optimizer state from disk."""
    state = utils.load_checkpoint(self.dirpath, best)
    # Guard clause: nothing to restore (same as the original's implicit no-op).
    if state is None:
        return
    msg = 'load checkpoint successly: %s \n' % self.dirpath
    logging.info(msg)
    self.epoch = state['epoch'] + 1  # resume at the epoch after the saved one
    self.best_prec = state['best_prec']
    self.model.load_state_dict(state['state_dict'])
    self.optim.load_state_dict(state['optim'])
def train_and_evaluate(model, train_dl, valid_dl, test_dl, optimizer, loss_fn,
                       cfg, writer, experiment_dir, checkpoint_model=None):
    """Train for cfg.TRAIN.NUM_EPOCHS epochs, evaluating and checkpointing each epoch.

    A checkpoint is written every epoch; it is additionally marked "best" when
    the validation loss reaches a new minimum.
    """
    # If checkpoint given, reload parameters (and optimizer)
    if checkpoint_model is not None:
        checkpoint_path = os.path.join(experiment_dir,
                                       checkpoint_model + '.pth.tar')
        logging.info("Restoring parameters from {}".format(checkpoint_path))
        utils.load_checkpoint(checkpoint_path, model, optimizer)

    valid_loss_min = np.Inf
    for epoch in range(cfg.TRAIN.NUM_EPOCHS):
        logging.info("Epoch {}/{}".format(epoch + 1, cfg.TRAIN.NUM_EPOCHS))
        train_loss, valid_loss = do_train(model, optimizer, loss_fn, cfg,
                                          epoch, writer, train_dl, valid_dl)
        test_loss, accuracy = do_evaluate(model, loss_fn, cfg, epoch, writer,
                                          test_dl)
        # Log epoch metrics
        logging.info(
            'Train loss: {}\nValid loss: {}\nTest loss: {}\nAccuracy: {}%\n'.
            format(round(train_loss.item(), 3), round(valid_loss.item(), 3),
                   round(test_loss.item(), 3), accuracy))
        # Save parameters
        is_best = valid_loss <= valid_loss_min
        # BUG FIX: valid_loss_min was never updated, so every epoch compared
        # against np.Inf and was flagged as "best". Track the running minimum.
        if is_best:
            valid_loss_min = valid_loss
        utils.save_checkpoint(state={
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optim_dict': optimizer.state_dict()
        },
                              is_best=is_best,
                              experiment_dir=experiment_dir)
def main(args):
    """Train CombinedNet16s end to end, optionally resuming from a checkpoint."""
    # create configuration
    config = Config(hist_net=args.hist_net)

    # set seed
    torch.manual_seed(config.seed)
    np.random.seed(config.seed)

    # prepare dataloaders
    print("===> Creating the dataloaders...")
    # train/val/test loader not having restriction on number of user history (when config.is_history=True)
    # also invoked when training baseline models (i.e. config.is_history=False)
    train_loader, val_loader, test_loader = get_loader(config)

    # create model
    print("===> Creating the model (is_history={})".format(str(config.is_history)))
    model = CombinedNet16s(is_history=config.is_history,
                           is_dec_affine=config.is_dec_affine,
                           is_insnorm_layer=config.is_insnorm_layer,
                           hist_net_type=config.hist_net).cuda()
    print(model)

    # check if resuming training
    if args.resume:
        try:
            print('\n===> Loading checkpoint to resume:{}'.format(args.checkpoint))
            load_checkpoint(model, args.checkpoint)
        except ValueError:
            print("args.checkpoint is empty or not valid path!")
    else:
        print("\n===> No re-training...training from scratch!")

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    print("===> Training initiated..")
    train(config, model, train_loader, val_loader, optimizer)
def test(args):
    """Run zero-shot inference and write a timestamped submission file of labels."""
    # np.random.seed(args.seed)
    # torch.manual_seed(args.seed)
    # cudnn.benchmark = True
    test_loader, test_cls_list, test_attrs = get_test_data(
        args.data_dir, args.batch_size)
    model = resnet50(pretrained=False, cut_at_pooling=False,
                     num_features=1024, norm=False, dropout=0, num_classes=300)
    # model = ResNet50M(num_classes=300)
    print(model)
    model = nn.DataParallel(model).cuda()
    checkpoint = load_checkpoint(
        osp.join(args.model_dir, 'checkpoint_bi1901_1402.pth.tar'))
    model.module.load_state_dict(checkpoint['state_dict'])
    model.eval()

    # Extract features for every test image, remembering the filenames.
    feat, name = [], []
    for i, d in enumerate(test_loader):
        imgs, fnames = d
        inputs = Variable(imgs)
        _, outputs, _ = model(inputs)
        # outputs = F.sigmoid(outputs)
        feat.append(outputs.data.cpu().numpy())
        name.extend(fnames)
    feat = np.vstack(feat)
    # name = name.hstack(name)

    # Nearest attribute vector under cosine distance gives the predicted class.
    dist = compute_dist(feat, test_attrs, 'cosine')
    result = []
    for i, v in enumerate(dist):
        # BUG FIX (idiom): np.argmax returns the first index of the maximum,
        # matching the original list(v).index(v.max()) without shadowing `max`.
        index = int(np.argmax(v))
        result.append('{}\tZJL{}'.format(name[i].strip(),
                                         test_cls_list[index]))
    print(result[:10])

    # BUG FIX: the submission file handle was opened but never closed; use a
    # context manager so it is flushed and closed deterministically.
    # NOTE(review): the output path is hard-coded to a user home directory.
    with open(
            '/home/stage/yuan/ZSL/submit_{}.txt'.format(
                datetime.datetime.now().strftime('%Y%m%d_%H%M%S')),
            'w') as s:
        for i in result:
            s.write(i + '\n')
    print(len(result))
    print()
def pretrainedLoader(net, optimizer, epoch, path, mode='full', full_path=False):
    """Load weights (and optionally optimizer/iteration state) into *net*.

    Args:
        net: model whose parameters are restored in place.
        optimizer: optimizer to restore when mode == 'full'.
        epoch: fallback iteration counter, returned unchanged unless
            mode == 'full'.
        path: checkpoint location.
        mode: 'full' restores model + optimizer + iteration count; any other
            value treats the checkpoint as a bare state_dict.
        full_path: when True, *path* is loaded directly with torch.load;
            otherwise it is resolved through the project's load_checkpoint helper.

    Returns:
        (net, optimizer, epoch) after restoration.
    """
    # load checkpoint
    if full_path:  # idiom fix: no explicit '== True' comparison
        checkpoint = torch.load(path)
    else:
        checkpoint = load_checkpoint(path)
    # apply checkpoint
    if mode == 'full':
        net.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        # epoch = checkpoint['epoch']
        # These checkpoints store the iteration counter under 'n_iter'.
        epoch = checkpoint['n_iter']
        # epoch = 0
    else:
        net.load_state_dict(checkpoint)
    # net.load_state_dict(torch.load(path,map_location=lambda storage, loc: storage))
    return net, optimizer, epoch
def __init__(self):
    """Load an HCN model with a replaced 12-class head and restore weights for inference."""
    # NOTE(review): `_json_file` and `_chkp_path` are not defined here —
    # presumably module-level constants; confirm.
    params = utils.Params(_json_file)
    self.model = HCN.HCN(**params.model_args)
    self.optimizer = optim.Adam(
        filter(lambda p: p.requires_grad, self.model.parameters()),
        lr=params.lr,
        betas=(0.9, 0.999),
        eps=1e-8,
        weight_decay=params.weight_decay)
    out_channel = params.model_args['out_channel']
    window_size = params.model_args['window_size']
    # Replace the classifier head: fc7 flattens the conv features into 256
    # units, fc8 maps them to the 12 target classes.
    self.model.fc7 = nn.Sequential(
        nn.Linear((out_channel * 4)*(window_size//16)*(window_size//16), 256),
        nn.ReLU(),
        nn.Dropout2d(p=0.5))
    self.model.fc8 = nn.Linear(256, 12)
    checkpoint = utils.load_checkpoint(_chkp_path, self.model, self.optimizer)
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model = self.model.to(self.device)
    self.model.eval()
def _init_model(self, args):
    """Load encoder/decoder weights from a checkpoint and prepare both for inference."""
    print("Loading model: " + args.model_name)
    encoder_dict, decoder_dict, _, _, load_args = load_checkpoint(
        args.model_name, args.use_gpu)
    load_args.use_gpu = args.use_gpu
    self.encoder = FeatureExtractor(load_args)
    # Zero-shot uses the plain recurrent decoder; otherwise the mask variant.
    if args.zero_shot:
        self.decoder = RSIS(load_args)
    else:
        self.decoder = RSISMask(load_args)
    print(load_args)
    if args.ngpus > 1 and args.use_gpu:
        self.decoder = torch.nn.DataParallel(self.decoder,
                                             device_ids=range(args.ngpus))
        self.encoder = torch.nn.DataParallel(self.encoder,
                                             device_ids=range(args.ngpus))
    # Align the saved dicts' key prefixes with the (non-)DataParallel models.
    encoder_dict, decoder_dict = check_parallel(encoder_dict, decoder_dict)
    self.encoder.load_state_dict(encoder_dict)
    # Drop the stop-branch weights, which are not present/used at inference
    # time (collect keys first — dicts cannot be mutated while iterated).
    to_be_deleted_dec = []
    for k in decoder_dict.keys():
        if 'fc_stop' in k:
            to_be_deleted_dec.append(k)
    for k in to_be_deleted_dec:
        del decoder_dict[k]
    self.decoder.load_state_dict(decoder_dict)
    if args.use_gpu:
        self.encoder.cuda()
        self.decoder.cuda()
    self.encoder.eval()
    self.decoder.eval()
    # A clip length of 1 means frame-by-frame (image) mode rather than video mode.
    if load_args.length_clip == 1:
        self.video_mode = False
        print('video mode not activated')
    else:
        self.video_mode = True
        print('video mode activated')
def valid(args, val_loader):
    """Compute the total number of correctly answered validation questions."""
    # Restore the latest checkpoint into a data-parallel model.
    model = TgifModel()
    model = nn.DataParallel(model).cuda()
    checkpoint = load_checkpoint(osp.join(args.model_dir,
                                          'checkpoint.pth.tar'))
    model.module.load_state_dict(checkpoint['state_dict'])
    model.eval()

    acc_all = 0
    for j, d in enumerate(val_loader):
        imgs, question_embeds, ql, ans_labels, _ = d
        imgs = Variable(imgs.cuda())
        questions_embed = Variable(torch.stack(question_embeds, 1).cuda())
        ans_labels = Variable(ans_labels.cuda())
        ans_scores = model(imgs, questions_embed)
        _, preds = torch.max(ans_scores, 1)
        acc = torch.sum((preds == ans_labels).data)
        acc_all += acc
        if j % args.print_freq == 0:
            print('val img {} acc is : {:06d}'.format(j, acc))
    return acc_all
def init_training_container(self):
    """Set up datasets, network, optimizer and loss, optionally resuming training."""
    # Define dataset
    self.train_set = self.Dataset_train(self.args.dataset, split='train')
    self.val_set = self.Dataset_val(self.args.dataset, split='val')
    self.test_set = self.Dataset_val(self.args.dataset, split='test')
    # Define network
    self.model = generate_net(self.args.models)
    self.model = self.model.cuda()
    start_it = 0
    stage = 0
    # # Resuming checkpoint
    if self.args.training.resume_train is not None and os.path.exists(self.args.training.resume_train):
        state_dict, optimizer, start_it, stage = load_checkpoint(
            checkpoint_path=self.args.training.resume_train)
        # Training is staged; (stage + 1) // 2 selects the optimizer/batch-size
        # configuration for the resumed stage.
        self.gen_optimizer(self.model.param_groups(), (stage + 1) // 2)
        if isinstance(self.args.training.batchsize, list):
            self.batchsize = self.args.training.batchsize[(stage + 1) // 2]
        else:
            self.batchsize = self.args.training.batchsize
        load_pretrained_model(self.model, state_dict)
        self.model = self.model.cuda()
        if not self.args.training.ft and optimizer is not None:
            # Not fine-tuning: restore each named optimizer's state if present.
            for name in self.args.training.optimizer.keys():
                if name in optimizer.keys():
                    self.optimizer[name].load_state_dict(optimizer[name])
        else:
            # Fine-tuning: keep the weights but restart the iteration counter.
            start_it = 0
    else:
        self.gen_optimizer(self.model.param_groups(), (stage + 1) // 2)
    self.start_epoch = 0
    self.start_it = start_it
    self.stage = stage
    # Define Criterion
    self.criterion = MMLoss(self.args.training)
def __init__(self, image_width, image_height, n_channels, n_classes,
             batch_size, checkpoint_path):
    """Build a TF1-style inference graph; weights are restored later via a session."""
    super().__init__(image_width=image_width,
                     image_height=image_height,
                     n_channels=n_channels,
                     n_classes=n_classes,
                     batch_size=batch_size)
    # NOTE(review): load_checkpoint's return value is stored as the "path" —
    # presumably it resolves/validates the location; confirm.
    self._checkpoint_path = load_checkpoint(checkpoint_path)
    self._session = None  # created lazily when inference starts
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
    self._graph = tf.Graph()
    with self._graph.as_default():
        # Prepare graph
        self._x_input = tf.compat.v1.placeholder(
            tf.float32,
            shape=[None, image_height, image_width, n_channels])
        logits = self.calculate_logits(self._x_input)
        self._predicted_labels = tf.argmax(logits, 1)
def main():
    """Train PSENet on the Tibetan dataset, evaluating and checkpointing the best model."""
    if config.output_dir is None:
        config.output_dir = 'output'
    # restart_training wipes any previous run in the output directory.
    if config.restart_training:
        shutil.rmtree(config.output_dir, ignore_errors=True)
    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)

    logger = setup_logger(os.path.join(config.output_dir, 'train_log'))
    logger.info(config.print())

    torch.manual_seed(config.seed)  # seed the CPU RNG
    if config.gpu_id is not None and torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        logger.info('train with gpu {} and pytorch {}'.format(
            config.gpu_id, torch.__version__))
        device = torch.device("cuda:0")
        torch.cuda.manual_seed(config.seed)  # seed the current GPU
        torch.cuda.manual_seed_all(config.seed)  # seed all GPUs
    else:
        logger.info('train with cpu and pytorch {}'.format(torch.__version__))
        device = torch.device("cpu")

    train_data = TibetanDataset(config.json_path,
                                data_shape=config.data_shape,
                                n=config.n,
                                m=config.m,
                                transform=transforms.ToTensor(),
                                base_path=config.base_path)
    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=config.train_batch_size,
                                   shuffle=True,
                                   num_workers=int(config.workers))

    writer = SummaryWriter(config.output_dir)
    model = PSENet(backbone=config.backbone,
                   pretrained=config.pretrained,
                   result_num=config.n,
                   scale=config.scale)
    # Fresh (non-pretrained, non-restart) runs get explicit weight init.
    if not config.pretrained and not config.restart_training:
        model.apply(weights_init)

    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    # dummy_input = torch.autograd.Variable(torch.Tensor(1, 3, 600, 800).to(device))
    # writer.add_graph(models=models, input_to_model=dummy_input)
    criterion = PSELoss(Lambda=config.Lambda,
                        ratio=config.OHEM_ratio,
                        reduction='mean')
    # optimizer = torch.optim.SGD(models.parameters(), lr=config.lr, momentum=0.99)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
    if config.checkpoint != '' and not config.restart_training:
        start_epoch = load_checkpoint(config.checkpoint, model, logger, device,
                                      optimizer)
        start_epoch += 1
        # NOTE(review): last_epoch != -1 requires 'initial_lr' in the optimizer
        # param groups; confirm load_checkpoint restores it.
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            config.lr_decay_step,
            gamma=config.lr_gamma,
            last_epoch=start_epoch)
    else:
        start_epoch = config.start_epoch
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                         config.lr_decay_step,
                                                         gamma=config.lr_gamma)

    all_step = len(train_loader)
    logger.info('train dataset has {} samples,{} in dataloader'.format(
        train_data.__len__(), all_step))
    epoch = 0
    best_model = {'recall': 0, 'precision': 0, 'f1': 0, 'models': ''}
    try:
        for epoch in range(start_epoch, config.epochs):
            start = time.time()
            train_loss, lr = train_epoch(model, optimizer, scheduler,
                                         train_loader, device, criterion,
                                         epoch, all_step, writer, logger)
            logger.info(
                '[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
                    epoch, config.epochs, train_loss, time.time() - start, lr))
            # net_save_path = '{}/PSENet_{}_loss{:.6f}.pth'.format(config.output_dir, epoch,
            #                                                      train_loss)
            # save_checkpoint(net_save_path, models, optimizer, epoch, logger)
            # Only run the (expensive) evaluation once the loss is low enough.
            if (0.3 < train_loss < 0.4 and epoch % 4 == 0) or train_loss < 0.3:
                recall, precision, f1 = merge_eval(model=model,
                                                   save_path=os.path.join(
                                                       config.output_dir,
                                                       'output'),
                                                   test_path=config.testroot,
                                                   device=device,
                                                   base_path=config.base_path,
                                                   use_sub=config.use_sub)
                logger.info(
                    'test: recall: {:.6f}, precision: {:.6f}, f1: {:.6f}'.
                    format(recall, precision, f1))

                net_save_path = '{}/PSENet_{}_loss{:.6f}_r{:.6f}_p{:.6f}_f1{:.6f}.pth'.format(
                    config.output_dir, epoch, train_loss, recall, precision,
                    f1)
                save_checkpoint(net_save_path, model, optimizer, epoch, logger)
                if f1 > best_model['f1']:
                    # Keep exactly one Best_* checkpoint on disk.
                    best_path = glob.glob(config.output_dir + '/Best_*.pth')
                    for b_path in best_path:
                        if os.path.exists(b_path):
                            os.remove(b_path)
                    best_model['recall'] = recall
                    best_model['precision'] = precision
                    best_model['f1'] = f1
                    best_model['models'] = net_save_path
                    best_save_path = '{}/Best_{}_r{:.6f}_p{:.6f}_f1{:.6f}.pth'.format(
                        config.output_dir, epoch, recall, precision, f1)
                    if os.path.exists(net_save_path):
                        shutil.copyfile(net_save_path, best_save_path)
                    else:
                        save_checkpoint(best_save_path, model, optimizer,
                                        epoch, logger)
                    # Prune per-epoch PSENet_* checkpoints once a new best exists.
                    pse_path = glob.glob(config.output_dir + '/PSENet_*.pth')
                    for p_path in pse_path:
                        if os.path.exists(p_path):
                            os.remove(p_path)
                writer.add_scalar(tag='Test/recall',
                                  scalar_value=recall,
                                  global_step=epoch)
                writer.add_scalar(tag='Test/precision',
                                  scalar_value=precision,
                                  global_step=epoch)
                writer.add_scalar(tag='Test/f1',
                                  scalar_value=f1,
                                  global_step=epoch)
        writer.close()
    except KeyboardInterrupt:
        # Persist the current state before exiting on Ctrl-C.
        save_checkpoint('{}/final.pth'.format(config.output_dir), model,
                        optimizer, epoch, logger)
    finally:
        if best_model['models']:
            logger.info(best_model)
elif (args.net_G_type == "pix2pixhd_spade"): model_G = Pix2PixHDSPADEGenerator(input_nc=args.n_classes, output_nc=3).to(device) else: NotImplementedError() model_D = PatchGANDiscriminator(n_in_channels=3 + args.n_classes, n_fmaps=64).to(device) if (args.debug): print("model_G\n", model_G) print("model_D\n", model_D) # モデルを読み込む if not args.load_checkpoints_G_path == '' and os.path.exists( args.load_checkpoints_G_path): load_checkpoint(model_G, device, args.load_checkpoints_G_path) if not args.load_checkpoints_D_path == '' and os.path.exists( args.load_checkpoints_D_path): load_checkpoint(model_D, device, args.load_checkpoints_D_path) #================================ # optimizer_G の設定 #================================ optimizer_G = optim.Adam(params=model_G.parameters(), lr=args.lr, betas=(args.beta1, args.beta2)) optimizer_D = optim.Adam(params=model_D.parameters(), lr=args.lr, betas=(args.beta1, args.beta2)) #================================
pin_memory=params.cuda) if 'HCN' in params.model_version: model = HCN.HCN(**params.model_args) if params.data_parallel: model = torch.nn.DataParallel(model).cuda() else: model = model.cuda(params.gpu_id) loss_fn = HCN.loss_fn metrics = HCN.metrics ### load model restore_file = params.restore_file if restore_file is not None: checkpoint = utils.load_checkpoint(restore_file, model) best_val_acc = checkpoint['best_val_acc'] params.current_epoch = checkpoint['epoch'] print('best_val_acc=', best_val_acc) else: raise evaluate(model, loss_fn, test_dl, metrics, params, sigma=args.sigma, apply_defense=args.apply_defense)
def main():
    """Evaluate a pose-estimation model on COCO validation and dump the results."""
    # for reproduciblity
    random_seed = 2020
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    args = parse_args()

    # model loading
    model = get_pose_net(
        config, is_train=True
    )
    # model = model.half()
    model = model.to("cuda" if torch.cuda.is_available() else "cpu")

    valid_dataset = coco(
        config,
        config.DATASET.ROOT,
        config.DATASET.TEST_SET,
        is_train=False,
        is_eval=True,
        transform=tfms.Compose([
            tfms.ToTensor(),
        ])
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE,
        shuffle=False,
        # num_workers=confi g.WORKERS,
        # pin_memory=True
        drop_last=False,
    )

    if config.MODEL.CHECKPOINT is not None:
        info = load_checkpoint(config.MODEL.CHECKPOINT)
        if info is not None:
            _, model_dic, _, _ = info
            try:
                model.load_state_dict(model_dic)
                logging.info('Model Loaded.\n')
            except Exception as e:
                # Chain the original error so the shape mismatch is visible.
                raise FileNotFoundError('Model shape is different. Plz check.') from e

    end = time.time()
    logging.info('Evaluation Ready\n')
    result = evaluate(config, model, valid_loader)

    # BUG FIX: the result directory was created only AFTER data.json was
    # written, so a fresh run crashed on the open() below. Create it first.
    os.makedirs(config.result_dir, exist_ok=True)
    with open(f'{config.result_dir}/data.json', 'w') as f:
        json.dump(result, f)
    logging.info(f"Taken {time.time()-end:.5f}s\n")
    logging.info(f"From a Pose estimator.\n")
    valid_dataset.keypoint_eval(
        '/home/mah/workspace/PoseFix/data/input_pose_path/keypoints_valid2017_results.json',
        config.result_dir + '/ori/')
    logging.info(f"Pose Estimator with PoseFix.\n")
    valid_dataset.keypoint_eval(result, config.result_dir + '/pred')
max_len, device, ).to(device) optimizer = optim.Adam(model.parameters(), lr=learning_rate) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=10, verbose=True) pad_idx = input_text.vocab.stoi["<pad>"] criterion = nn.CrossEntropyLoss(ignore_index=pad_idx) if load_model: load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer) sentence = "Emma Woodhouse, handsome, clever, and rich, with a comfortable home" # Output should be: and happy disposition, seemed to unite some of the best blessings if graph: plt.ion() bleu_scores = [] big_iterator = foldify(big_iterator[0], k_folds) big_data = foldify(big_data[0], k_folds) k = 0 prev_score = float('-inf') biggest_score = 0.13 for epoch in range(num_epochs): k += 1