Example #1
def test(args):
    # load the newest checkpoint; the file stores the full model object, not a state_dict
    model_path = sorted(glob(os.path.join('ckpt', args.tag, '*.pth')))[-1]
    model = torch.load(model_path, map_location='cpu').eval()
    print('Loaded model: {}'.format(model_path))
    model_name = os.path.basename(os.path.splitext(model_path)[0])
    # initialize video writer
    video_filename = 'output_{}_{}.avi'.format(args.tag, model_name)

    dict_screen_shape = {"flappy": (288, 512), "pixelcopter": (48, 48)}
    out = Recorder(video_filename=video_filename,
                   fps=30,
                   width=dict_screen_shape[args.game][0],
                   height=dict_screen_shape[args.game][1])
    score_list = []
    time_list = []

    game = Game(game=args.game)
    for _ in range(10):

        terminal = game.game_over()
        start = time.time()
        score = 0

        # stack four copies of the first frame to build the initial state
        image_data = game.get_torch_image()
        state = torch.cat(
            (image_data, image_data, image_data, image_data)).unsqueeze(0)
        while not terminal:
            output = model(state)[0]
            action_index = torch.argmax(output)
            score += game.act(action_index)
            terminal = game.game_over()
            image_data_1 = game.get_torch_image()
            # slide the four-frame window: drop the oldest frame, append the newest
            state = torch.cat(
                (state.squeeze(0)[1:, :, :], image_data_1)).unsqueeze(0)

            out.write(game.get_image())

        game.reset_game()
        score_list.append(score)
        time_list.append(time.time() - start)
        print('Game ended!')
        print('Score: {}'.format(score))

    # Add summary
    out.write_score(sum(score_list), sum(time_list))
    out.save()
    print('Total Score: {}'.format(sum(score_list)))
    print('Total Run Time: {:.3f}'.format(sum(time_list)))
    print('Saved video: {}'.format(video_filename))
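In this example, Recorder is a thin video-writer wrapper from the surrounding project. Below is a minimal sketch of the interface the call sites above assume, backed by OpenCV's VideoWriter; the constructor arguments, write, write_score, and save mirror the usage in test(), and everything else (codec choice, summary-card behavior) is an assumption, not the project's actual implementation:

import cv2
import numpy as np

class Recorder:
    def __init__(self, video_filename, fps, width, height):
        # XVID codec pairs with the .avi container used above
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        self.writer = cv2.VideoWriter(video_filename, fourcc, fps, (width, height))
        self.width, self.height = width, height

    def write(self, frame):
        # frame: HxWx3 uint8 BGR image, as game.get_image() is assumed to return
        self.writer.write(frame)

    def write_score(self, total_score, total_time):
        # hypothetical summary card: hold a text frame for two seconds at 30 fps
        card = np.zeros((self.height, self.width, 3), dtype=np.uint8)
        text = 'score {} / {:.1f}s'.format(total_score, total_time)
        cv2.putText(card, text, (10, self.height // 2),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        for _ in range(60):
            self.writer.write(card)

    def save(self):
        self.writer.release()  # flush and close the file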
Example #2
File: train.py  Project: leibo-cmu/MatSeg
def train(args):
    Arguments.save_args(args, args.args_path)
    train_loader, val_loader, _ = get_dataloaders(args)
    model = UNetVgg16(n_classes=args.n_classes).to(args.device)
    optimizer = get_optimizer(args.optimizer, model)
    lr_scheduler = LRScheduler(args.lr_scheduler, optimizer)
    criterion = get_loss_fn(args.loss_type, args.ignore_index).to(args.device)
    model_saver = ModelSaver(args.model_path)
    recorder = Recorder(['train_miou', 'train_acc', 'train_loss',
                         'val_miou', 'val_acc', 'val_loss'])
    for epoch in range(args.n_epochs):
        print(f"{args.experim_name} Epoch {epoch+1}:")
        train_loss, train_acc, train_miou, train_ious = train_epoch(
            model=model,
            dataloader=train_loader,
            n_classes=args.n_classes,
            optimizer=optimizer,
            lr_scheduler=lr_scheduler,
            criterion=criterion,
            device=args.device,
        )
        print(f"train | mIoU: {train_miou:.3f} | accuracy: {train_acc:.3f} | loss: {train_loss:.3f}")
        val_loss, val_scores = eval_epoch(
            model=model,
            dataloader=val_loader,
            n_classes=args.n_classes,
            criterion=criterion,
            device=args.device,
        )
        val_miou, val_ious, val_acc = val_scores['mIoU'], val_scores['IoUs'], val_scores['accuracy']
        print(f"valid | mIoU: {val_miou:.3f} | accuracy: {val_acc:.3f} | loss: {val_loss:.3f}")
        recorder.update([train_miou, train_acc, train_loss, val_miou, val_acc, val_loss])
        recorder.save(args.record_path)
        if args.metric.startswith("IoU"):
            metric = val_ious[int(args.metric.split('_')[1])]
        else:
            metric = val_miou
        model_saver.save_models(metric, epoch+1, model,
                                ious={'train': train_ious, 'val': val_ious})

    print(f"best model at epoch {model_saver.best_epoch} with miou {model_saver.best_score:.5f}")
Example #3
def run(rank, args):
    base_setting(args)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    gpuid = args.gpuid[rank]
    is_master = rank == 0
    is_mp = len(args.gpuid) > 1
    world_size = len(args.gpuid)
    if is_master:
        recorder = Recorder(args.log)
    tok = BertTokenizer.from_pretrained(args.model_type)
    if args.use_ids:
        collate_fn = partial(collate_mp_ids,
                             pad_token_id=tok.pad_token_id,
                             is_test=False)
        collate_fn_val = partial(collate_mp_ids,
                                 pad_token_id=tok.pad_token_id,
                                 is_test=True)
        train_set = RefactoringIDsDataset(
            f"./{args.dataset}/{args.datatype}/train",
            args.model_type,
            maxlen=args.max_len,
            max_num=args.max_num)
        val_set = RefactoringIDsDataset(
            f"./{args.dataset}/{args.datatype}/val",
            args.model_type,
            is_test=True,
            maxlen=512,
            is_sorted=False)
    else:
        collate_fn = partial(collate_mp,
                             pad_token_id=tok.pad_token_id,
                             is_test=False)
        collate_fn_val = partial(collate_mp,
                                 pad_token_id=tok.pad_token_id,
                                 is_test=True)
        train_set = RefactoringDataset(
            f"./{args.dataset}/{args.datatype}/train",
            args.model_type,
            maxlen=args.max_len,
            maxnum=args.max_num)
        val_set = RefactoringDataset(f"./{args.dataset}/{args.datatype}/val",
                                     args.model_type,
                                     is_test=True,
                                     maxlen=512,
                                     is_sorted=False,
                                     maxnum=args.max_num)
    if is_mp:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_set, num_replicas=world_size, rank=rank, shuffle=True)
        dataloader = DataLoader(train_set,
                                batch_size=args.batch_size,
                                shuffle=False,
                                num_workers=4,
                                collate_fn=collate_fn,
                                sampler=train_sampler)
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_set, num_replicas=world_size, rank=rank)
        val_dataloader = DataLoader(val_set,
                                    batch_size=8,
                                    shuffle=False,
                                    num_workers=4,
                                    collate_fn=collate_fn_val,
                                    sampler=val_sampler)
    else:
        dataloader = DataLoader(train_set,
                                batch_size=args.batch_size,
                                shuffle=True,
                                num_workers=4,
                                collate_fn=collate_fn)
        val_dataloader = DataLoader(val_set,
                                    batch_size=8,
                                    shuffle=False,
                                    num_workers=4,
                                    collate_fn=collate_fn_val)
    # build models
    model_path = args.pretrained if args.pretrained is not None else args.model_type
    model = Refactor(model_path, num_layers=args.num_layers)

    if args.model_pt is not None:
        model.load_state_dict(
            torch.load(args.model_pt, map_location=f'cuda:{gpuid}'))
    if args.cuda:
        if len(args.gpuid) == 1:
            model = model.cuda()
        else:
            dist.init_process_group("nccl", rank=rank, world_size=world_size)
            model = nn.parallel.DistributedDataParallel(
                model.to(gpuid), [gpuid], find_unused_parameters=True)
    model.train()
    init_lr = args.max_lr / args.warmup_steps
    optimizer = optim.Adam(model.parameters(), lr=init_lr)
    if is_master:
        recorder.write_config(args, [model], __file__)
    minimum_loss = float("inf")  # any real validation loss will replace this
    all_step_cnt = 0
    # start training
    for epoch in range(args.epoch):
        optimizer.zero_grad()
        step_cnt = 0
        steps = 0
        avg_loss = 0
        for (i, batch) in enumerate(dataloader):
            if args.cuda:
                to_cuda(batch, gpuid)
            step_cnt += 1
            output = model(batch["src_input_ids"], batch["candidate_ids"],
                           batch["tgt_input_ids"])
            similarity = output['score']
            gold_similarity = output['summary_score']
            loss = args.scale * RankingLoss(similarity,
                                            gold_similarity,
                                            args.margin,
                                            args.gold_margin,
                                            args.gold_weight,
                                            no_gold=args.no_gold)
            loss = loss / args.accumulate_step
            avg_loss += loss.item()
            loss.backward()
            if step_cnt == args.accumulate_step:
                if args.grad_norm > 0:
                    nn.utils.clip_grad_norm_(model.parameters(),
                                             args.grad_norm)
                step_cnt = 0
                steps += 1
                all_step_cnt += 1
                # linear warmup for warmup_steps updates, then inverse-square-root decay
                lr = args.max_lr * min(
                    all_step_cnt**(-0.5),
                    all_step_cnt * (args.warmup_steps**(-1.5)))
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                optimizer.step()
                optimizer.zero_grad()
            if steps % args.report_freq == 0 and step_cnt == 0 and is_master:
                recorder.print("epoch: %d, batch: %d, avg loss: %.6f" %
                               (epoch + 1, steps, avg_loss / args.report_freq))
                recorder.print(f"learning rate: {lr:.6f}")
                recorder.plot("loss", {"loss": avg_loss / args.report_freq},
                              all_step_cnt)
                recorder.print()
                avg_loss = 0
            del similarity, gold_similarity, loss

            if all_step_cnt % args.test_freq == 0 and all_step_cnt != 0 and step_cnt == 0:
                loss = test(val_dataloader, model, args, gpuid)
                if loss < minimum_loss and is_master:
                    minimum_loss = loss
                    if is_mp:
                        recorder.save(model.module, "model.bin")
                    else:
                        recorder.save(model, "model.bin")
                    recorder.save(optimizer, "optimizer.bin")
                    recorder.print("best - epoch: %d, batch: %d" %
                                   (epoch + 1, i // args.accumulate_step + 1))
                if is_master:
                    if is_mp:
                        recorder.save(model.module, "model_cur.bin")
                    else:
                        recorder.save(model, "model_cur.bin")
                    recorder.save(optimizer, "optimizer_cur.bin")
                    recorder.print("val score: %.6f" % (1 - loss))