Example #1
def main(args):
    os.makedirs(args.output_dir, exist_ok=True)
    print(args)
    if args.dataset_root == COCO_ROOT:
        parser.error('Must specify dataset if specifying dataset_root')
    cfg = voc

    start_time = time.time()
    config = {"args": args, "num_workers": args.num_workers, "cfg": cfg}
    trainer = TorchTrainer(model_creator=model_creator,
                           data_creator=data_creator,
                           optimizer_creator=optimizer_creator,
                           training_operator_cls=SegOperator,
                           use_tqdm=True,
                           use_fp16=False,
                           num_workers=config["num_workers"],
                           config=config,
                           use_gpu=torch.cuda.is_available())
    for epoch in range(args.epochs):
        trainer.train()
        state_dict = trainer.state_dict()
        state_dict.update(epoch=epoch, args=args)
        torch.save(state_dict,
                   os.path.join(args.output_dir, "model_{}.pth".format(epoch)))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print("Training time {}".format(total_time_str))
Example #2
def main(args):
    os.makedirs(args.output_dir, exist_ok=True)

    print(args)
    start_time = time.time()
    config = {"args": args, "num_workers": args.num_workers}
    trainer = TorchTrainer(training_operator_cls=SegOperator,
                           use_tqdm=True,
                           use_fp16=True,
                           num_workers=config["num_workers"],
                           config=config,
                           use_gpu=torch.cuda.is_available())

    for epoch in range(args.epochs):
        trainer.train()
        confmat = trainer.validate(reduce_results=False)[0]
        print(confmat)
        state_dict = trainer.state_dict()
        state_dict.update(epoch=epoch, args=args)
        torch.save(state_dict,
                   os.path.join(args.output_dir, f"model_{epoch}.pth"))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print(f"Training time {total_time_str}")
Example #3
        num_workers=1,
        use_gpu=True,
        config={
            "lr": 1e-2,  # used in optimizer_creator
            "hidden_size": 1,  # used in model_creator
            "batch_size": 1024,  # used in data_creator
            "path": path1,  # path to load the agent nn
        },
        backend="auto",
        scheduler_step_freq="epoch")
    for i in range(100):
        stats = trainer1.train()
        print(stats)

    print(trainer1.validate())
    torch.save(trainer1.state_dict(), "checkpoint.pt")
    torch.save(trainer1.get_model().state_dict(), "invariant_checkpoint.pt")
    m = trainer1.get_model()
    print(f"trained weight: torch.tensor([[{m[0].weight.data.cpu().numpy()[0][0]},{m[0].weight.data.cpu().numpy()[0][1]}]]), bias: torch.tensor({m[0].bias.data.cpu().numpy()})")
    # trainer1.shutdown()
    print("success!")
else:
    m = torch.nn.Sequential(torch.nn.Linear(2, 50), torch.nn.ReLU(),
                            torch.nn.Linear(50, 1), torch.nn.Tanh())
    checkpoint = torch.load("invariant_checkpoint.pt",
                            map_location=torch.device("cpu"))
    m.load_state_dict(checkpoint)
    # trained weight:  [[0.0018693  0.05228069]], bias: [-0.5533147] , train_loss = 0.0
    # trained weight:  [[-0.01369903  0.03511396]], bias: [-0.6535952] , train_loss = 0.0
    # trained weight:  [[0.00687088  0.26634103]], bias: [-0.6658108] , train_loss = 0.0
    # trained weight: torch.tensor([[0.038166143000125885,0.16197167336940765]]), bias: torch.tensor([-2.3122551])
# %%
m.cpu()
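The two save calls in Example #3 serve different purposes: trainer1.state_dict() captures the whole trainer state, while trainer1.get_model().state_dict() holds only the model weights, which is why the else branch can rebuild the network and reload invariant_checkpoint.pt without Ray. A small usage sketch for the reloaded network, with the input batch shape assumed from its Linear(2, 50) first layer:

import torch
import torch.nn as nn

# Rebuild the architecture, reload the weights-only checkpoint, and run
# a batch of 2-D inputs; the Tanh head squashes outputs into (-1, 1).
net = nn.Sequential(nn.Linear(2, 50), nn.ReLU(),
                    nn.Linear(50, 1), nn.Tanh())
net.load_state_dict(torch.load("invariant_checkpoint.pt",
                               map_location=torch.device("cpu")))
net.eval()
with torch.no_grad():
    out = net(torch.rand(8, 2))
print(out.shape)  # torch.Size([8, 1])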
Example #4
            config={
                "lr": 1e-2,  # used in optimizer_creator
                "hidden_size": 1,  # used in model_creator
                "batch_size": 1024,  # used in data_creator
                "path": path1,  # path to load the agent nn
                "path_invariant":
                path_invariant,  # the path to the invariant network
            },
            backend="auto",
            scheduler_step_freq="epoch")
        for i in range(50):
            stats = trainer1.train()
            print(stats)

        print(trainer1.validate())
        torch.save(trainer1.state_dict(),
                   os.path.join(utils.get_save_dir(), "checkpoint.pt"))
        torch.save(trainer1.get_model()[0].state_dict(),
                   os.path.join(utils.get_save_dir(), "retrained_agent.pt"))
        agent_model, invariant_model = trainer1.get_model()
    else:
        sequential_nn = convert_ray_policy_to_sequential(policy).cpu()
        sequential_nn.load_state_dict(
            torch.load(os.path.join(utils.get_save_dir(),
                                    "retrained_agent.pt")))
        agent_model = sequential_nn
        invariant_model = torch.nn.Sequential(torch.nn.Linear(2, 50),
                                              torch.nn.ReLU(),
                                              torch.nn.Linear(50, 1),
                                              torch.nn.Tanh())
        invariant_model.load_state_dict(