Exemplo n.º 1
0
    plt.savefig(fname)
    for i in range(0, len(x), 10):
        plt.text(
            x[i],
            y[i],
            str(i),
            horizontalalignment="center",
            verticalalignment="center",
            fontsize=14,
        )
    plt.savefig("num_" + fname)


if __name__ == "__main__":
    # The environment id is taken from the first command-line argument when
    # one is supplied; otherwise the default game is used.
    if len(sys.argv) < 2:
        name = "MontezumaRevenge"
    else:
        name = sys.argv[1]
    env = make_vec_envs(name, 1)

    # Four encoders built with identical arguments, in a fixed order.
    conv_wmse = mnih_cnn(1, 32)
    conv_idf = mnih_cnn(1, 32)
    conv_cpc = mnih_cnn(1, 32)
    conv_rnd = mnih_cnn(1, 32)

    # Restore saved weights onto the CPU. No checkpoint is loaded for
    # conv_rnd, so it keeps the parameters it was constructed with.
    for _net, _checkpoint in (
        (conv_wmse, "models/conv_wmse.pt"),
        (conv_idf, "models/conv_idf.pt"),
        (conv_cpc, "models/conv_cpc.pt"),
    ):
        _net.load_state_dict(torch.load(_checkpoint, map_location="cpu"))

    # Put every encoder (including conv_rnd) into evaluation mode.
    for _net in (conv_wmse, conv_idf, conv_cpc, conv_rnd):
        _net.eval()

    # Uninitialised 4 x 1000 x 32 tensor plus a write position starting at 0.
    # NOTE(review): presumably one row per encoder above -- confirm against
    # the code that fills `mem`.
    mem = torch.empty(4, 1000, 32)
    cursor = 0
Exemplo n.º 2
0
from repre.inverse_dynamics import IDF
from repre.cpc import CPC

if __name__ == "__main__":
    # Command-line options; every parsed value is merged into the loaded
    # config below, so "--env" ends up available as cfg["env"].
    parser = argparse.ArgumentParser(description="")
    for _flag, _default in (
        ("--cfg", "default"),
        ("--env", "MontezumaRevenge"),
    ):
        parser.add_argument(_flag, type=str, default=_default)
    p = parser.parse_args()

    cfg = load_cfg(p.cfg)
    cfg.update(vars(p))
    wandb.init(project="lwm", config=cfg)

    # Vectorised environments sized from the agent section of the config.
    agent_cfg = cfg["agent"]
    num_env = agent_cfg["actors"]
    fstack = agent_cfg["frame_stack"]
    envs = make_vec_envs(
        cfg["env"],
        num_env,
        max_ep_len=cfg["train"]["max_ep_len"],
    )
    num_action = envs.action_space.n

    # The configured buffer size is divided by the number of environments
    # to obtain the `maxlen` passed to Buffer.
    buffer_cfg = cfg["buffer"]
    buffer = Buffer(
        num_env=num_env,
        maxlen=int(buffer_cfg["size"] / num_env),
        obs_shape=envs.observation_space.shape,
        device=buffer_cfg["device"],
    )

    # Three representation learners sharing the same buffer.
    wmse = WMSE(buffer, cfg)
    idf = IDF(buffer=buffer, num_action=num_action)
    cpc = CPC(buffer=buffer, num_action=num_action)

    # The actor is created without a model or predictor and with eps=1.
    warmup_steps = cfg["buffer"]["warmup"]
    actor = actor_iter(envs, None, None, warmup_steps, eps=1)

    pretrain = int(warmup_steps / num_env)
Exemplo n.º 3
0

if __name__ == "__main__":
    # Command-line options as (flag, type, default); parsed values are
    # merged into the loaded config below.
    parser = argparse.ArgumentParser(description="")
    for _flag, _kind, _default in (
        ("--cfg", str, "default"),
        ("--env", str, "MontezumaRevenge"),
        ("--seed", int, 0),
        ("--ri_scale", float, 1),
    ):
        parser.add_argument(_flag, type=_kind, default=_default)
    p = parser.parse_args()

    cfg = load_cfg(p.cfg)
    cfg.update(vars(p))
    wandb.init(project="lwm", config=cfg)

    # Vectorised environments sized from the agent section of the config.
    agent_cfg = cfg["agent"]
    num_env = agent_cfg["actors"]
    fstack = agent_cfg["frame_stack"]
    envs = make_vec_envs(
        cfg["env"],
        num_env,
        cfg["seed"],
        cfg["train"]["max_ep_len"],
    )

    # The configured buffer size is divided by the number of environments
    # to obtain the `maxlen` passed to Buffer.
    buffer_cfg = cfg["buffer"]
    buffer = Buffer(
        num_env=num_env,
        maxlen=int(buffer_cfg["size"] / num_env),
        obs_shape=envs.observation_space.shape,
        device=buffer_cfg["device"],
    )

    # Q-network on CUDA in train mode; the predictor is built on top of the
    # WMSE encoder, and the learner ties model, buffer and predictor together.
    num_action = envs.action_space.n
    model = DQN(num_action, fstack)
    model = model.cuda()
    model = model.train()
    wmse = WMSE(buffer, cfg)
    pred = Predictor(buffer, wmse.encoder, num_action, cfg)
    learner = Learner(model, buffer, pred, cfg)

    # `eps` falls back to None when the agent config has no "eps" entry.
    warmup_steps = cfg["buffer"]["warmup"]
    explore_eps = cfg["agent"].get("eps")
    actor = actor_iter(envs, model, pred, warmup_steps, eps=explore_eps)
Exemplo n.º 4
0
from repre.predictor import Predictor

if __name__ == "__main__":
    # Command-line options as (flag, type, default); parsed values are
    # merged into the loaded config below.
    parser = argparse.ArgumentParser(description="")
    for _flag, _kind, _default in (
        ("--cfg", str, "default"),
        ("--env", str, "MontezumaRevenge"),
        ("--seed", int, 0),
        ("--ri_scale", float, 1),
    ):
        parser.add_argument(_flag, type=_kind, default=_default)
    p = parser.parse_args()

    cfg = load_cfg(p.cfg)
    cfg.update(vars(p))
    wandb.init(project="lwm", config=cfg)

    # Vectorised environments sized from the agent section of the config.
    agent_cfg = cfg["agent"]
    num_env = agent_cfg["actors"]
    fstack = agent_cfg["frame_stack"]
    envs = make_vec_envs(cfg["env"], num_env, cfg["seed"])

    # The configured buffer size is divided by the number of environments
    # to obtain the `maxlen` passed to Buffer.
    buffer_cfg = cfg["buffer"]
    buffer = Buffer(
        num_env=num_env,
        maxlen=int(buffer_cfg["size"] / num_env),
        obs_shape=envs.observation_space.shape,
        device=buffer_cfg["device"],
    )

    # Q-network on CUDA in train mode, plus the predictor built on top of
    # the WMSE encoder.
    num_action = envs.action_space.n
    model = DQN(num_action, fstack)
    model = model.cuda()
    model = model.train()
    wmse = WMSE(buffer, cfg)
    pred = Predictor(buffer, wmse.encoder, num_action, cfg)

    # Actor with zero warmup steps and a fixed eps of 0.001.
    actor = actor_iter(envs, model, pred, 0, eps=0.001)

    # Restore the saved state of the representation modules, then load the
    # DQN checkpoint directly onto the CUDA device.
    wmse.load()
    pred.load()
    cp = torch.load("models/dqn.pt", map_location="cuda")
    model.load_state_dict(cp)