Exemplo n.º 1
0
def main(cfg):
    """Assemble the training components from ``cfg`` and run ``train_loop``.

    Reads the environment name, the ZFilter settings and two optional saved
    states from ``cfg``, builds the logger, filter, environment, policy and
    agent, restores any saved state that is not ``None``, then hands control
    to ``train_loop`` and prints ``"Done"`` once it returns.

    Args:
        cfg: Configuration object. It must provide ``require(*keys)``, whose
            results are unpacked in the order the keys are passed; it is
            also handed to ``Logger.init``, ``Policy``, ``Agent`` and
            ``train_loop``.
    """
    # The unpacking below relies on require() returning one value per key,
    # in the same order as the keys listed here.
    wanted_keys = (
        "env name",
        "use zfilter",
        "advantage gamma",
        "advantage tau",
        "policy state dict",
        "filter state dict",
    )
    (
        environment_id,
        zfilter_enabled,
        adv_gamma,
        adv_tau,
        saved_policy,
        saved_filter,
    ) = cfg.require(*wanted_keys)

    run_logger = Logger()
    run_logger.init(cfg)

    # Build the pieces in dependency order: the policy needs env.info(),
    # and the agent needs the environment, the policy and the filter.
    zfilter = ZFilter(adv_gamma, adv_tau, enable=zfilter_enabled)
    environment = FakeGym(environment_id)
    learner_policy = Policy(cfg, environment.info())
    agent = Agent(cfg, environment, learner_policy, zfilter)

    # Restore previously saved state, if any was supplied in the config.
    if saved_policy is not None:
        agent_policy = agent.policy()
        agent_policy.reset(saved_policy)
    if saved_filter is not None:
        agent_filter = agent.filter()
        agent_filter.reset(saved_filter)

    # ---- training ---- #
    train_loop(cfg, agent, run_logger)

    print("Done")
Exemplo n.º 2
0
def main(cfg):
    """Assemble the training components from ``cfg`` and run ``train_loop``.

    Reads the environment name, the action mode and two optional saved
    states from ``cfg``, builds the logger, filter, ``FakeRLBench``
    environment, policy and agent, restores any saved state that is not
    ``None``, then hands control to ``train_loop`` and prints ``"Done"``
    once it returns.

    Args:
        cfg: Configuration object. It must provide ``require(*keys)``, whose
            results are unpacked in the order the keys are passed; it is
            also handed to ``Logger.init``, ``Policy``, ``Agent`` and
            ``train_loop``.
    """
    # The unpacking below relies on require() returning one value per key,
    # in the same order as the keys listed here.
    wanted_keys = (
        "env name",
        "action mode",
        "policy state dict",
        "filter state dict",
    )
    environment_id, act_mode, saved_policy, saved_filter = cfg.require(*wanted_keys)

    run_logger = Logger()
    run_logger.init(cfg)

    # Build the pieces in dependency order: the policy needs env.info(),
    # and the agent needs the environment, the policy and the filter.
    plain_filter = Filter()
    # NOTE: FakeGym(env_name) is the disabled alternative environment;
    # FakeRLBench is the one in use here.
    environment = FakeRLBench(environment_id, action_mode=act_mode)
    learner_policy = Policy(cfg, environment.info())
    agent = Agent(cfg, environment, learner_policy, plain_filter)

    # Restore previously saved state, if any was supplied in the config.
    if saved_policy is not None:
        agent_policy = agent.policy()
        agent_policy.reset(saved_policy)
    if saved_filter is not None:
        agent_filter = agent.filter()
        agent_filter.reset(saved_filter)

    # ---- training ---- #
    train_loop(cfg, agent, run_logger)

    print("Done")