def main(cfg):
    """Assemble the training components from ``cfg`` and run the training loop.

    Reads the required entries "env name", "use zfilter", "advantage gamma",
    "advantage tau", "policy state dict" and "filter state dict" through
    ``cfg.require``, builds the logger, filter, environment, policy and
    agent, restores any saved policy/filter state that is not ``None``, and
    then hands control to ``train_loop``.

    Args:
        cfg: Configuration object. It must expose ``require(*keys)`` yielding
            one value per requested key, and it is also forwarded to
            ``Logger.init``, ``Policy``, ``Agent`` and ``train_loop``.
    """
    required = cfg.require(
        "env name",
        "use zfilter",
        "advantage gamma",
        "advantage tau",
        "policy state dict",
        "filter state dict",
    )
    env_name, zfilter_on, gamma, tau, saved_policy, saved_filter = required

    logger = Logger()
    logger.init(cfg)

    # Components are created in dependency order: the policy needs the
    # environment's info, and the agent needs all of the others.
    zfilter = ZFilter(gamma, tau, enable=zfilter_on)
    env = FakeGym(env_name)
    policy = Policy(cfg, env.info())
    agent = Agent(cfg, env, policy, zfilter)

    # Restore saved state before training starts; a ``None`` entry means
    # there is nothing to restore for that component.
    if saved_policy is not None:
        agent.policy().reset(saved_policy)
    if saved_filter is not None:
        agent.filter().reset(saved_filter)

    train_loop(cfg, agent, logger)
    print("Done")
def main(cfg):
    """Assemble the training components from ``cfg`` and run the training loop.

    Reads the required entries "env name", "action mode", "policy state dict"
    and "filter state dict" through ``cfg.require``, builds the logger,
    filter, ``FakeRLBench`` environment, policy and agent, restores any saved
    policy/filter state that is not ``None``, and then hands control to
    ``train_loop``.

    Args:
        cfg: Configuration object. It must expose ``require(*keys)`` yielding
            one value per requested key, and it is also forwarded to
            ``Logger.init``, ``Policy``, ``Agent`` and ``train_loop``.
    """
    required = cfg.require(
        "env name",
        "action mode",
        "policy state dict",
        "filter state dict",
    )
    env_name, action_mode, saved_policy, saved_filter = required

    logger = Logger()
    logger.init(cfg)

    passthrough_filter = Filter()
    # NOTE: the original carried a commented-out alternative environment
    # here: FakeGym(env_name).
    env = FakeRLBench(env_name, action_mode=action_mode)
    policy = Policy(cfg, env.info())
    agent = Agent(cfg, env, policy, passthrough_filter)

    # Restore saved state before training starts; a ``None`` entry means
    # there is nothing to restore for that component.
    if saved_policy is not None:
        agent.policy().reset(saved_policy)
    if saved_filter is not None:
        agent.filter().reset(saved_filter)

    train_loop(cfg, agent, logger)
    print("Done")