def train(env, config):
    """Train the agent from a stored replay buffer, evaluating periodically.

    The replay buffer is loaded from ``config["buffer_path"]`` and
    ``memory.idx`` is capped at ``config["idx"]``.  ``agent.learn(memory)``
    is then called ``config["predicter_time_steps"]`` times.  Every
    ``config["eval"]`` steps (including step 0) the agent is saved under
    ``<config["locexp"]>/models/<step>-``, its Q-values are tested on the
    buffer, and the policy is evaluated twice.

    Args:
        env: Not used by this function; kept for interface compatibility.
        config: Mapping of settings.  Keys read here: ``"locexp"``,
            ``"buffer_size"``, ``"seed"``, ``"device"``, ``"buffer_path"``,
            ``"idx"``, ``"predicter_time_steps"`` and ``"eval"``.  The whole
            mapping is also handed to ``Agent``.

    Returns:
        None.
    """
    t0 = time.time()
    save_models_path = str(config["locexp"])

    memory = ReplayBuffer(
        (8,), (1,), config["buffer_size"], config["seed"], config["device"]
    )
    memory.load_memory(config["buffer_path"])
    agent = Agent(state_size=8, action_size=4, config=config)

    # Only ever shrink the buffer index; never move it past what was loaded.
    # NOTE(review): presumably idx is the number of valid entries - confirm.
    if config["idx"] < memory.idx:
        memory.idx = config["idx"]
    print("memory idx ", memory.idx)

    # Loop-invariant settings, read once up front.
    total_steps = config["predicter_time_steps"]
    eval_every = int(config["eval"])

    for t in range(total_steps):
        # "\\" emits the same single backslash the original "\ " produced,
        # without relying on an invalid escape sequence.  The trailing "\r"
        # plus end='' makes successive progress lines overwrite each other.
        text = "Train Predicter {} \\ {} time {} \r".format(
            t, total_steps, time_format(time.time() - t0)
        )
        print(text, end='')
        agent.learn(memory)

        if t % eval_every == 0:
            # Print again with a newline so this progress line is kept.
            print(text)
            agent.save(save_models_path + "/models/{}-".format(t))
            # agent.test_predicter(memory)  (disabled in the original)
            agent.test_q_value(memory)
            agent.eval_policy()
            # NOTE(review): meaning of the (True, 1) arguments is defined by
            # Agent.eval_policy, which is not visible here.
            agent.eval_policy(True, 1)
def train(env, config):
    """Run one training mode on an agent backed by a stored replay buffer.

    The replay buffer is loaded from ``config["buffer_path"]`` and
    ``memory.idx`` is overwritten with ``config["idx"]``.  What happens next
    depends on ``config["mode"]``:

    * ``"predict"``: call ``agent.learn_predicter(memory)`` for
      ``config["predicter_time_steps"]`` steps, saving the agent every
      2000 steps, then return.
    * ``"iql"``: test the predicter once, then call ``agent.learn(memory)``
      for ``config["predicter_time_steps"]`` steps, testing the predicter
      and the Q-values every 100 steps.
    * ``"dqn"``: call ``agent.dqn_train()``.

    Any other mode does nothing after the setup.

    Args:
        env: Not used by this function; kept for interface compatibility.
        config: Mapping of settings.  Keys read here:
            ``"expert_buffer_size"``, ``"device"``, ``"buffer_path"``,
            ``"idx"``, ``"mode"`` and, for the predict/iql modes,
            ``"predicter_time_steps"``.  The whole mapping is also handed to
            ``Agent``.

    Returns:
        None.
    """
    t0 = time.time()

    memory = ReplayBuffer(
        (8,), (1,), config["expert_buffer_size"], config["device"]
    )
    memory.load_memory(config["buffer_path"])
    agent = Agent(state_size=8, action_size=4, config=config)
    memory.idx = config["idx"]

    # Debug aid (disabled in the original): dump the first states and exit.
    # for i in range(10):
    #     print("state", memory.obses[i])
    # sys.exit()

    # NOTE(review): "memroy" looks like a typo for "memory"; the runtime
    # string is left untouched here.
    print("memroy idx ", memory.idx)

    def progress_line(step):
        # Shared by the predict and iql loops.  "\\" emits the same single
        # backslash the original "\ " produced, without relying on an
        # invalid escape sequence; the trailing "\r" lets successive lines
        # overwrite each other when printed with end=''.
        return "Train Predicter {} \\ {} time {} \r".format(
            step, config["predicter_time_steps"], time_format(time.time() - t0)
        )

    mode = config["mode"]

    if mode == "predict":
        for t in range(config["predicter_time_steps"]):
            print(progress_line(t), end='')
            agent.learn_predicter(memory)
            if t % 2000 == 0:
                # agent.test_predicter(memory)  (disabled in the original)
                # NOTE(review): the braces are passed literally; the path is
                # never formatted.  Confirm this directory name is intended.
                agent.save("pytorch_models-{trained_predicter}/")
        return

    if mode == "iql":
        agent.test_predicter(memory)
        for t in range(config["predicter_time_steps"]):
            text = progress_line(t)
            print(text, end='')
            agent.learn(memory)
            if t % 100 == 0:
                # Print again with a newline so this progress line is kept.
                print(text)
                agent.test_predicter(memory)
                agent.test_q_value(memory)
                # agent.test_policy()  (disabled in the original)

    if mode == "dqn":
        print("mode dqn")
        agent.dqn_train()