import numpy as np

import DQN

NUM_EPISODE = 10
NUM_STEP = 100

if __name__ == "__main__":
    mainQN = DQN.QNetwork(debug_log=True)
    memory = DQN.Memory(max_size=1000)
    #actor = DQN.Actor()

    for episode in range(NUM_EPISODE):
        print('episode {}'.format(episode))
        # Dummy observation: a batch of one 16x16 board with 8 feature planes.
        state = np.random.rand(16 * 16 * 8).reshape(1, 16, 16, 8)
        #action1 = [7, 7]
        #action2 = [8, 8]

        for step in range(NUM_STEP):
            #action, _ = actor.get_action(state, step, mainQN, 'r', action1, action2, 1, True, False, False)
            action = np.array([0, 0])

            if step == NUM_STEP - 1:
                # Terminal step: zero state and a positive reward.
                next_state = np.zeros((1, 16, 16, 8))
                reward = 1.0
            else:
                next_state = np.random.rand(16 * 16 * 8).reshape(1, 16, 16, 8)
                reward = 0.0  # assumption: only the terminal step is rewarded

            # Assumed Memory API: store the transition for experience replay.
            memory.add((state, action, reward, next_state))
            state = next_state
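# --- Illustrative sketch (not part of the original script) ---
# DQN.Memory is instantiated above but its implementation is not shown in this
# file. A minimal replay buffer of the kind the loop assumes might look like
# the following; the class name and the add()/sample() methods are assumptions,
# not the actual DQN module API.
import random
from collections import deque

class ReplayMemory:
    """Fixed-size FIFO buffer of (state, action, reward, next_state) tuples."""

    def __init__(self, max_size=1000):
        # deque with maxlen drops the oldest transition once the buffer is full.
        self.buffer = deque(maxlen=max_size)

    def add(self, transition):
        self.buffer.append(transition)

    def sample(self, batch_size):
        # Uniform random minibatch, as used by a standard DQN update.
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)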
# Assumes torch, DQN (and optionally PPO), state_dim, action_dim, loadingBool,
# and file are defined earlier in this script.
solved_reward = 230     # stop training if avg_reward > solved_reward
log_interval = 20       # print avg reward in the interval
max_episodes = 50000    # max training episodes
max_timesteps = 3000    # max timesteps in one episode
n_latent_var = 64       # number of variables in hidden layer
update_timestep = 200   # update policy every n timesteps
# Change these first
lr = 0.002
betas = (0.9, 0.999)
gamma = 0.99            # discount factor
K_epochs = 4            # update policy for K epochs
eps_clip = 0.2          # clip parameter for PPO
#############################################

# print(dir(DQN))
memory = DQN.Memory()
model = DQN.DQN(state_dim, action_dim, n_latent_var, lr, betas, gamma, K_epochs, eps_clip)
# memory = PPO.Memory()
# model = PPO.PPO(state_dim, action_dim, n_latent_var, lr, betas, gamma, K_epochs, eps_clip)

print("About to load model...")
if loadingBool:
    try:
        print(file)
        ## DQN
        model.policy_net.load_state_dict(torch.load(file))
        model.target_net.load_state_dict(torch.load(file))
        ## PPO
        # model.policy.load_state_dict(torch.load(file))
    except FileNotFoundError:
        # Assumption: if no checkpoint exists, start training from scratch.
        print("No saved model found at {}; starting fresh.".format(file))
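# --- Illustrative sketch (not part of the original script) ---
# The loading block above expects `file` to hold a state_dict saved earlier.
# The matching save step, using standard PyTorch APIs, might look like this;
# the function name and default checkpoint path are placeholder assumptions.
import torch

def save_checkpoint(model, path="dqn_checkpoint.pth"):
    # Persist only the policy network's learned weights; the loader above
    # copies the same file into both policy_net and target_net, re-syncing
    # the target network on load.
    torch.save(model.policy_net.state_dict(), path)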