def train(game):
    """Train a DQN agent on ``game`` for ``TRAIN_GAMES`` episodes.

    Each episode is played to completion while its transitions are buffered
    locally. Only episodes whose accumulated (scaled) reward is non-negative
    are copied into the agent's replay memory and followed by a training
    step.

    Args:
        game: Environment object providing ``new_episode()``, ``get_state()``
            (whose result exposes ``screen_buffer`` and ``game_variables``),
            ``make_action(action)`` and ``is_episode_finished()``.

    Returns:
        The ``DQN`` agent that was trained.
    """
    agent = DQN(game)

    for _ in tqdm(range(TRAIN_GAMES)):
        game.new_episode()
        done = False
        local_history = []
        total_reward = 0

        state = game.get_state()
        while not done:
            img = state.screen_buffer
            variables = state.game_variables

            action = agent.act(img)
            reward = game.make_action(action)
            done = game.is_episode_finished()

            if done:
                # No successor observation is fetched once the episode has
                # ended; reuse the current frame/variables as a placeholder.
                next_img, next_variables = img, variables
            else:
                state = game.get_state()
                next_img = state.screen_buffer
                next_variables = state.game_variables

            # Shape the reward with the variable change caused by *this*
            # action, then scale it down by 100.
            reward = (
                reward + calculate_additional_reward(variables, next_variables)
            ) / 100
            total_reward += reward

            # Store (s, s', r, a, done) where `a` is the action taken in `s`.
            # Previously the frame *before* `s` was stored alongside `a`,
            # which shifted every transition by one step.
            local_history.append([img, next_img, reward, action, done])

        # Episodes that ended with a negative return are discarded entirely.
        if total_reward >= 0:
            for transition in local_history:
                agent.remember(*transition)
            # NOTE(review): the source layout did not make clear whether
            # train() belongs inside this guard or should run after every
            # episode -- confirm the intended nesting.
            agent.train()

    return agent
def _evaluate(env, agent, episodes, max_steps, render):
    """Return the mean per-episode reward of the agent's greedy policy.

    Runs ``episodes`` rollouts of at most ``max_steps`` steps each, picking
    actions with ``agent.sample_action(state, policy="greedy")`` and
    optionally rendering every step.
    """
    total_reward = 0
    for _ in range(episodes):
        state = env.reset()
        for _ in range(max_steps):
            if render:
                env.render()
            action = agent.sample_action(state, policy="greedy")
            state, reward, done, _info = env.step(action)
            total_reward += reward
            if done:
                break
    return total_reward / episodes


def main(args):
    """Train a DQN agent on CartPole-v0, evaluate periodically, and plot.

    Args:
        args: Namespace providing ``seed``, ``lr``, ``max_ep``,
            ``init_epsilon``, ``final_epsilon``, ``log_every``, ``test_ep``
            and ``render``. It is also passed through to ``DQN``.
    """
    env = gym.make("CartPole-v0")
    if args.seed >= 0:
        random_seed(args.seed)
        env.seed(args.seed)

    agent = DQN(env, args)
    agent.set_model(get_model(out_dim=env.action_space.n, lr=args.lr))

    # Loop invariants: per-episode step cap and the linear epsilon step.
    max_steps = env.spec.timestep_limit
    epsilon_step = (
        (args.init_epsilon - args.final_epsilon) / args.max_ep
        if args.max_ep
        else 0.0
    )

    rewards_history, steps_history = [], []
    train_steps = 0

    # Training
    for ep in range(args.max_ep):
        state = env.reset()
        ep_rewards = 0
        for _ in range(max_steps):
            action = agent.sample_action(state, policy="egreedy")
            next_state, reward, done, _info = env.step(action)
            train_steps += 1
            ep_rewards += reward

            # Modified reward to speed up learning.
            # NOTE(review): any `done` is penalised here, which presumably
            # includes episodes cut off by the step limit -- confirm that
            # is intended.
            reward = -1 if done else 0.1
            agent.train(state, action, reward, next_state, done)

            state = next_state
            if done:
                break

        # Track an exponential running average (0.9 / 0.1) of episode reward
        # against the cumulative number of training steps.
        steps_history.append(train_steps)
        if rewards_history:
            rewards_history.append(rewards_history[-1] * 0.9 + ep_rewards * 0.1)
        else:
            rewards_history.append(ep_rewards)

        # Linear epsilon decay, clamped so a final step cannot undershoot
        # the configured floor.
        if agent.epsilon > args.final_epsilon:
            agent.epsilon = max(args.final_epsilon, agent.epsilon - epsilon_step)

        # Evaluate during training, once every `log_every` episodes.
        if ep % args.log_every == args.log_every - 1:
            current_mean_rewards = _evaluate(
                env, agent, args.test_ep, max_steps, args.render
            )
            print("Episode: %d Average Reward: %.2f" % (ep + 1, current_mean_rewards))

    # Plot training rewards.
    plt.plot(steps_history, rewards_history)
    plt.xlabel("steps")
    plt.ylabel("running avg rewards")
    plt.show()
# Interactive entry point: choose between training on the default dataset,
# training on a user-supplied dataset, or testing a previously saved model.
x = input('''To train model: train, To test a trained model: test, To train on different dataset: d: ''')

if x == 'd':
    dataset = input('Enter name of dataset as "example_dataset.csv": ')
    try:
        raw = preprocess(dataset)
    except Exception:
        # Previously a bare `except` printed this message and then called
        # preprocess() again on the same input, failing a second time.
        # Report once and stop instead.
        print('Invalid dataset')
        raise SystemExit(1)
else:
    raw = preprocess()

# Every mode runs on the same environment configuration.
env = MyStocksEnv(raw, window_size=1, frame_bound=(1, 300))

if x == 'test':
    all_rewards = trained_test('dqn_model.h5', env)
else:
    # Any answer other than 'test' (including 'd') trains a new agent.
    actions = 2
    states = 7
    agent = DQN(actions, states, 100)
    all_rewards = agent.train(env, 1000)

# NOTE(review): 0 is presumably a "nothing to report" sentinel returned by
# the calls above -- confirm against trained_test() / DQN.train().
if all_rewards != 0:
    print(all_rewards)
    plot(all_rewards)
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create the game and wrap it with make_dino in one step.
# The 'ChromeDinoNoBrowser-v0' id can be substituted for 'ChromeDino-v0' here.
env = make_dino(gym.make('ChromeDino-v0'), timer=True, frame_stack=True)

# Size of the discrete action space, needed to build the agent.
n_actions = env.action_space.n

# ----------- Nature DQN ---------------
dqn = DQN(n_actions, device)
dqn.train(env, logger)
# To evaluate a saved model instead of training:
# dqn.load("./trained/dqn.pkl")
# dqn.test(env)

# ----------- Prioritized DQN ---------------
# dqn_p = DQNPrioritized(n_actions, device)
# dqn_p.train(env, logger)
# dqn_p.load("./trained/dqn_p.pkl")
# dqn_p.test(env)

# ----------- Double DQN ----------------
# double_dqn = DoubleDQN(n_actions, device)
# double_dqn.train(env, logger)
# double_dqn.load("./trained/double-dqn.pkl")
# double_dqn.test(env)