# ---- parameters ----
n_epochs = 10000

# ---- environment and agent ----
# NOTE(review): n_rows, n_cols, n_playerlength are defined elsewhere in this file/module.
env = CatchBall(n_rows, n_cols, n_playerlength)
agent = DQNAgent(env.enable_actions, env.name,
                 env.screen_n_rows, env.screen_n_cols, env.player_length)

# ---- bookkeeping ----
win = 0

for e in range(n_epochs):
    # reset per-epoch counters and the environment (keepPos=True preserves
    # the player position across resets — semantics defined by CatchBall)
    frame = 0
    loss = 0.0
    Q_max = 0.0
    env.reset(keepPos=True)
    state_t_1, reward_t, terminal = env.observe()

    # play one episode until the environment signals a terminal state
    while not terminal:
        # the most recent observation becomes the current state
        state_t = state_t_1

        # pick an action (agent.exploration is passed through — presumably an
        # exploration rate; confirm against DQNAgent.select_action) and apply it
        action_t = agent.select_action(state_t, agent.exploration)
        env.execute_action(action_t)

        # observe the next state, the reward, and the terminal flag
        state_t_1, reward_t, terminal = env.observe()

        # record the transition for experience replay
        agent.store_experience(state_t, action_t, reward_t, state_t_1, terminal)
# ---- parameters ----
n_epochs = 1000

# ---- environment and agent ----
env = CatchBall()
agent = DQNAgent(env.enable_actions, env.name)

# ---- bookkeeping ----
win = 0

for e in range(n_epochs):
    # reset per-epoch counters and the environment
    frame = 0
    loss = 0.0
    Q_max = 0.0
    env.reset()
    state_t_1, reward_t, terminal = env.observe()

    # play one episode until the environment signals a terminal state
    while not terminal:
        # the most recent observation becomes the current state
        state_t = state_t_1

        # pick an action (agent.exploration is passed through — presumably an
        # exploration rate; confirm against DQNAgent.select_action) and apply it
        action_t = agent.select_action(state_t, agent.exploration)
        env.execute_action(action_t)

        # observe the next state, the reward, and the terminal flag
        state_t_1, reward_t, terminal = env.observe()

        # record the transition for experience replay
        agent.store_experience(state_t, action_t, reward_t, state_t_1, terminal)
# ---- parameters ----
# NOTE(review): this block is a byte-for-byte duplicate of the preceding
# training loop — likely an accidental paste; consider removing one copy.
n_epochs = 1000

# ---- environment and agent ----
env = CatchBall()
agent = DQNAgent(env.enable_actions, env.name)

# ---- bookkeeping ----
win = 0

for e in range(n_epochs):
    # reset per-epoch counters and the environment
    frame = 0
    loss = 0.0
    Q_max = 0.0
    env.reset()
    state_t_1, reward_t, terminal = env.observe()

    # play one episode until the environment signals a terminal state
    while not terminal:
        # the most recent observation becomes the current state
        state_t = state_t_1

        # pick an action (agent.exploration is passed through — presumably an
        # exploration rate; confirm against DQNAgent.select_action) and apply it
        action_t = agent.select_action(state_t, agent.exploration)
        env.execute_action(action_t)

        # observe the next state, the reward, and the terminal flag
        state_t_1, reward_t, terminal = env.observe()

        # record the transition for experience replay
        agent.store_experience(state_t, action_t, reward_t, state_t_1, terminal)