def test_smoke():
    # smoke test: just runs the code - only a shape assertion
    numberOfCells = 10  # in each axis
    startingPosition = (4, 5)  # head
    foodPosition = (3, 6)
    env = Environment(numberOfCells)
    agent = DQNAgent(state_size=env.state_size, action_size=Actions.action_size,
                     deterministic=True, batch_size=24, memory_limit=2000)
    state = env.reset(startingPosition, foodPosition)
    agent.reset_convolutional_layers()
    full_state = agent.get_convolutional_layers(state)
    maxsteps = 2
    for step in range(maxsteps):
        action = agent.get_exploration_action()
        next_state, reward, done = env.step(action, food_position=(1, 1))
        full_next_state = agent.get_convolutional_layers(next_state)
        assert full_next_state.shape == (1, numberOfCells, numberOfCells, agent.numberOfLayers)
        agent.save_transition(full_state, action, reward, full_next_state, done)
        current_loss = agent.train()
        if step == 0:
            action1 = action
            loss1 = current_loss
        full_state = full_next_state
    loss2 = current_loss
    action2 = action
def test_single_training():
    numberOfCells = 10  # in each axis
    startingPosition = (4, 5)  # head
    foodPosition = (3, 6)
    env = Environment(numberOfCells, deterministic=True)
    agent = DQNAgent(state_size=env.state_size, action_size=Actions.action_size,
                     deterministic=True, batch_size=24, memory_limit=2000)
    state = env.reset(startingPosition, foodPosition)
    agent.reset_convolutional_layers()
    full_state = agent.get_convolutional_layers(state)
    loss10 = -1
    action10 = -1
    maxsteps = 10
    for step in range(maxsteps):
        action = agent.get_exploration_action()
        next_state, reward, done = env.step(action, food_position=(1, 1))
        assert not done
        full_next_state = agent.get_convolutional_layers(next_state)
        assert full_next_state.shape == (1, numberOfCells, numberOfCells, agent.numberOfLayers)
        agent.save_transition(full_state, action, reward, full_next_state, done)
        current_loss = agent.train()
        full_state = full_next_state
        loss10 = current_loss
        action10 = action
    assert loss10 == 0.006804642267525196
    assert action10 == 0
def test_multiepisode_training():
    numberOfCells = 10  # in each axis
    startingPosition = (4, 5)  # head
    foodPosition = (3, 6)
    env = Environment(numberOfCells, deterministic=True)
    state_size = env.state_size
    action_size = Actions.action_size  # 3
    agent = DQNAgent(state_size=state_size, action_size=action_size,
                     deterministic=True, batch_size=24, memory_limit=2000)
    losses = [-1, -1, -1, -1]
    done = False
    episodes = 4
    maxsteps = 9
    for e in range(episodes):
        state = env.reset(startingPosition, foodPosition)
        agent.reset_convolutional_layers()
        full_state = agent.get_convolutional_layers(state)
        loss = 0
        for step in range(maxsteps):
            action = agent.get_exploration_action()
            # food regeneration at (1, 1) happens only once over the whole test
            next_state, reward, done = env.step(action, food_position=(1, 1))
            full_next_state = agent.get_convolutional_layers(next_state)
            assert full_next_state.shape == (1, numberOfCells, numberOfCells, agent.numberOfLayers)
            agent.save_transition(full_state, action, reward, full_next_state, done)
            current_loss = agent.train()
            loss += current_loss
            full_state = full_next_state
        losses[e] = loss
    assert losses[0] == 3.9618697417899966
    assert losses[1] == 0.044194952584803104
    assert losses[2] == 0.1333141174982302
    assert losses[3] == 2.834151452407241
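# The three tests above are deterministic regression tests against recorded loss
# values. A typical way to run them (assuming pytest is installed and collects
# this module - both assumptions, the project may use another runner) is:
#   pytest -v path/to/this_test_file.py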
def train_snake():
    # todo: move all these parameters into a configuration file
    numberOfCells = 10  # in each axis
    startingPosition = (4, 5)  # head
    foodPosition = (3, 6)
    max_steps_allowed = 1000
    env = Environment(numberOfCells)
    state_size = env.state_size  # (numberOfCells x numberOfCells)
    action_size = Actions.action_size  # 3
    agent = DQNAgent(state_size=state_size, action_size=action_size,
                     batch_size=32, memory_limit=6000, number_of_channels=5)
    episodes = 30000
    decay = 0.9 / episodes * 2  # changes epsilon: explore vs exploit
    epochs = []
    losses = []
    steps_list = []
    with open('training_data', 'w') as f:
        for e in range(episodes):
            state = env.reset(startingPosition)
            # print('state array reset: \n', state)
            agent.reset_convolutional_layers()
            full_state = agent.get_convolutional_layers(state)
            loss = 0.0
            steps = 0
            done = False
            episode_reward = 0
            while not done:
                # state at this point is just a 2D array
                action = agent.get_action(full_state)
                # action = agent.get_raction()
                # print('action chosen: ', action)
                # step onto the next state
                next_state, reward, done = env.step(action)
                # print('state array after step ', steps, ' : \n', next_state)
                # print('reward returned: ', reward)
                # we store the next_state in (1, H, W, C)
                full_next_state = agent.get_convolutional_layers(next_state)
                # print('full next state: \n', full_next_state)
                # assert(full_next_state.shape == (1, numberOfCells, numberOfCells, agent.numberOfLayers))
                # save S, A, R, S' to the experience memory
                # full states are a snapshot - copies of the state
                agent.save_transition(full_state, action, reward, full_next_state, done)
                episode_reward += reward
                # train the model from replayed experience only
                current_loss = agent.train()
                # print('current_loss: ', current_loss)
                loss += current_loss
                full_state = full_next_state
                # limit max steps - avoid endless episodes
                steps += 1
                if steps >= max_steps_allowed:
                    done = True  # next episode
            if agent.epsilon > 0.1:
                agent.epsilon -= decay  # agent slowly reduces exploring
            print('episode: {:5d} steps: {:3d} epsilon: {:.3f} loss: {:8.4f} reward: {:3d} fruits: {:2d}'
                  .format(e, steps, agent.epsilon, loss, episode_reward, env.fruits_eaten))
            f.write('{:5d} {:3d} {:8.4f} {:4d} {:2d}\n'.format(
                e, steps, loss, episode_reward, env.fruits_eaten))
    agent.model.save('trained_snake.model')
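# Minimal entry point so the module can also be run directly as a training
# script. This is a sketch/assumption - the original project may launch
# train_snake() from a separate runner instead.
if __name__ == '__main__':
    train_snake()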