# Dimensions of the observation and action spaces taken from the gym env.
state_size = env.observation_space.shape[0]
action_size = env.action_space.shape[0]

agent = DQNAgent(state_size, action_size)
# To resume from a checkpoint, load saved weights here, e.g.:
#   agent.load("../models/human-ddqn.h5f")
done = False
batch_size = 32

for e in range(args.EPISODES):
    # Start a fresh episode; the network expects a (1, state_size) row vector.
    state = np.reshape(env.reset(), [1, state_size])
    for time in range(500):
        env.render()
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        # Penalize episode termination so the agent learns to survive longer.
        if done:
            reward = -10
        next_state = np.reshape(next_state, [1, state_size])
        # Store the transition for experience replay, then advance.
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            # Sync the target network at the end of each episode (DDQN).
            agent.update_target_model()
            print("episode: {}/{}, score: {}, e: {:.2}".format(
                e, args.EPISODES, time, agent.epsilon))
            break
        # Learn from a minibatch once enough transitions are buffered.
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)
    # Optional periodic checkpointing, e.g. every 10 episodes:
    #   if e % 10 == 0: agent.save("../models/human-ddqn.h5f")
def run(display_option, speed, params):
    """Run the DQN snake training/evaluation loop.

    Plays ``params['episodes']`` games. In training mode (``params['train']``)
    the agent learns online (short memory) and from experience replay after
    each game, and the final weights are saved; otherwise epsilon is pinned to
    0 so the agent acts greedily.

    Args:
        display_option: truthy to render the game window each step.
        speed: per-step delay in milliseconds when rendering.
        params: config dict — expects keys 'weights_path', 'load_weights',
            'episodes', 'batch_size', 'train', 'epsilon_decay_linear'.
    """
    import random  # local import: needed for the uniform epsilon draw below

    pygame.init()
    agent = DQNAgent(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("weights loaded")

    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0

    while counter_games < params['episodes']:
        # Keep the window responsive and allow a clean exit.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()

        # Initialize classes for a fresh game.
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform the first move to seed the state.
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            if not params['train']:
                agent.epsilon = 0
            else:
                # Linear epsilon decay: more exploitation as training proceeds.
                agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])

            # Get the old state before acting.
            state_old = agent.get_state(game, player1, food1)

            # Epsilon-greedy action selection.
            # BUG FIX: the original used `randint(0, 1) < agent.epsilon`, which
            # draws only the integers 0 or 1 — exploration probability was a
            # constant 50% for ANY epsilon in (0, 1], not epsilon itself.
            # A uniform float in [0, 1) gives the intended probability.
            if random.random() < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # Predict the best action from the old state (11-feature input).
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)

            # Perform the chosen move and observe the new state.
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)

            # Reward for the transition (depends on crash/food outcome).
            reward = agent.set_reward(player1, game.crash)

            if params['train']:
                # Train on the single fresh transition, then bank it for replay.
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                agent.remember(state_old, final_move, reward, state_new, game.crash)

            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        if params['train']:
            # Experience replay over the accumulated long-term memory.
            agent.replay_new(agent.memory, params['batch_size'])

        counter_games += 1
        print(f'Game {counter_games} Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)

    if params['train']:
        agent.model.save_weights(params['weights_path'])
    plot_seaborn(counter_plot, score_plot)