def dqn():
    """Train a DQN agent on Tetris, save its weights, then play demo games forever.

    The agent learns from (state, next_state, reward, done) transitions gathered
    by greedily choosing the placement whose resulting board state it ranks best.
    No return value; side effects are TensorBoard logs, a saved weights file,
    and an endless rendered demo loop at the end.
    """
    env = Tetris()
    episodes = 2000               # number of training games
    max_steps = None              # no per-episode step cap
    epsilon_stop_episode = 1500   # episode at which exploration stops decaying
    mem_size = 20000              # replay-buffer capacity
    discount = 0.95               # future-reward discount factor
    batch_size = 512
    epochs = 1
    render_every = 50             # kept for reference; training renders are disabled below
    log_every = 50                # TensorBoard logging period (episodes)
    replay_start_size = 2000      # min stored transitions before training begins
    train_every = 1               # train after every episode
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    # NOTE: a duplicated pre-loop initialization of current_state/done/steps/
    # render/actions was removed here — every one of those names is reassigned
    # at the top of the first loop iteration, so the block was dead code.
    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0
        # Rendering during training is intentionally disabled (the
        # render_every schedule was commented out by the author).
        render = False
        actions = []

        # Game: play until game over (or the optional step cap).
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # Recover the action whose resulting state the agent ranked best.
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1],
                                    render=render, render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            actions.append(best_action)
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            log.log(episode, avg_score=avg_score, min_score=min_score,
                    max_score=max_score)
            # NOTE(review): Keras Model.evaluate normally expects (x, y);
            # verify this call against the model wrapper's actual API.
            print(agent.model.evaluate(current_state))

    agent.model.save_weights("ia_tetris_weights.h5")

    # Demo: play forever with the trained agent, rendering every game.
    while True:
        current_state = env.reset()
        done = False
        steps = 0
        render = True

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1],
                                    render=render, render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            actions.append(best_action)
            steps += 1

        scores.append(env.get_game_score())
def dqn():
    """Greedy evaluation run of a deeper DQN agent on Tetris.

    epsilon=0 makes action selection fully greedy, and the training call is
    commented out, so this variant only plays games and aggregates scores.
    """
    env = Tetris()
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50             # also reused below as the score-report window
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [64, 32, 16]      # deeper net than the 32x32 baseline
    render_delay = None
    activations = ['relu', 'relu', 'relu', 'linear']
    agent = DQNAgent(
        env.get_state_size(),
        epsilon=0,                # greedy: no random exploration
        n_neurons=n_neurons,
        activations=activations,
        epsilon_stop_episode=epsilon_stop_episode,
        mem_size=mem_size,
        discount=discount,
        replay_start_size=replay_start_size,
    )
    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)
    scores = []
    scores_sum = 0   # running sum of scores in the current report window
    score_max = 0    # best score seen in the current report window
    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0
        # Render on the render_every schedule only.
        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game: play greedily until game over (or the optional step cap).
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            # print('\n\n', next_states)
            best_state = agent.best_state(next_states.values())

            # Recover the action whose resulting state the agent ranked best.
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1],
                                    render=render, render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        # Windowed score bookkeeping, reset every render_every episodes.
        score = env.get_game_score()
        scores.append(score)
        scores_sum += score
        if score > score_max:
            score_max = score
        if episode != 0 and episode % render_every == 0:
            # print('SCORES SUM:', scores_sum, 'AVG:', scores_sum / render_every, 'MAX:', score_max)
            scores_sum = 0
            score_max = 0

        # Train
        # if episode % train_every == 0:
        #     agent.train(batch_size=batch_size, epochs=epochs)
        # NOTE(review): the original flattened source makes the nesting of the
        # next two statements ambiguous; presumably leftover debugging that
        # pauses 30 s after each episode — confirm intent before relying on it.
        print('Done!')
        sleep(30)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            log.log(episode, avg_score=avg_score, min_score=min_score,
                    max_score=max_score)
def dqn():
    """Train a DQN Tetris agent, rendering/recording on a schedule and logging
    per-episode score and step count to TensorBoard.

    The final episode is always rendered and recorded regardless of schedule.
    """
    env = Tetris()
    episodes = 2000
    max_steps = 1000000000        # effectively unbounded step cap
    epsilon_stop_episode = 1750   # exploration decays until this episode
    mem_size = 20000
    discount = 0.95
    batch_size = 1024
    epochs = 1
    render_every = 1              # render every episode
    log_every = 1                 # log every episode
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = 0.01
    activations = ['relu', 'relu', 'linear']
    # NOTE: an unused local `m = 0` was removed here.

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-eps={episodes}-e-stop={epsilon_stop_episode}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    steps_list = []   # per-episode step counts, logged alongside scores

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        # Render (and record) on the schedule, and always on the last episode.
        if (render_every and episode % render_every == 0) or episode == (episodes - 1):
            render = True
            record = True
        else:
            render = False
            record = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # Recover the action whose resulting state the agent ranked best.
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], episode,
                                    render=render, render_delay=render_delay,
                                    record=record)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())
        steps_list.append(steps)

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs: with log_every == 1 this reports the just-finished episode.
        if log_every and episode and episode % log_every == 0:
            score = scores[-log_every]
            steps = steps_list[-log_every]
            log.log(episode, score=score, steps=steps)
def dqn(conf: AgentConf):
    """Train a DQN agent on Tetris with hyper-parameters taken from `conf`.

    Logs windowed score statistics to TensorBoard and saves the trained model
    under the run's log directory when all episodes finish.
    """
    env = Tetris()
    agent = DQNAgent(env.get_state_size(),
                     n_neurons=conf.n_neurons, activations=conf.activations,
                     epsilon=conf.epsilon, epsilon_min=conf.epsilon_min,
                     epsilon_stop_episode=conf.epsilon_stop_episode,
                     mem_size=conf.mem_size, discount=conf.discount,
                     replay_start_size=conf.replay_start_size)

    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = f'logs/tetris-{stamp}-ms{conf.mem_size}-e{conf.epochs}-ese{conf.epsilon_stop_episode}-d{conf.discount}'
    log = CustomTensorBoard(log_dir=log_dir)
    print(f"AGENT_CONF = {log_dir}")

    scores = []

    episodes_wrapped: Iterable[int] = tqdm(range(conf.episodes))
    for episode in episodes_wrapped:
        current_state = env.reset()
        done = False
        steps = 0

        # Render this episode only on the configured schedule.
        if conf.render_every and episode % conf.render_every == 0:
            render = True
        else:
            render = False

        # Play one game, always taking the move whose outcome the agent ranks best.
        while not done and (not conf.max_steps or steps < conf.max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # Map the chosen outcome back to the move that produces it.
            best_action = next((move for move, outcome in next_states.items()
                                if outcome == best_state), None)

            reward, done = env.hard_drop([best_action[0], 0], best_action[1], render=render)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Periodic training on replayed transitions.
        if episode % conf.train_every == 0:
            agent.train(batch_size=conf.batch_size, epochs=conf.epochs)

        # Periodic windowed statistics.
        if conf.log_every and episode and episode % conf.log_every == 0:
            window = scores[-conf.log_every:]
            log.log(episode, avg_score=mean(window), min_score=min(window),
                    max_score=max(window))

    # Persist the trained model for later reuse.
    save_model(agent.model, f'{log_dir}/model.hdf', overwrite=True, include_optimizer=True)
def dqn():
    """Train a DQN Tetris agent whose state vector is built from a
    configurable subset of board features.

    The boolean toggles below select which statistics the environment exposes;
    they are passed positionally to both env.reset and env.get_next_states.
    Logs windowed score stats plus total cleared lines to TensorBoard.
    """
    env = Tetris()
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    # Feature toggles: which board statistics form the state.
    _max_height = True
    _min_height = True
    _current_piece = False
    _next_piece = False
    _max_bumpiness = False
    _lines = False
    _holes = True
    _total_bumpiness = True
    _sum_height = False
    # Positional order expected by env.reset / env.get_next_states.
    feature_flags = (_max_height, _min_height, _current_piece, _next_piece,
                     _max_bumpiness, _lines, _holes, _total_bumpiness, _sum_height)

    for episode in tqdm(range(episodes)):
        current_state = env.reset(*feature_flags)
        done = False
        steps = 0

        # Render only on the configured schedule.
        render = bool(render_every and episode % render_every == 0)

        # Game
        while not done and (not max_steps or steps < max_steps):
            # No params for default
            next_states = env.get_next_states(*feature_flags)
            best_state = agent.best_state(next_states.values())

            # Map the chosen outcome back to the move that produces it.
            best_action = next((move for move, outcome in next_states.items()
                                if outcome == best_state), None)

            reward, done = env.play(best_action[0], best_action[1],
                                    render=render, render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            window = scores[-log_every:]
            log.log(episode, avg_score=mean(window), min_score=min(window),
                    max_score=max(window), cleared_lines=env.get_lines())
def dqn():
    """Train DQN agent #3 on Tetris, checkpointing weights periodically and
    appending run statistics to a text log plus a CSV of per-episode scores.
    """
    env = Tetris()
    episodes = 500
    max_steps = None
    epsilon_stop_episode = int(episodes * 0.75)   # decay exploration over 75% of the run
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 25
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    dqn_num = 3
    filepaths = "tetris-nn_" + str(dqn_num) + "-.h5"
    # filepaths = ["tetris-nn_"+str(dqn_num)+"-"+str(i)+".h5" for i in range(0,10)]
    # BUGFIX: `save = len(filepaths)` measured the *filename string's length*
    # (the 10-element list version above is commented out), so
    # `save_every = episodes / save` was a float period that the integer
    # modulo below (almost) never hit and the agent was never checkpointed.
    # Restore the intended 10-checkpoint schedule with integer arithmetic.
    num_checkpoints = 10
    save = num_checkpoints
    save_every = episodes // num_checkpoints
    log_fp = "log.txt"
    csv_fp = "dqn_" + str(dqn_num) + "_training.csv"

    # Append a run header to the text log.
    with open(log_fp, "a") as log:
        log.write("\ntetris-nn=" + str(n_neurons) + "-mem=" + str(mem_size) +
                  "-bs=" + str(batch_size) + "-e=" + str(epochs) + "-" +
                  str(datetime.now().strftime("%Y%m%d-%H%M%S")) + "\n\n")

    agent = DQNAgent(env.get_action_space(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    scores = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        # Render only on the configured schedule.
        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # Recover the action whose resulting state the agent ranked best.
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.step(best_action[0], best_action[1],
                                    render=render, render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Save a checkpoint every save_every episodes (overwrites the same file).
        if (episode + 1) % save_every == 0:
            agent.save(filepaths)
            # agent.save(filepaths[save-10])
            save += 1

        # Logs
        if log_every and episode and (episode + 1) % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            with open(log_fp, "a") as log:
                logging = ("episode: " + str(episode + 1) +
                           ", avg_score: " + str(avg_score) +
                           ", min_score: " + str(min_score) +
                           ", max_score: " + str(max_score) + "\n")
                log.write(logging)

    # Close out the run in the text log and dump all scores to CSV.
    with open(log_fp, "a") as log:
        log.write(
            "\n------------------------------------------------------------------------------------------------" +
            "\n")
    df = pd.DataFrame(scores)
    df.to_csv(csv_fp)
def dqn():
    """Train a DQN agent against a TetrisApp instance and save a plot of the
    30-episode moving average of scores.

    Uses the app's programmatic-control API (pcrun/pcplace) instead of the
    usual env.play; TensorBoard logging is disabled in this variant.
    """
    # TetrisApp(8, 16, 750, False, 40, 30 * 100): presumably an 8x16 board;
    # the remaining args look like display/speed settings — TODO confirm
    # against TetrisApp.__init__.
    env = TetrisApp(8, 16, 750, False, 40, 30 * 100)
    episodes = 5000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    # log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    # log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    # Start the app's game loop before driving it programmatically.
    env.pcrun()

    for episode in tqdm(range(episodes)):
        env.reset()
        # Initial state is read directly from the raw board via the app's
        # (private) feature extractor.
        current_state = env._get_board_props(env.board)
        done = False
        steps = 0

        # Render only on the configured schedule.
        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # Recover the action whose resulting state the agent ranked best.
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            # Programmatic piece placement (column, rotation) — returns
            # (reward, done) like env.play in the other variants.
            reward, done = env.pcplace(best_action[0], best_action[1])

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        # if log_every and episode and episode % log_every == 0:
        #     avg_score = mean(scores[-log_every:])
        #     min_score = min(scores[-log_every:])
        #     max_score = max(scores[-log_every:])
        #     log.log(episode, avg_score=avg_score, min_score=min_score,
        #             max_score=max_score)

    # Plot and save the 30-episode moving average of scores.
    plt.xlabel("Episodes")
    plt.ylabel('Average score over 30 episodes')
    plt.grid()
    plt.plot(np.linspace(30, episodes, episodes - 29), moving_average(scores, 30))
    plt.savefig("nlinker.png")
def dqn():
    """Run (and optionally train) an adversarial pair of DQN agents on Tetris:
    a placing agent and a piece-choosing adversary ("hateris") that is
    rewarded with the negated agent reward.

    With both training flags False this only plays back saved agents.
    Plots the score history at the end.
    """
    trainingAgent = False   # train the placing agent
    trainingHater = False   # train the adversarial piece chooser
    env = Tetris(trainingAgent or trainingHater)
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    # Render rarely while training, frequently during playback.
    render_every = 200 if (trainingAgent or trainingHater) else 10
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent_save_filepath = "keras_saved_maxbump.h5"
    # hater_save_filepath = "hater_changed_reward.h5"
    hater_save_filepath = "hater_best.h5"
    # Avg 135 || reward function = 1 + (lines_cleared ** 2)*self.BOARD_WIDTH - (.1)*self._bumpiness(self.board)[0]/self.BOARD_WIDTH
    # 200 death penalty
    # agent_save_filepath = "keras_saved_maxbump.h5"
    # Avg 25 || reward function = 1 + (lines_cleared ** 2)*self.BOARD_WIDTH
    # 2 death penalty
    # agent_save_filepath = "keras_saved.h5"

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size,
                     training=trainingAgent, agent_save_filepath=agent_save_filepath)
    hateris = DQNAgent(env.get_state_size(),
                       n_neurons=n_neurons, activations=activations,
                       epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                       discount=discount, replay_start_size=replay_start_size,
                       training=trainingHater, agent_save_filepath=hater_save_filepath)
    # The environment consults the adversary when choosing pieces.
    env.hater = hateris

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        # Render only on the configured schedule.
        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # Recover the action whose resulting state the agent ranked best.
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1],
                                    render=render, render_delay=render_delay)

            # When a state is one feature short, append the relevant piece id:
            # the upcoming piece for the agent, the current piece for the
            # adversary — presumably because each net was built with that
            # extra input; TODO confirm against DQNAgent's expected state size.
            if len(current_state) == env.get_state_size() - 1 and trainingAgent:
                toBeAdded = current_state + [env.next_piece]
            elif len(current_state) == env.get_state_size() - 1 and trainingHater:
                toBeAdded = current_state + [env.current_piece]
            else:
                toBeAdded = current_state

            if trainingAgent:
                agent.add_to_memory(toBeAdded, next_states[best_action], reward, done)
            if trainingHater:
                # Zero-sum: the adversary is rewarded by the agent's loss.
                hateris.add_to_memory(toBeAdded, next_states[best_action], -reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0 and trainingAgent:
            agent.train(batch_size=batch_size, epochs=epochs)
        if episode % train_every == 0 and trainingHater:
            hateris.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            std_score = stdev(scores[-log_every:])
            print(
                str(episode) + " Avg: " + str(avg_score) + " Min: " + str(min_score) +
                " Max: " + str(max_score) + " Std: " + str(round(std_score, 2)))

        # Snapshot each net at the moment exploration stops decaying.
        if episode == epsilon_stop_episode and trainingAgent:
            agent.save_agent("agent_stopEps.h5")
        if episode == epsilon_stop_episode and trainingHater:
            hateris.save_agent("hater_stopEps.h5")

    # Final saves after all episodes.
    if trainingAgent:
        agent.save_agent("real_agent.h5")
    if trainingHater:
        hateris.save_agent("real_hater.h5")

    plt.plot(scores)
    plt.show()
def dqn():
    """Run (and optionally train) a single DQN Tetris agent that can persist
    itself to disk, then plot the score history.

    With training=False this only plays back the saved agent.
    """
    training = False
    env = Tetris(training)
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    # Render rarely while training, frequently during playback.
    render_every = 200 if training else 10
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']
    # BUGFIX: this assignment was commented out while the name was still
    # passed to DQNAgent below, which raised NameError at call time.
    agent_save_filepath = "keras_saved_maxbump.h5"
    # with open("saved_agent", "rb") as input_file:
    #     agent = pickle.load(input_file)
    # agent.epsilon = 0

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size,
                     training=training, agent_save_filepath=agent_save_filepath)

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        # Render only on the configured schedule.
        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # Recover the action whose resulting state the agent ranked best.
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1],
                                    render=render, render_delay=render_delay)

            # When the state is one feature short, append the upcoming piece
            # id — presumably the net was built with that extra input; confirm
            # against DQNAgent's expected state size.
            if len(current_state) == env.get_state_size() - 1:
                toBeAdded = current_state + [env.next_piece]
            else:
                toBeAdded = current_state

            if training:
                agent.add_to_memory(toBeAdded, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0 and training:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            std_score = stdev(scores[-log_every:])
            print(
                str(episode) + " Avg: " + str(avg_score) + " Min: " + str(min_score) +
                " Max: " + str(max_score) + " Std: " + str(round(std_score, 2)))

        # Snapshot the net at the moment exploration stops decaying.
        if episode == epsilon_stop_episode:
            agent.save_agent("keras_saved_stopEps.h5")

    # Final save after all episodes.
    if training:
        agent.save_agent("keras_saved.h5")

    plt.plot(scores)
    plt.show()
replay_start_size=replay_start_size) while True: current_state = env.reset() done = False steps = 0 render = True # Game while not done and (not max_steps or steps < max_steps): next_states = env.get_next_states() best_state = agent.best_state(next_states.values()) best_action = None for action, state in next_states.items(): if state == best_state: best_action = action break reward, done = env.play(best_action[0], best_action[1], render=render, render_delay=render_delay) agent.add_to_memory(current_state, next_states[best_action], reward, done) current_state = next_states[best_action] actions.append(best_action) steps += 1 scores.append(env.get_game_score())