def dqn():
    """Train a DQN agent on Tetris, save its weights, then play forever with the trained model.

    Hyper-parameters are hard-coded below.  Per-window score statistics are
    written to TensorBoard every ``log_every`` episodes; after training, the
    weights are saved and an endless rendered demo loop runs.
    """
    env = Tetris()

    # --- Hyper-parameters ---
    episodes = 2000               # number of training episodes
    max_steps = None              # no per-episode step cap
    epsilon_stop_episode = 1500   # episode at which exploration stops decaying
    mem_size = 20000              # replay-buffer capacity
    discount = 0.95               # reward discount factor (gamma)
    batch_size = 512
    epochs = 1
    render_every = 50             # kept for reference; rendering stays off during training
    log_every = 50
    replay_start_size = 2000      # minimum experiences before training kicks in
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    actions = []

    # NOTE: the original pre-loop initialization of current_state/done/steps/
    # render (immediately shadowed on the first loop iteration) was dead code
    # and has been removed.
    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0
        render = False  # rendering disabled while training

        # Game: greedily pick the placement whose resulting state the agent rates best.
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # Recover the action key that maps to the chosen state.
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1],
                                    render=render, render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            actions.append(best_action)
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            recent = scores[-log_every:]
            log.log(episode, avg_score=mean(recent), min_score=min(recent),
                    max_score=max(recent))

    print(agent.model.evaluate(current_state))
    agent.model.save_weights("ia_tetris_weights.h5")

    # Demo loop: play forever with rendering enabled, using the trained network.
    while True:
        current_state = env.reset()
        done = False
        steps = 0
        render = True

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1],
                                    render=render, render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            actions.append(best_action)
            steps += 1

        scores.append(env.get_game_score())
def dqn():
    """Train a DQN agent on Tetris, rendering and recording every episode.

    Logs the raw score and step count of each episode (``log_every == 1``)
    to TensorBoard.
    """
    env = Tetris()

    # --- Hyper-parameters ---
    episodes = 2000
    max_steps = 1000000000        # effectively unlimited steps per episode
    epsilon_stop_episode = 1750
    mem_size = 20000
    discount = 0.95
    batch_size = 1024
    epochs = 1
    render_every = 1              # render (and record) every episode
    log_every = 1                 # log every episode
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = 0.01
    activations = ['relu', 'relu', 'linear']
    # NOTE: removed unused local `m = 0` from the original.

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-eps={episodes}-e-stop={epsilon_stop_episode}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    steps_list = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        # Render/record this episode (and always the last one).
        if (render_every and episode % render_every == 0) or episode == (episodes - 1):
            render = True
            record = True
        else:
            render = False
            record = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # Recover the action key that maps to the chosen state.
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], episode,
                                    render=render, render_delay=render_delay,
                                    record=record)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())
        steps_list.append(steps)

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs — with log_every == 1 these pick the just-finished episode.
        # (Renamed from `steps` to avoid clobbering the loop counter.)
        if log_every and episode and episode % log_every == 0:
            score = scores[-log_every]
            last_steps = steps_list[-log_every]
            log.log(episode, score=score, steps=last_steps)
def dqn():
    # Evaluation-style run: epsilon=0 means the agent always exploits its
    # current value estimates; the training call below is commented out.
    env = Tetris()
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [64, 32, 16]          # deeper net than the other variants
    render_delay = None
    activations = ['relu', 'relu', 'relu', 'linear']

    agent = DQNAgent(
        env.get_state_size(),
        epsilon=0,                    # no exploration
        n_neurons=n_neurons,
        activations=activations,
        epsilon_stop_episode=epsilon_stop_episode,
        mem_size=mem_size,
        discount=discount,
        replay_start_size=replay_start_size,
    )

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    scores_sum = 0   # running total for the current render_every-sized window
    score_max = 0    # running max for the current window

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0
        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            # print('\n\n', next_states)
            best_state = agent.best_state(next_states.values())

            # Recover the action key that maps to the chosen state.
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], render=render, render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        score = env.get_game_score()
        scores.append(score)
        scores_sum += score
        if score > score_max:
            score_max = score
        # Reset the per-window aggregates (the summary print is disabled).
        if episode != 0 and episode % render_every == 0:
            # print('SCORES SUM:', scores_sum, 'AVG:', scores_sum / render_every, 'MAX:', score_max)
            scores_sum = 0
            score_max = 0

        # Train
        # if episode % train_every == 0:
        #     agent.train(batch_size=batch_size, epochs=epochs)
        # NOTE(review): prints and sleeps 30 s after EVERY episode — looks like
        # debug/demo pacing; confirm this placement is intentional.
        print('Done!')
        sleep(30)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            log.log(episode, avg_score=avg_score, min_score=min_score, max_score=max_score)
def dqn():
    """Run the Tetris DQN training loop with a configurable state-feature layout.

    The boolean flags below select which board features the environment
    includes in the state vector; they are passed positionally to both
    ``env.reset`` and ``env.get_next_states``.
    """
    env = Tetris()

    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    # Feature-selection flags, in the positional order the env expects.
    _max_height = True
    _min_height = True
    _current_piece = False
    _next_piece = False
    _max_bumpiness = False
    _lines = False
    _holes = True
    _total_bumpiness = True
    _sum_height = False
    feature_flags = (_max_height, _min_height, _current_piece, _next_piece,
                     _max_bumpiness, _lines, _holes, _total_bumpiness, _sum_height)

    for episode in tqdm(range(episodes)):
        current_state = env.reset(*feature_flags)
        done = False
        steps = 0
        render = bool(render_every and episode % render_every == 0)

        # Play one full game, always taking the placement the agent rates best.
        while not done and (not max_steps or steps < max_steps):
            # No params for default
            candidates = env.get_next_states(*feature_flags)
            best_state = agent.best_state(candidates.values())

            # Map the chosen state back to its action key.
            best_action = next((act for act, st in candidates.items() if st == best_state), None)

            reward, done = env.play(best_action[0], best_action[1],
                                    render=render, render_delay=render_delay)

            agent.add_to_memory(current_state, candidates[best_action], reward, done)
            current_state = candidates[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            window = scores[-log_every:]
            log.log(episode,
                    avg_score=mean(window),
                    min_score=min(window),
                    max_score=max(window),
                    cleared_lines=env.get_lines())
def dqn(conf: AgentConf):
    """Train a DQN Tetris agent using the settings in *conf*.

    Logs windowed score statistics to TensorBoard and saves the trained
    model to ``<log_dir>/model.hdf`` when all episodes are done.
    """
    env = Tetris()
    agent = DQNAgent(env.get_state_size(),
                     n_neurons=conf.n_neurons, activations=conf.activations,
                     epsilon=conf.epsilon, epsilon_min=conf.epsilon_min,
                     epsilon_stop_episode=conf.epsilon_stop_episode,
                     mem_size=conf.mem_size,
                     discount=conf.discount,
                     replay_start_size=conf.replay_start_size)

    timestamp_str = datetime.now().strftime("%Y%m%d-%H%M%S")
    # conf.mem_size = mem_size
    # conf.epochs = epochs
    # conf.epsilon_stop_episode = epsilon_stop_episode
    # conf.discount = discount
    log_dir = f'logs/tetris-{timestamp_str}-ms{conf.mem_size}-e{conf.epochs}-ese{conf.epsilon_stop_episode}-d{conf.discount}'
    log = CustomTensorBoard(log_dir=log_dir)
    print(f"AGENT_CONF = {log_dir}")

    scores = []

    episodes_wrapped: Iterable[int] = tqdm(range(conf.episodes))
    for episode in episodes_wrapped:
        current_state = env.reset()
        done = False
        steps = 0

        # update render flag (short-circuits when render_every is falsy)
        render = bool(conf.render_every) and episode % conf.render_every == 0

        # game
        while not done and (not conf.max_steps or steps < conf.max_steps):
            reachable = env.get_next_states()
            best_state = agent.best_state(reachable.values())

            # find the action, that corresponds to the best state
            best_action = next((act for act, st in reachable.items() if st == best_state), None)

            reward, done = env.hard_drop([best_action[0], 0], best_action[1], render=render)

            agent.add_to_memory(current_state, reachable[best_action], reward, done)
            current_state = reachable[best_action]
            steps += 1

        # just return score
        scores.append(env.get_game_score())

        # train
        if episode % conf.train_every == 0:
            # n = len(agent.memory)
            # print(f" agent.memory.len: {n}")
            agent.train(batch_size=conf.batch_size, epochs=conf.epochs)

        # logs
        if conf.log_every and episode and episode % conf.log_every == 0:
            window = scores[-conf.log_every:]
            log.log(episode,
                    avg_score=mean(window),
                    min_score=min(window),
                    max_score=max(window))

    # save_model
    save_model(agent.model, f'{log_dir}/model.hdf', overwrite=True, include_optimizer=True)
def dqn():
    """Train a DQNAlgorithm-driven Tetris agent.

    Tracks both game score and game time per episode, logging windowed
    avg/min/max of each to TensorBoard every ``log_every`` episodes.
    """
    env = Tetris()

    episodes = 4000
    max_steps = None
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    render_delay = None

    algo = DQNAlgorithm(env.get_state_size())

    log_dir = f'logs/tetris-nn={str([32, 32])}-mem={20000}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    times = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0
        render = bool(render_every and episode % render_every == 0)

        # Game: repeatedly take the placement whose resulting state scores best.
        while not done and (not max_steps or steps < max_steps):
            candidates = env.get_next_states()
            best_state = algo.best_state(candidates.values())

            # Map the chosen state back to its action key.
            best_action = next((act for act, st in candidates.items() if st == best_state), None)

            reward, done = env.play(best_action[0], best_action[1],
                                    render=render, render_delay=render_delay)

            algo.add_to_memory(current_state, candidates[best_action], reward, done)
            current_state = candidates[best_action]
            steps += 1

        scores.append(env.get_game_score())
        times.append(env.get_game_time())

        # Train
        if episode % train_every == 0:
            algo.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            recent_scores = scores[-log_every:]
            recent_times = times[-log_every:]
            log.log(episode,
                    avg_score=mean(recent_scores),
                    min_score=min(recent_scores),
                    max_score=max(recent_scores),
                    avg_time=mean(recent_times),
                    min_time=min(recent_times),
                    max_time=max(recent_times))