def test(episodes=20, agent=None, load_path=None, ifrender=False, log=False):
    """Evaluate a DQN agent on the 2048 environment for a number of episodes.

    Args:
        episodes: number of evaluation episodes to play.
        agent: an existing DQN instance to evaluate. If None, a fresh agent is
            created and its weights are loaded from disk; a caller-supplied
            agent is used as-is (important: train() passes its live agent here,
            so we must NOT reload weights over it).
        load_path: optional checkpoint path forwarded to agent.load() when a
            fresh agent is created.
        ifrender: when True, render the environment after every step.
        log: when True, configure the project logger and emit per-episode
            stats (reward / steps / highest tile) plus episode wall time.

    Returns:
        dict with keys 'mean' (mean score), 'max' (max score) and
        'list' (raw per-episode score list).
    """
    if log:
        logger.configure(dir="./log/", format_strs="stdout")
    if agent is None:
        agent = DQN(num_state=16, num_action=4)
        if load_path:
            agent.load(load_path)
        else:
            agent.load()

    env = Game2048Env()
    score_list = []
    highest_list = []

    for i in range(episodes):
        state, _, done, info = env.reset()
        state = log2_shaping(state)  # same observation shaping as training

        start = time.time()
        while True:
            # Deterministic (greedy) action selection for evaluation.
            action = agent.select_action(state, deterministic=True)
            next_state, _, done, info = env.step(action)
            next_state = log2_shaping(next_state)
            state = next_state
            if ifrender:
                env.render()
            if done:
                print(env.Matrix)  # final board for quick visual inspection
                if log:
                    logger.logkv('episode number', i + 1)
                    logger.logkv('episode reward', info['score'])
                    logger.logkv('episode steps', info['steps'])
                    logger.logkv('highest', info['highest'])
                    logger.dumpkvs()
                break
        end = time.time()

        if log:
            print('episode time:{} s\n'.format(end - start))
        score_list.append(info['score'])
        highest_list.append(info['highest'])

    # Hoist the repeated aggregate so it is computed once.
    mean_score = np.mean(score_list)
    max_score = np.max(score_list)
    print('mean score:{}, mean highest:{}'.format(mean_score, np.mean(highest_list)))
    # Fixed typo: was 'max hightest'.
    print('max score:{}, max highest:{}'.format(max_score, np.max(highest_list)))

    result_info = {'mean': mean_score, 'max': max_score, 'list': score_list}
    print(highest_list)
    return result_info
# Flask front-end that serves the 2048 game page and exposes an endpoint
# the browser can POST board states to so the trained DQN agent can reply.
from flask import Flask, request, render_template, redirect, url_for, abort, jsonify, make_response
import os
import numpy as np
from gym_2048 import Game2048Env
from dqn_agent import DQN
from utils import log2_shaping

# Module-level singletons: one environment, one Flask app, one agent with
# pre-trained weights loaded at import time (9017 in the filename is
# presumably the checkpoint's evaluation score — cf. the training script's
# save naming; TODO confirm).
env = Game2048Env()
app = Flask(__name__, static_folder="static", static_url_path="/static")
agent = DQN(num_state=16, num_action=4)
agent.load(path='./save/', name='dqn_9017.pkl')

# @app.route('/1')
# def hello_world():
#     with open('./templates/index.html', encoding='utf-8')as f:
#         text = f.read()
#     return text


@app.route('/')
def hello_world2():
    """Serve the game page by reading the template file directly.

    Reads ./templates/py2048.html as UTF-8 and returns its raw contents
    (bypasses Flask's render_template machinery).
    """
    with open('./templates/py2048.html', encoding='utf-8') as f:
        text = f.read()
    return text


@app.route('/move', methods=['POST'])
def tile_move():
    # Parse the JSON payload posted by the game page.
    # NOTE(review): this handler is truncated in the visible chunk — its
    # body continues beyond this source excerpt.
    # NOTE(review): the local name `json` shadows the stdlib module name;
    # consider renaming (e.g. `payload`) in a future code change.
    json = request.get_json()
def train():
    """Train the DQN agent on Game2048Env with periodic evaluation.

    Relies on module-level configuration globals not defined in this block:
    `train_episodes`, `ifrender`, `log_interval`, `epsilon_decay_interval`,
    `eval_interval`, `test_episodes` — TODO confirm they are set at module
    scope. Resumes from the checkpoint ../save/dqn_7710.pkl, logs via the
    project logger, and saves a new checkpoint whenever the evaluation mean
    score improves on the best seen so far.
    """
    episodes = train_episodes
    logger.configure(dir="./log/", format_strs="stdout,tensorboard,log")
    agent = DQN(num_state=16, num_action=4)
    # Warm-start from a previous checkpoint rather than training from scratch.
    agent.load(path="../save/", name="dqn_7710.pkl")
    env = Game2048Env()
    # NOTE(review): "Perfomance" is a misspelling in the project class name;
    # cannot be fixed here without touching its definition site.
    pf_saver = Perfomance_Saver()
    model_saver = Model_Saver(num=10)  # keeps at most 10 saved checkpoints

    eval_max_score = 0  # best eval mean score seen so far (gates checkpointing)
    for i in range(episodes):
        state, reward, done, info = env.reset()
        state = log2_shaping(state)  # log2-based observation shaping

        start = time.time()
        loss = None
        while True:
            # Act randomly until the replay buffer has filled to capacity,
            # then switch to the agent's (epsilon-greedy) policy.
            if agent.buffer.memory_counter <= agent.memory_capacity:
                action = agent.select_action(state, random=True)
            else:
                action = agent.select_action(state)
            next_state, reward, done, info = env.step(action)
            next_state = log2_shaping(next_state)
            reward = log2_shaping(reward, divide=1)  # shape reward too (divide=1)

            agent.store_transition(state, action, reward, next_state)
            state = next_state
            if ifrender:
                env.render()

            # Train every `train_interval` stored transitions, but only once
            # the buffer has filled up (i.e. only start updating after the
            # buffer is full).
            if agent.buffer.memory_counter % agent.train_interval == 0 and agent.buffer.memory_counter > agent.memory_capacity:
                loss = agent.update()

            if done:
                if i % log_interval == 0:
                    # NOTE(review): `if loss:` also skips logging when the
                    # loss is exactly 0.0, not only when it is None.
                    if loss:
                        logger.logkv('loss', loss)
                    logger.logkv('training progress', (i + 1) / episodes)
                    logger.logkv('episode reward', info['score'])
                    logger.logkv('episode steps', info['steps'])
                    logger.logkv('highest', info['highest'])
                    logger.logkv('epsilon', agent.epsilon)
                    logger.dumpkvs()
                    loss = None

                if i % epsilon_decay_interval == 0:  # epsilon decay
                    agent.epsilon_decay(i, episodes)
                break
        end = time.time()
        print('episode time:{} s\n'.format(end - start))

        # eval — run a greedy evaluation every `eval_interval` episodes
        # (skipping episode 0 via the `and i` guard).
        if i % eval_interval == 0 and i:
            eval_info = test(episodes=test_episodes, agent=agent)
            average_score, max_score, score_lis = eval_info['mean'], eval_info['max'], eval_info['list']
            pf_saver.save(score_lis, info=f'episode:{i}')

            # Checkpoint only when the eval mean improves on the best so far;
            # the checkpoint filename embeds the (integer) mean score.
            if int(average_score) > eval_max_score:
                eval_max_score = int(average_score)
                name = 'dqn_{}.pkl'.format(int(eval_max_score))
                agent.save(name=name)
                model_saver.save("./save/" + name)

            # NOTE(review): 'socre' is a typo in this log key; left unchanged
            # here because downstream log consumers may key on it.
            logger.logkv('eval average score', average_score)
            logger.logkv('eval max socre', max_score)
            logger.dumpkvs()