# App factory (debug configuration).
from agent.main import Agent  # import path follows the other scripts in this repo


def create_app():
    _app = Agent(pull_interval=5, debug=True)
    return _app
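# The pull_interval argument suggests the agent periodically pulls shared
# parameters (e.g., from the Access store used elsewhere in these scripts).
# A minimal, self-contained sketch of that pattern -- every name below is
# hypothetical, NOT part of this repo's API:

class PullingWorker:
    """Illustrative worker that refreshes local weights every N steps."""

    def __init__(self, pull_interval=5):
        self.pull_interval = pull_interval
        self.step = 0

    def maybe_pull(self, pull_fn):
        # Call pull_fn() (e.g., a sync-from-global op) every pull_interval steps.
        self.step += 1
        if self.step % self.pull_interval == 0:
            pull_fn()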
import tensorflow as tf

from env.main import Account
from agent.main import Agent, Access, Framework

# Build the environment and inspect the initial observation.
env = Account()
init = env.reset()
print(init.shape)

name = 'W0'
input_shape = [381, 240, 58]
action_size = 3

# Shared parameter store plus one local worker agent.
A = Access(input_shape, action_size)
W0 = Agent(name, A, input_shape, action_size)
print(W0.a_interface)

# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     W0.init_or_update_local(sess)
#     da = W0.get_deterministic_policy_action(sess, init)
#     sa = W0.get_stochastic_action(sess, init)
#
#     next_state, reward, done = env.step(sa)
#     print(next_state.shape)
#     print(reward)
#     print(done)
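# To actually sample actions, the commented-out block above can be run as-is
# inside a session; shown here uncommented, using no calls beyond those already
# in the source:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    W0.init_or_update_local(sess)                        # sync local weights
    da = W0.get_deterministic_policy_action(sess, init)  # greedy action
    sa = W0.get_stochastic_action(sess, init)            # sampled action

    next_state, reward, done = env.step(sa)
    print(next_state.shape)
    print(reward)
    print(done)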
from env.main import Account  # import paths assumed to match the other scripts
from agent.main import Agent

# Old session-based loop (kept for reference):
# while True:
#     action = agent.get_stochastic_policy(sess, state)
#     next_state, reward, done = env.step(action)
#     agent.update_cache(state, action, reward, next_state, done)
#     state = next_state
#     if done:
#         break
#
# agent.update_value_net(sess)
# agent.update_target_net(sess)

env = Account()
state = env.reset()
agent = Agent()

# Roll out one episode, caching each transition.
while True:
    action = agent.get_stochastic_policy(state)
    next_state, reward, done = env.step(action)
    agent.update_cache(state, action, reward, next_state, done)
    state = next_state
    if done:
        break

# After the episode: update the target and eval networks, then exercise
# the checkpoint round trip.
agent.update_target()
agent.update_eval()
agent.save_model()
agent.restore_model()
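# update_cache() is not defined in this snippet; a common implementation is a
# fixed-size replay buffer. A minimal sketch, assuming nothing beyond the
# (state, action, reward, next_state, done) tuple used above -- the class and
# method names are illustrative, not the repo's API:
import random
from collections import deque


class ReplayCache:
    """Hypothetical fixed-size transition cache."""

    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)

    def update_cache(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size=32):
        # Uniform random minibatch for the value-network update.
        return random.sample(self.buffer, min(batch_size, len(self.buffer)))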
import gc
import time

import numpy as np
from visdom import Visdom

from agent.main import Agent
from emulator.main import Account

# viz = Visdom()
# assert viz.check_connection()

env = Account()
state = env.reset()
image_shape = state.shape
print(image_shape)

agent = Agent(image_shape, 3)
max_episodes = 1000
global_step = 0

for episode in range(max_episodes):
    state = env.reset()
    cache_reward = []
    cache_value = []
    while True:
        global_step += 1
        action = agent.get_epsilon_policy(np.expand_dims(state, 0))
        next_state, reward, done = env.step(action)
        agent.append(state, action, reward, done, next_state)
        cache_reward.append(reward)
        cache_value.append(env.total_value)
        # The rest of the loop body is missing from the source; the standard
        # closing steps would be:
        state = next_state
        if done:
            break
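# get_epsilon_policy() presumably wraps the network's greedy action in
# epsilon-greedy exploration. A self-contained sketch of that policy (the
# function name and epsilon value here are illustrative):
def epsilon_greedy(q_values, epsilon=0.1, rng=np.random):
    """Random action with probability epsilon, otherwise the greedy one."""
    if rng.rand() < epsilon:
        return rng.randint(len(q_values))
    return int(np.argmax(q_values))


# Example with 3 actions, matching the action size used above.
print(epsilon_greedy(np.array([0.1, 0.5, 0.2])))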
# App factory (non-debug configuration); see the debug variant above.
from agent.main import Agent  # import path follows the other scripts in this repo


def create_app():
    _app = Agent(pull_interval=5)
    return _app
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

from agent.main import Access, Agent
from env.main import Account

tf.reset_default_graph()
sns.set_style('whitegrid')
%matplotlib

inputs_shape = [381, 240, 58]
action_size = 3

config = tf.ConfigProto()
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
    with tf.device("/cpu:0"):
        # Rebuild the graph, then restore the trained weights.
        A = Access(inputs_shape, action_size)
        W = Agent('W0', A, inputs_shape, action_size)
        A.restore(sess, 'model/saver_1.ckpt')
        W.init_or_update_local(sess)

        # One full evaluation pass with the deterministic policy.
        env = Account()
        state = env.reset()
        for _ in range(466):
            action = W.get_deterministic_policy_action(sess, state)
            state, reward, done = env.step(action)
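# The pandas/seaborn imports suggest the evaluation was followed by a plot.
# Per-step rewards are not collected in the loop above, so this is a hedged
# sketch: accumulate rewards inside the loop (rewards.append(reward)) and
# plot the cumulative return. The list and axis labels are illustrative.
import matplotlib.pyplot as plt

rewards = []  # filled inside the evaluation loop via rewards.append(reward)
df = pd.DataFrame({'reward': rewards})
df['cumulative'] = df['reward'].cumsum()
sns.lineplot(x=df.index, y=df['cumulative'])
plt.xlabel('step')
plt.ylabel('cumulative reward')
plt.show()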