Example #1
def create_app():
    # Build an Agent with a pull interval of 5 and debug mode enabled
    # (Agent is assumed to be imported from the surrounding package).
    _app = Agent(pull_interval=5, debug=True)

    return _app
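# Hedged variant, not from the source: the same factory with its constructor
# arguments exposed so callers can override the defaults. The name
# create_configurable_app and the argument values are illustrative only.
def create_configurable_app(pull_interval=5, debug=True):
    return Agent(pull_interval=pull_interval, debug=debug)

app = create_configurable_app(pull_interval=10, debug=False)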
Example #2
import tensorflow as tf
from env.main import Account
from agent.main import Agent, Access, Framework

# Create the environment and inspect the initial observation shape.
env = Account()
init = env.reset()
print(init.shape)

# Build the Access network and a worker agent 'W0' attached to it.
name = 'W0'
input_shape = [381, 240, 58]
action_size = 3
A = Access(input_shape, action_size)
W0 = Agent(name, A, input_shape, action_size)

print(W0.a_interface)

# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     W0.init_or_update_local(sess)
#     da = W0.get_deterministic_policy_action(sess, init)
#     sa = W0.get_stochastic_action(sess, init)
#
#     next_state, reward, done = env.step(sa)
#     print(next_state.shape)
#     print(reward)
#     print(done)
Example #3
# while True:
#     action = agent.get_stochastic_policy(sess, state)
#     next_state, reward, done = env.step(action)
#     agent.update_cache(state, action, reward, next_state, done)
#     state = next_state
#     if done:
#         break
#
# agent.update_value_net(sess)
# agent.update_target_net(sess)


env = Account()
state = env.reset()

agent = Agent()
# Roll out one episode, storing every transition in the agent's replay cache.
while True:
    action = agent.get_stochastic_policy(state)
    next_state, reward, done = env.step(action)
    agent.update_cache(state, action, reward, next_state, done)
    state = next_state
    if done:
        break

# Update the target and evaluation networks, then checkpoint and reload the model.
agent.update_target()
agent.update_eval()
agent.save_model()
agent.restore_model()
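# Hedged sketch, not part of the source example: in practice the single-episode
# rollout above is repeated, with the target/eval updates run after every episode
# and a checkpoint saved at the end (the episode count here is illustrative).
for episode in range(100):
    state = env.reset()
    while True:
        action = agent.get_stochastic_policy(state)
        next_state, reward, done = env.step(action)
        agent.update_cache(state, action, reward, next_state, done)
        state = next_state
        if done:
            break
    agent.update_target()
    agent.update_eval()
agent.save_model()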


Example #4
import gc
import time
import numpy as np
from visdom import Visdom
from agent.main import Agent
from emulator.main import Account

# viz = Visdom()
# assert viz.check_connection()

env = Account()
state = env.reset()
image_shape = state.shape
print(image_shape)

agent = Agent(image_shape, 3)

max_episodes = 1000
global_step = 0
for episode in range(max_episodes):
    state = env.reset()
    cache_reward = []
    cache_value = []
    while True:
        global_step += 1
        # Epsilon-greedy action on a batched observation, then store the transition.
        action = agent.get_epsilon_policy(np.expand_dims(state, 0))
        next_state, reward, done = env.step(action)
        agent.append(state, action, reward, done, next_state)

        cache_reward.append(reward)
        cache_value.append(env.total_value)
        state = next_state
        if done:
            break
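    # Hedged sketch, not part of the original example: summarise the finished episode.
    # The print is plain Python; the commented Visdom call mirrors the viz setup
    # commented out above and assumes a local Visdom server (window name is made up).
    episode_return = float(np.sum(cache_reward))
    print('episode %d | steps %d | return %.4f | total value %.4f'
          % (episode, global_step, episode_return, cache_value[-1]))
    # viz.line(Y=np.array([episode_return]), X=np.array([episode]),
    #          win='episode_return', update='append')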
Example #5
def create_app():
    _app = Agent(pull_interval=5)

    return _app
Example #6
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from agent.main import Access, Agent
from env.main import Account

tf.reset_default_graph()
sns.set_style('whitegrid')
# IPython magic: this example is a notebook cell with interactive matplotlib enabled.
%matplotlib


inputs_shape = [381, 240, 58]
action_size = 3


config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    with tf.device("/cpu:0"):
        # Rebuild the Access network and one worker, then restore the trained weights
        # and sync them into the local worker graph.
        A = Access(inputs_shape, action_size)
        W = Agent('W0', A, inputs_shape, action_size)
        A.restore(sess, 'model/saver_1.ckpt')
        W.init_or_update_local(sess)
        # Run a 466-step deterministic rollout in the environment.
        env = Account()
        state = env.reset()
        for _ in range(466):
            action = W.get_deterministic_policy_action(sess, state)
            state, reward, done = env.step(action)
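        # Hedged follow-up, not in the original example: repeat the rollout while
        # recording per-step rewards, then chart the cumulative return with the
        # pandas stack imported above (the 466-step horizon is reused from the source).
        state = env.reset()
        rewards = []
        for _ in range(466):
            action = W.get_deterministic_policy_action(sess, state)
            state, reward, done = env.step(action)
            rewards.append(reward)
        pd.Series(rewards).cumsum().plot(title='Cumulative reward over the rollout')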