def run_dqn_thread(baseDir, runNo, thread_id, args):
    """Run one DQN training session and pickle the agent's statistics.

    The statistics are written to ``<baseDir>/<learning_rate>-<runNo>.pkl``.

    Args:
        baseDir: Directory the statistics file is written into.
        runNo: Run index; becomes part of the output file name.
        thread_id: Not used by this function; kept so existing callers
            keep working.
        args: Keyword arguments forwarded unchanged to ``run_dqn``. Must
            contain the key ``'learning_rate'``, which is also used in the
            output file name.
    """
    # NOTE(review): the second argument looks like a GPU selector (another
    # call site passes args['gpu'] here) -- confirm against init_nn_library.
    init_nn_library(True, '0')
    runner, agent = run_dqn(**args)
    runner.run()

    stats_path = '{}/{}-{}.pkl'.format(baseDir, args['learning_rate'], runNo)
    # pickle produces bytes, so the file must be opened in binary mode;
    # text mode ('w') raises TypeError on Python 3.
    with open(stats_path, 'wb') as f:
        pickle.dump(agent.stats, f)
Exemplo n.º 2
0
                    help='gym-env monitor directory')
# Option selecting the compute device; see the help text for accepted values.
parser.add_argument('--gpu',
                    type=str,
                    default=None,
                    help='Gpu to use (none for cpu, any for any gpu)')

args = parser.parse_args()

# Imported only after the command line has been parsed.
# NOTE(review): presumably deferred so that argument errors / --help do not
# pay for loading the learning stack -- confirm before moving to file top.
from algo.dqn import run_dqn, run_dqn_test

# Plain-dict view of the parsed options, forwarded as keyword arguments below.
arguments = vars(args)

import numpy as np

if args.mode == "train":
    # Training: build the runner, optionally attach a trajectory saver,
    # then run it.
    runner, _ = run_dqn(**arguments)
    if args.save_trajectory is not None:
        from utils.trajectory_utils import TrajectorySaver
        ts = TrajectorySaver(args.save_trajectory)
        # NOTE(review): meaning of the second argument (None) is not visible
        # here -- check Runner.listen.
        runner.listen(ts, None)
    runner.run()
else:
    # Any mode other than "train" is handled by the test entry point.
    stats = run_dqn_test(**arguments)

#from PIL import Image
#
#from envs.gym_env import gym_env
#from envs.env_transform import *
#from utils.preprocess import *
#from utils.network_utils import NetworkSaver
#from runner.runner import Runner
Exemplo n.º 3
0
import tensorflow as tf 
from utils.network_utils import NetworkSaver
from runner.runner import TrajRunner
from utils.trajectory_utils import TrajectoryReplay
from nets.net import init_nn_library

# Set up an environment-model learner (EnvLearner) that is fed either by a
# DQN runner in test mode or by a previously saved trajectory.
arguments = vars(args)

env = get_env(args.game, args.atari, args.env_transforms)

if args.load_trajectory is None:
    # No saved trajectory given: drive the environment with DQN in test mode.
    # Work on a copy so the parsed command-line options stay untouched.
    dqn_args = arguments.copy()
    dqn_args['mode'] = 'test'
    dqn_args['replay_buffer_size'] = 0

    # NOTE(review): some callers unpack run_dqn() as a (runner, agent) pair;
    # confirm that a bare runner is returned for this configuration, since
    # runner.listen() is called on the result below.
    runner = run_dqn(**dqn_args)
    replay_buffer = ReplayBuffer(
        args.replay_buffer_size,
        1,
        args.update_frequency,
        args.replay_start_size,
        args.batch_size,
    )
else:
    # Replay transitions from the saved trajectory instead of acting.
    init_nn_library(True, "1")
    runner = TrajRunner(args.max_step)
    replay_buffer = TrajectoryReplay(args.load_trajectory, args.batch_size)

envOps = EnvOps(env.observation_space.shape, env.action_space.n, args.learning_rate)

# Summaries are written only when a log directory was requested.
summary_writer = (
    tf.summary.FileWriter(args.logdir, K.get_session().graph)
    if args.logdir is not None
    else None
)

#model = EnvModelCartpole(envOps)
# The model class is looked up by name among this module's globals, so
# args.env_model must name a class that is in scope here.
model = globals()[args.env_model](envOps)
# From here on `env` refers to the learner, not the environment created above.
env = EnvLearner(replay_buffer, model, summary_writer, args.reward_scale)

runner.listen(env, None)
Exemplo n.º 4
0
def run_dqn_thread(baseDir, runNo, args, max_episode, test_only):
    """Optionally train a DQN agent, then run the 'dqn' test pass.

    Args:
        baseDir: Forwarded to ``run_test``.
        runNo: Forwarded to ``run_test``.
        args: Keyword arguments for ``run_dqn``; ``args['gpu']`` is passed
            to ``init_nn_library``.
        max_episode: Forwarded to ``run_test``.
        test_only: When true, training is skipped and only the test pass
            is executed.
    """
    init_nn_library(True, args['gpu'])

    should_train = not test_only
    if should_train:
        trainer, _agent = run_dqn(**args)
        trainer.run()

    # The test pass runs regardless of whether training happened above.
    run_test('dqn', args, baseDir, runNo, max_episode)