def run():
    """Evaluate a single checkpointed agent and print its average reward.

    Parses CLI args, builds the environment and the agent graph on the
    selected device, restores weights from ``args.checkpoint``, then runs
    ``evaluate`` for ``args.n_play`` episodes.
    """
    args = parser.parse_args()
    args.task = 0
    # Filter params arrive as comma-separated strings; convert to numbers
    # before handing them to the model constructor.
    args.f_num = util.parse_to_num(args.f_num)
    args.f_stride = util.parse_to_num(args.f_stride)
    args.f_size = util.parse_to_num(args.f_size)
    args.branch = util.parse_to_num(args.branch)
    env = new_env(args)
    args.meta_dim = 0 if not hasattr(env, 'meta') else len(env.meta())
    device = '/gpu:0' if args.gpu > 0 else '/cpu:0'
    # Cap per-process GPU memory so multiple eval jobs can share one card.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    config = tf.ConfigProto(device_filters=device,
                            gpu_options=gpu_options,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        if args.alg == 'A3C':
            model_type = 'policy'
        elif args.alg == 'Q':
            model_type = 'q'
        elif args.alg == 'VPN':
            model_type = 'vpn'
        else:
            raise ValueError('Invalid algorithm: ' + args.alg)
        with tf.device(device):
            # Scope name must match the one used at training time so the
            # checkpoint's variable names line up for Saver.restore.
            with tf.variable_scope("local/learner"):
                # getattr instead of eval(): same lookup, no arbitrary
                # code execution from the CLI string.
                model_cls = getattr(model, args.model)
                agent = model_cls(
                    env.observation_space.shape, env.action_space.n,
                    type=model_type, gamma=args.gamma, dim=args.dim,
                    f_num=args.f_num, f_stride=args.f_stride,
                    f_size=args.f_size, f_pad=args.f_pad,
                    branch=args.branch, meta_dim=args.meta_dim)
                print("Num parameters: %d" % agent.num_param)
        saver = tf.train.Saver()
        saver.restore(sess, args.checkpoint)
        np.random.seed(args.seed)
        reward = evaluate(env, agent, args.n_play, eps=args.eps)
        print("Reward: %.2f" % (reward))
def run():
    """Evaluate a checkpointed VPN agent (model type fixed to 'vpn').

    Variant of the generic eval entry point: ignores ``args.alg`` and
    ``env.meta()`` (``meta_dim`` is forced to 0), and discards the value
    returned by ``evaluate``.
    """
    args = parser.parse_args()
    args.task = 0
    # Filter params arrive as comma-separated strings; convert to numbers.
    args.f_num = util.parse_to_num(args.f_num)
    args.f_stride = util.parse_to_num(args.f_stride)
    args.f_size = util.parse_to_num(args.f_size)
    args.branch = util.parse_to_num(args.branch)
    env = new_env(args)
    # NOTE(review): meta_dim is unconditionally 0 here, unlike the sibling
    # run() which consults env.meta() — presumably deliberate for this
    # variant; confirm before unifying.
    args.meta_dim = 0
    device = '/gpu:0' if args.gpu > 0 else '/cpu:0'
    # Cap per-process GPU memory so multiple eval jobs can share one card.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    config = tf.ConfigProto(device_filters=device,
                            gpu_options=gpu_options,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        model_type = 'vpn'
        with tf.device(device):
            # Scope name must match training so checkpoint names line up.
            with tf.variable_scope("local/learner"):
                # getattr instead of eval(): same lookup, no arbitrary
                # code execution from the CLI string.
                model_cls = getattr(model, args.model)
                agent = model_cls(
                    env.observation_space.shape, env.action_space.n,
                    type=model_type, gamma=args.gamma, dim=args.dim,
                    f_num=args.f_num, f_stride=args.f_stride,
                    f_size=args.f_size, f_pad=args.f_pad,
                    branch=args.branch, meta_dim=args.meta_dim)
                print("Num parameters: %d" % agent.num_param)
        saver = tf.train.Saver()
        saver.restore(sess, args.checkpoint)
        np.random.seed(args.seed)
        # Return value intentionally unused in this variant; evaluate is
        # run for its side effects.
        evaluate(env, agent, args.n_play, eps=args.eps)
def run(envs=None):
    """Cross-evaluate a grid of checkpoints against plan-branching configs.

    For every (checkpoint, branch configuration) pair, rebuilds the agent
    graph from scratch, restores the checkpoint weights, and evaluates on
    each environment in ``envs``.

    Args:
        envs: optional list of environments to evaluate on; when None or
            empty, a single env built from the CLI args is used.

    Returns:
        The concatenated list of per-run statistics from ``evaluate``.
    """
    stats = []
    args = parser.parse_args()
    args.task = 0
    # Filter params arrive as comma-separated strings; convert to numbers.
    args.f_num = util.parse_to_num(args.f_num)
    args.f_stride = util.parse_to_num(args.f_stride)
    args.f_size = util.parse_to_num(args.f_size)
    args.branch = util.parse_to_num(args.branch)
    env = new_env(args)
    if not envs:  # covers both None and an empty list
        envs = [env]
    args.meta_dim = 0 if env.meta() is None else len(env.meta())
    device = '/gpu:0' if args.gpu > 0 else '/cpu:0'
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    # Plan-branching factors to evaluate each checkpoint with.
    # NOTE(review): [1, 1, 1, 4, 4, 4] appears twice — confirm whether the
    # duplicate is intentional (e.g. extra samples) or a copy-paste slip.
    branches = [
        [4, 4, 4],
        [4, 4, 4, 4],
        [4, 4, 4, 4, 4],
        [4, 1, 4, 1, 4],
        [1],
        [1, 1, 1],
        [1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1],
        [4, 4, 4, 1],
        [4, 4, 4, 1, 1],
        [4, 4, 4, 1, 1, 1],
        [1, 4, 4, 4],
        [1, 1, 1, 4, 4, 4],
        [1, 1, 1, 4, 4, 4],
    ]
    # Checkpoints to restore; directory name encodes the training branch
    # config (e.g. 's10_t20_g8_444' -> trained with branch [4, 4, 4]).
    paths = [
        f'/home/ikaynov/Repositories/value-prediction-network/Experiments/{x}/best'
        for x in [
            's10_t20_g8_444',
            's10_t20_g8_4444',
            's10_t20_g8_44444',
        ]
    ]
    count = 0
    count_max = len(paths) * len(branches)
    for ck in paths:
        for branch_type in branches:
            print(f'Executing {count}/{count_max}.')
            config = tf.ConfigProto(device_filters=device,
                                    gpu_options=gpu_options,
                                    allow_soft_placement=True)
            # Fresh graph per configuration so variable names don't clash
            # across restores.
            tf.reset_default_graph()
            with tf.Session(config=config) as sess:
                sess.run(tf.global_variables_initializer())
                if args.alg == 'A3C':
                    model_type = 'policy'
                elif args.alg == 'Q':
                    model_type = 'q'
                elif args.alg == 'VPN':
                    model_type = 'vpn'
                else:
                    raise ValueError('Invalid algorithm: ' + args.alg)
                with tf.device(device):
                    # np.random.seed(args.seed)
                    # Scope must match training so checkpoint names line up.
                    with tf.variable_scope("local/learner"):
                        # getattr instead of eval(): same lookup, no
                        # arbitrary code execution from the CLI string.
                        model_cls = getattr(model, args.model)
                        agent = model_cls(
                            env.observation_space.shape, env.action_space.n,
                            type=model_type, gamma=args.gamma, dim=args.dim,
                            f_num=args.f_num, f_stride=args.f_stride,
                            f_size=args.f_size, f_pad=args.f_pad,
                            branch=branch_type, meta_dim=args.meta_dim)
                        # Record the branch config this checkpoint was
                        # trained with, parsed from its directory name
                        # ('..._444' -> '[4, 4, 4]').
                        agent.train_branch = str([
                            int(x)
                            for x in list(ck.split('/')[-2].split('_')[-1])
                        ])
                        print("Num parameters: %d" % agent.num_param)
                        saver = tf.train.Saver()
                saver.restore(sess, ck)
                for env in envs:
                    run_stats = evaluate(env, agent, args.n_play,
                                         eps=args.eps)
                    stats += run_stats
                count += 1
    return stats