def test_tree(env_path, tree_pickle, itr, deterministic, len, episodes, render=False):
    """Run a pickled tree program in the environment of a saved policy.

    The environment is obtained from ``load_policy(env_path, itr,
    deterministic)``; the two other values that call returns are discarded.
    The tree is read from the ``'tree'`` entry of the object stored in
    ``tree_pickle`` and handed to ``label_data`` as the acting callable.

    Args:
        env_path: Forwarded to ``load_policy`` as its first argument.
        tree_pickle: Path of the joblib file holding a mapping with a
            ``'tree'`` entry.
        itr: Forwarded to ``load_policy``.
        deterministic: Forwarded to ``load_policy``.
        len: Forwarded unchanged to ``label_data`` (presumably a per-episode
            step limit -- confirm against ``label_data``). The name shadows
            the builtin but is part of the public signature.
        episodes: Forwarded unchanged to ``label_data``.
        render: Forwarded unchanged to ``label_data``.
    """
    loaded_env, _unused_action_fn, _unused_session = load_policy(
        env_path, itr, deterministic
    )

    # NOTE(review): joblib.load is pickle-based and can execute arbitrary code
    # while loading; only point this at trusted files.
    stored = joblib.load(tree_pickle)
    tree_program = stored['tree']

    def tree_action(obs):
        # predict() is given a batch of one, hence the single-element list.
        return tree_program.predict([obs])

    label_data(loaded_env, tree_action, len, episodes, render)
parser.add_argument('--deterministic', '-d', action='store_true')
parser.add_argument('--seed', type=int, default=301)
parser.add_argument('--save_file', type=str, default='')
# Integer default, consistent with the other int options (was the string '0',
# which argparse converted through ``type=int`` to the same value).
parser.add_argument('--idProcess', type=int, default=0)
parser.add_argument('--num_env_interact', type=int, default=1000000)
parser.add_argument('--steps_per_epoch', type=int, default=30000)
parser.add_argument('--max_ep_len', type=int, default=1000)
args = parser.parse_args()

# Set the seed for reproducibility
tf.set_random_seed(args.seed)
np.random.seed(args.seed)

# Get the policies; a negative --itr selects the 'last' saved iteration.
env, get_action, sess = load_policy(
    args.fpath,
    args.itr if args.itr >= 0 else 'last',
    args.deterministic,
)

# Set the seed of the environment, offset by the process id
# (presumably so that separate processes use different seeds).
print(args.seed, args.idProcess)
env.seed(args.seed + args.idProcess)

# Test the given policy
run_policy(
    env,
    get_action,
    render=args.render,
    num_env_interact=args.num_env_interact,
    steps_per_epoch=args.steps_per_epoch,
    max_ep_len=args.max_ep_len,
    save_file=args.save_file,
    idProcess=args.idProcess,
)
def load(load_folder, env):
    """Load a saved policy and wrap its action function in a network object.

    Args:
        load_folder: Location of the saved policy; converted with ``str()``
            before being passed to ``load_policy``.
        env: Passed as the first argument to ``MultilayerPerceptron``.

    Returns:
        A ``(net, gym_env)`` tuple: the ``MultilayerPerceptron`` built from
        ``env`` and the loaded action function, and the environment returned
        by ``load_policy``.
    """
    folder = str(load_folder)
    # load_policy also returns a session object, which is not needed here.
    gym_env, action_fn, _session = load_policy(folder)
    return MultilayerPerceptron(env, action_fn), gym_env