示例#1
0
def test_tree(env_path,
              tree_pickle,
              itr,
              deterministic,
              len,
              episodes,
              render=False):
    """Run ``label_data`` on a loaded environment using a pickled tree.

    The environment comes from ``load_policy(env_path, itr, deterministic)``;
    the other two values it returns are discarded. The tree is read from the
    ``'tree'`` entry of the object stored in ``tree_pickle``.

    Args:
        env_path: Forwarded to ``load_policy`` as its first argument.
        tree_pickle: Path handed to ``joblib.load``; the loaded object must
            be subscriptable with the key ``'tree'``.
        itr: Forwarded to ``load_policy``.
        deterministic: Forwarded to ``load_policy``.
        len: Forwarded to ``label_data`` as its third argument. The name
            shadows the builtin ``len`` inside this function; it is kept
            because callers may pass it by keyword.
        episodes: Forwarded to ``label_data`` as its fourth argument.
        render: Forwarded to ``label_data`` as its fifth argument.
    """
    environment, _unused_action_fn, _unused_session = load_policy(
        env_path, itr, deterministic)

    # NOTE(review): joblib.load unpickles arbitrary objects; only use it on
    # files from a trusted source.
    loaded = joblib.load(tree_pickle)
    tree = loaded['tree']

    def predict_single(item):
        # predict() is given a one-element batch containing the single input.
        return tree.predict([item])

    label_data(environment, predict_single, len, episodes, render)
示例#2
0
    parser.add_argument('--deterministic', '-d', action='store_true')
    parser.add_argument('--seed', type=int, default=301)
    parser.add_argument('--save_file', type=str, default='')
    # Integer default, consistent with the other int options (argparse would
    # have converted the former string default '0' through type=int anyway).
    parser.add_argument('--idProcess', type=int, default=0)
    parser.add_argument('--num_env_interact', type=int, default=1000000)
    parser.add_argument('--steps_per_epoch', type=int, default=30000)
    parser.add_argument('--max_ep_len', type=int, default=1000)
    args = parser.parse_args()

    # Seed TensorFlow and NumPy with the same value for reproducibility.
    tf.set_random_seed(args.seed)
    np.random.seed(args.seed)

    # Load the environment and policy; a negative --itr is passed on as
    # 'last' instead of a number.
    env, get_action, sess = load_policy(args.fpath,
                                        args.itr if args.itr >= 0 else 'last',
                                        args.deterministic)

    # Seed the environment, offset by the process id so that each process
    # gets a different environment seed.
    print(args.seed, args.idProcess)
    env.seed(args.seed + args.idProcess)

    # Run the loaded policy with the command-line settings.
    run_policy(env,
               get_action,
               render=args.render,
               num_env_interact=args.num_env_interact,
               steps_per_epoch=args.steps_per_epoch,
               max_ep_len=args.max_ep_len,
               save_file=args.save_file,
               idProcess=args.idProcess)
示例#3
0
 def load(load_folder, env):
     """Build a ``MultilayerPerceptron`` from a policy stored in a folder.

     ``load_folder`` is converted to ``str`` and passed to ``load_policy``.
     The action function it returns, together with ``env``, is used to
     construct the network.

     Returns:
         A ``(net, gym_env)`` tuple: the constructed ``MultilayerPerceptron``
         and the environment returned by ``load_policy``.
     """
     folder_path = str(load_folder)
     # The third value returned by load_policy is not used here.
     gym_env, action_fn, _session = load_policy(folder_path)
     return MultilayerPerceptron(env, action_fn), gym_env