示例#1
0
# Example fragment: configure a CartPole stress-testing run, build the
# simulator/environment around a previously trained control policy, and start
# the CSV result log. `open_loop`, `top_k` and `args` are defined outside this
# fragment.

# Hyperparameters for the run. Only max_path_length is consumed in the visible
# code (by the simulator); ec, k and alpha are presumably passed to the search
# algorithm further down -- TODO confirm.
max_path_length = 100
ec = 100.0
k = 0.5
alpha = 0.85

# Seed TensorFlow before the session is created.
tf.set_random_seed(0)
sess = tf.Session()
# The session is entered manually rather than through a `with` block; no
# matching __exit__ call is visible in this fragment.
sess.__enter__()

# Instantiate the env
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load snapshots from a trusted source.
data = joblib.load("../CartPole/ControlPolicy/itr_5.pkl")
# The snapshot's 'policy' entry is handed to the simulator as `sut`
# (presumably "system under test" -- confirm).
sut = data['policy']
reward_function = ASTRewardS()

simulator = CartpoleSimulator(sut=sut,
                              max_path_length=max_path_length,
                              use_seed=False,
                              nd=1)
env = TfEnv(
    ASTEnv(
        open_loop=open_loop,
        simulator=simulator,
        fixed_init_state=True,
        # The third entry is written as a degrees-to-radians conversion of 0.0,
        # so the whole initial state evaluates to zeros.
        s_0=[0.0, 0.0, 0.0 * math.pi / 180, 0.0],
        reward_function=reward_function,
    ))

# Training
# Open (and truncate) total_result.csv inside args.log_dir and build the header
# row: 'step_count' followed by one 'reward <i>' column for each i in
# range(top_k). The fragment ends before anything is written to csv_file.
with open(osp.join(args.log_dir, 'total_result.csv'), mode='w') as csv_file:
    fieldnames = ['step_count']
    for i in range(top_k):
        fieldnames.append('reward ' + str(i))
# Example fragment (its beginning is not visible): same setup as a CartPole
# stress-testing run, but driven by values read from `args`. `args`,
# `max_path_length`, `open_loop` and `top_k` are defined outside this fragment.

# Hyperparameters taken from `args`; rsg_length is fixed at 2. None of the
# three is consumed in the visible code.
k = args.k
alpha = args.alpha
rsg_length = 2

# Seed TensorFlow before the session is created.
tf.set_random_seed(0)
sess = tf.Session()
# The session is entered manually rather than through a `with` block; no
# matching __exit__ call is visible in this fragment.
sess.__enter__()

# Instantiate the env
# The snapshot file name is built from args.sut_itr, i.e. itr_<sut_itr>.pkl.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load snapshots from a trusted source.
data = joblib.load("../CartPole/ControlPolicy/itr_" + str(args.sut_itr) +
                   ".pkl")
# The snapshot's 'policy' entry is handed to the simulator as `sut`
# (presumably "system under test" -- confirm).
sut = data['policy']
reward_function = ASTRewardS()

simulator = CartpoleSimulator(sut=sut,
                              max_path_length=max_path_length,
                              use_seed=True,
                              nd=args.nd)
env = TfEnv(
    ASTEnv(
        open_loop=open_loop,
        simulator=simulator,
        fixed_init_state=True,
        # The third entry is written as a degrees-to-radians conversion of 0.0,
        # so the whole initial state evaluates to zeros.
        s_0=[0.0, 0.0, 0.0 * math.pi / 180, 0.0],
        reward_function=reward_function,
    ))

# Training
# Open (and truncate) total_result.csv inside args.log_dir and build the header
# row: 'step_count' followed by one 'reward <i>' column for each i in
# range(top_k). The fragment ends before anything is written to csv_file.
with open(osp.join(args.log_dir, 'total_result.csv'), mode='w') as csv_file:
    fieldnames = ['step_count']
    for i in range(top_k):
        fieldnames.append('reward ' + str(i))
示例#3
0
# Example fragment: seed NumPy and TensorFlow, then build the CartPole AST
# environment and a deterministic MLP policy inside a TensorFlow session.
seed = 0
top_k = 10
# NOTE(review): max_path_length is assigned here, but the simulator below is
# given the literal 100 instead of this variable.
max_path_length = 100

# Bounded priority queue constructed with top_k; presumably it retains the
# top_k best paths found -- confirm against BPQ.
top_paths = BPQ.BoundedPriorityQueue(top_k)

# Seed both RNG sources before the session is created.
np.random.seed(seed)
tf.set_random_seed(seed)
with tf.Session() as sess:
    # Create env

    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code -- only load snapshots from a trusted source.
    data = joblib.load("../CartPole/ControlPolicy/itr_5.pkl")
    # The snapshot's 'policy' entry is handed to the simulator as `sut`
    # (presumably "system under test" -- confirm).
    sut = data['policy']
    reward_function = ASTRewardS()

    simulator = CartpoleSimulator(sut=sut, max_path_length=100, use_seed=False)
    env = ASTEnv(open_loop=False,
                 simulator=simulator,
                 fixed_init_state=True,
                 # The third entry is written as a degrees-to-radians
                 # conversion of 0.0, so the initial state is all zeros.
                 s_0=[0.0, 0.0, 0.0 * math.pi / 180, 0.0],
                 reward_function=reward_function,
                 )
    # Wrap the ASTEnv in TfEnv; the policy below reads its spec from the
    # wrapped env.
    env = TfEnv(env)
    # Create policy
    # Deterministic MLP with hidden layers of 64 and 32 units and a tanh
    # output nonlinearity.
    policy = DeterministicMLPPolicy(
        name='ast_agent',
        env_spec=env.spec,
        hidden_sizes=(64, 32),
        output_nonlinearity=tf.nn.tanh,
    )
示例#4
0
# Example fragment preamble: create the bounded result queue and seed the RNGs.
# `seed` is defined outside this fragment.
top_k = 10

# Bounded priority queue constructed with top_k; presumably it retains the
# top_k best paths found -- confirm against BPQ.
top_paths = BPQ.BoundedPriorityQueue(top_k)

# Seed both RNG sources before the TensorFlow session below is created.
np.random.seed(seed)
tf.set_random_seed(seed)
with tf.Session() as sess:
    # Create env
    control_policy_path = "../CartPole/Data/Train/itr_5.pkl"
    # control_policy_path = "../CartPole/control_policy.pkl"
    print(control_policy_path)
    data = joblib.load(control_policy_path)
    sut = data['policy']
    reward_function = ASTRewardS()

    simulator = CartpoleSimulator(sut=sut, max_path_length=100, use_seed=True, nd=1)
    env = ASTEnv(open_loop=False,
                 simulator=simulator,
                 fixed_init_state=True,
                 s_0=[0.0, 0.0, 0.0 * math.pi / 180, 0.0],
                 reward_function=reward_function,
                 )
    env = TfEnv(env)

    algo = MCTSRS(
        env=env,
        stress_test_num=2,
        max_path_length=100,
        ec=10.0,
        n_itr=100,
        k=0.5,