# Fixed settings for this run.
# NOTE(review): ec / k / alpha are presumably search hyper-parameters consumed
# further down the script -- confirm against the algorithm constructor.
max_path_length = 100
ec = 100.0
k = 0.5
alpha = 0.85

# Seed TensorFlow, then create a session and enter it by hand.
# NOTE(review): no matching sess.__exit__() is visible in this excerpt.
tf.set_random_seed(0)
sess = tf.Session()
sess.__enter__()

# Instantiate the env
# The system under test is the 'policy' entry of the saved snapshot.
data = joblib.load("../CartPole/ControlPolicy/itr_5.pkl")
sut = data['policy']
reward_function = ASTRewardS()
simulator = CartpoleSimulator(
    sut=sut,
    max_path_length=max_path_length,
    use_seed=False,
    nd=1,
)
# Build the AST environment first, then wrap it for TensorFlow.
env = ASTEnv(
    open_loop=open_loop,
    simulator=simulator,
    fixed_init_state=True,
    s_0=[0.0, 0.0, 0.0 * math.pi / 180, 0.0],
    reward_function=reward_function,
)
env = TfEnv(env)

# Training
# NOTE(review): if csv_file is later handed to csv.writer / csv.DictWriter,
# the csv docs recommend opening it with newline='' -- confirm downstream use.
with open(osp.join(args.log_dir, 'total_result.csv'), mode='w') as csv_file:
    # Header: the step counter followed by one reward column per top-k slot.
    fieldnames = ['step_count']
    fieldnames.extend('reward ' + str(i) for i in range(top_k))
# Settings taken from the parsed command-line arguments, plus one fixed value.
# NOTE(review): k / alpha / rsg_length are presumably consumed by the search
# algorithm further down the script -- confirm against its constructor.
k = args.k
alpha = args.alpha
rsg_length = 2

# Seed TensorFlow, then create a session and enter it by hand.
# NOTE(review): no matching sess.__exit__() is visible in this excerpt.
tf.set_random_seed(0)
sess = tf.Session()
sess.__enter__()

# Instantiate the env
# The snapshot to load is selected by args.sut_itr; its 'policy' entry is the
# system under test.
data = joblib.load("../CartPole/ControlPolicy/itr_" + str(args.sut_itr) + ".pkl")
sut = data['policy']
reward_function = ASTRewardS()
simulator = CartpoleSimulator(
    sut=sut,
    max_path_length=max_path_length,
    use_seed=True,
    nd=args.nd,
)
# Build the AST environment first, then wrap it for TensorFlow.
env = ASTEnv(
    open_loop=open_loop,
    simulator=simulator,
    fixed_init_state=True,
    s_0=[0.0, 0.0, 0.0 * math.pi / 180, 0.0],
    reward_function=reward_function,
)
env = TfEnv(env)

# Training
# NOTE(review): if csv_file is later handed to csv.writer / csv.DictWriter,
# the csv docs recommend opening it with newline='' -- confirm downstream use.
with open(osp.join(args.log_dir, 'total_result.csv'), mode='w') as csv_file:
    # Header: the step counter followed by one reward column per top-k slot.
    fieldnames = ['step_count']
    fieldnames.extend('reward ' + str(i) for i in range(top_k))
# Run configuration.
seed = 0
top_k = 10
max_path_length = 100

# Bounded queue sized to hold top_k entries.
top_paths = BPQ.BoundedPriorityQueue(top_k)

# Seed both NumPy and TensorFlow with the same value.
np.random.seed(seed)
tf.set_random_seed(seed)

with tf.Session() as sess:
    # Create env
    # The system under test is the 'policy' entry of the saved snapshot.
    data = joblib.load("../CartPole/ControlPolicy/itr_5.pkl")
    sut = data['policy']
    reward_function = ASTRewardS()
    # Pass the max_path_length variable rather than repeating the literal 100,
    # so the simulator horizon cannot drift from the value configured above.
    simulator = CartpoleSimulator(
        sut=sut,
        max_path_length=max_path_length,
        use_seed=False,
    )
    env = ASTEnv(
        open_loop=False,
        simulator=simulator,
        fixed_init_state=True,
        s_0=[0.0, 0.0, 0.0 * math.pi / 180, 0.0],
        reward_function=reward_function,
    )
    env = TfEnv(env)

    # Create policy
    # MLP policy with hidden layers (64, 32) and a tanh output nonlinearity.
    policy = DeterministicMLPPolicy(
        name='ast_agent',
        env_spec=env.spec,
        hidden_sizes=(64, 32),
        output_nonlinearity=tf.nn.tanh,
    )
top_k = 10 top_paths = BPQ.BoundedPriorityQueue(top_k) np.random.seed(seed) tf.set_random_seed(seed) with tf.Session() as sess: # Create env control_policy_path = "../CartPole/Data/Train/itr_5.pkl" # control_policy_path = "../CartPole/control_policy.pkl" print(control_policy_path) data = joblib.load(control_policy_path) sut = data['policy'] reward_function = ASTRewardS() simulator = CartpoleSimulator(sut=sut, max_path_length=100, use_seed=True, nd=1) env = ASTEnv(open_loop=False, simulator=simulator, fixed_init_state=True, s_0=[0.0, 0.0, 0.0 * math.pi / 180, 0.0], reward_function=reward_function, ) env = TfEnv(env) algo = MCTSRS( env=env, stress_test_num=2, max_path_length=100, ec=10.0, n_itr=100, k=0.5,