# activations = []
# for i, x_i in enumerate(x):
#     activations.append(fMap.map_features([x_i, y[i]]))
# print(activations)
# plt.plot(x, activations)
# plt.show()

demos = []
writer = open("data/mcar_birl_steps" + str(num_steps)
              + "_size" + str(step_size)
              + "_conf" + str(confidence)
              + "_seed" + str(seed)
              + "_demos" + str(reps), "w")

for i in range(reps):
    print(">>>>iteration", i)
    reward, states_visited, actions_taken, steps = run_episode(
        env, valueFunction, n, False, EPSILON, get_actions=True)
    # collect (s, a) pairs
    if i >= skip_time:
        demos.extend(zip(states_visited, actions_taken))
    print("steps = ", steps)

bayesirl = BIRL(solve_mdp, fMap, env, discount)
birl_value_fn, birl_reward = bayesirl.get_opt_policy(
    demos, num_features, confidence, num_steps, step_size, time_limit=200)

# pickle the controller (value function)
# with open('mcar_maxent_policy_ss.pickle', 'wb') as f:
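# Hedged sketch: one way the commented-out pickling step above could be
# completed. The helper name `save_controller` and its default path are
# illustrative assumptions, not part of the original script; it assumes the
# learned controller (e.g. birl_value_fn) is picklable.
import pickle

def save_controller(controller, path='mcar_birl_policy_sketch.pickle'):
    """Serialize a learned controller (value function) to disk."""
    with open(path, 'wb') as f:
        pickle.dump(controller, f)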
# x = np.linspace(0, 1)
# y = np.ones(len(x))
# activations = []
# for i, x_i in enumerate(x):
#     activations.append(fMap.map_features([x_i, y[i]]))
# print(activations)
# plt.plot(x, activations)
# plt.show()

writer = open("data/mcar_mwal_seed" + str(seed) + "_demos" + str(reps), "w")

for i in range(reps):
    print(">>>>iteration", i)
    reward, states_visited, steps = run_episode(env, valueFunction, n, False, EPSILON)
    # compute feature counts
    fcounts = compute_feature_counts(fMap, states_visited, discount, env)
    print("steps = ", steps)
    # print("feature count = ", fcounts)
    features.append(fcounts)

features = np.array(features)
flabels = [str(c) for c in centers]
sign_finder = FeatureSignExtractor(features, flabels)
slopes = sign_finder.estimate_signs()
fsigns = np.sign(slopes)
signedfMap = rbf.SignedRbf_2D_Feature_Map(rbfun, fsigns)
# for f in range(len(features[0])):
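# Hedged sketch of the quantity compute_feature_counts is expected to return:
# the discounted sum of feature activations over the visited states,
# mu = sum_t gamma^t * phi(s_t). The helper name below and the assumption that
# fMap.map_features accepts a single state are illustrative, not the repo's API.
import numpy as np

def discounted_feature_counts_sketch(fMap, states_visited, discount):
    mu = None
    for t, s in enumerate(states_visited):
        phi = np.asarray(fMap.map_features(s))  # feature vector for this state (assumed interface)
        term = (discount ** t) * phi
        mu = term if mu is None else mu + term
    return mu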
EPSILON = 0
discount = 1.0  # using no discount factor for now
valueFunction = ValueFunction(alpha, numOfTilings)

features = []
for i in range(reps):
    print(">>>>iteration", i)
    # pick feature map
    # fMap = Constant_Feature_Map()
    rbf = RBF(np.array([[-1.2], [-0.3], [0.6]]), 0.7 * np.ones(3), env.action_space.n)
    fMap = Rbf_Position_Feature_Map(rbf)
    steps, states_visited = run_episode(env, valueFunction, 1, False, EPSILON)
    # compute feature counts
    fcounts = compute_feature_counts(fMap, states_visited, discount)
    print("steps = ", steps)
    print("feature count = ", fcounts)
    features.append(fcounts)

plt.plot(range(1, reps + 1), features)
plt.legend(['RBF(-1.2)', 'RBF(-0.3)', 'RBF(0.6)'])
plt.xlabel("Number of episodes")
plt.ylabel("Feature Counts")
plt.show()
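# Hedged sketch of the position-only RBF features the loop above relies on:
# Gaussian bumps over the car's position, using the same centers (-1.2, -0.3, 0.6)
# and width (0.7) passed to RBF(...). rbf_position_features_sketch is illustrative
# and stands in for whatever Rbf_Position_Feature_Map.map_features actually does.
import numpy as np

def rbf_position_features_sketch(position, centers=(-1.2, -0.3, 0.6), width=0.7):
    centers = np.asarray(centers, dtype=float)
    return np.exp(-((position - centers) ** 2) / (2.0 * width ** 2))

# Example: activations near the mountain-car start region around position -0.5.
# print(rbf_position_features_sketch(-0.5))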