Exemplo n.º 1
0
def get_feature_half_planes(state_feature_map, env, start_state, valueFunction, horizon, min_margin = 0, discount = 1.0, threshold = 0.0001):
    """Build half-plane normals contrasting the optimal first action with each alternative.

    A rollout is run from ``start_state`` for the action returned by
    ``getOptimalAction`` and for every other action index in
    ``range(env.action_space.n)``. For each alternative, the difference
    ``opt_fcounts - fcounts`` of the two feature-count vectors is truncated
    (components with absolute value <= ``threshold`` become 0), normalized to
    unit Euclidean length and stored as a tuple.

    Args:
        state_feature_map: forwarded unchanged to ``compute_feature_counts``.
        env: environment; only ``env.action_space.n`` is read here, the object
            itself is forwarded to ``run_rollout``.
        start_state: indexable state; elements 0 and 1 are handed to
            ``getOptimalAction`` and the whole value to ``run_rollout``.
        valueFunction: forwarded to ``getOptimalAction`` and ``run_rollout``.
        horizon: not used by this function; kept so existing callers keep working.
        min_margin: minimum required value of the largest infinity-norm of any
            (untruncated) feature-count difference.
        discount: forwarded to ``compute_feature_counts``.
        threshold: components of a difference vector whose magnitude does not
            exceed this value are zeroed before normalization.

    Returns:
        A set of tuples, one per distinct non-zero normalized difference
        vector. An empty set is returned when some alternative action obtains
        a strictly higher rollout reward than the optimal action, or when the
        best margin is below ``min_margin``.
    """
    # NOTE(review): assumes compute_feature_counts returns 1-D NumPy arrays,
    # as required by the subtraction and norm calls below -- confirm.
    opt_action = getOptimalAction(start_state[0], start_state[1], valueFunction)

    #feature counts obtained when the optimal action is taken first
    states_visited, opt_reward = run_rollout(env, start_state, opt_action, valueFunction, render=False)
    opt_fcounts = compute_feature_counts(state_feature_map, states_visited, discount)

    half_plane_normals = set() #a set, so identical constraints are stored once
    best_margin = 0
    for init_action in range(env.action_space.n):
        if init_action == opt_action:
            continue

        states_visited, cum_reward = run_rollout(env, start_state, init_action, valueFunction, render=False)
        #make sure that opt_reward is no worse than cum_reward from other actions
        if opt_reward - cum_reward < 0:
            return set()  #return nothing if opt_action isn't really optimal

        fcounts = compute_feature_counts(state_feature_map, states_visited, discount)

        #half-plane normal vector; the margin is measured before truncation
        normal = opt_fcounts - fcounts
        margin = np.linalg.norm(normal, np.inf)
        if margin > best_margin:
            best_margin = margin

        #truncate components that are within threshold of zero
        significant = np.abs(normal) > threshold
        normal[~significant] = 0.0
        if significant.any():
            #store the unit-length normal; all-zero vectors carry no constraint
            half_plane_normals.add(tuple(normal / np.linalg.norm(normal)))

    if best_margin < min_margin:
        return set()
    return half_plane_normals
Exemplo n.º 2
0
def get_feature_half_planes(state_feature_map,
                            env,
                            start_state,
                            valueFunction,
                            horizon,
                            discount=1.0):
    """Debug helper: roll out every initial action and print its feature counts.

    For each action index in ``range(env.action_space.n)`` this prints the
    action, prints the result of ``getOptimalAction`` for the first two
    elements of ``start_state``, runs a rendered rollout starting with that
    action, and prints the value returned by ``compute_feature_counts``.

    ``horizon`` is accepted but not used here. Nothing is returned.
    """
    num_actions = env.action_space.n
    for init_action in range(num_actions):
        print("init action", init_action)

        # Looked up on every iteration, after the action has been printed.
        best_action = getOptimalAction(start_state[0], start_state[1],
                                       valueFunction)
        print("opt", best_action)

        # Rollout that begins with the candidate action (rendering enabled).
        trajectory = run_rollout(env,
                                 start_state,
                                 init_action,
                                 valueFunction,
                                 render=True)

        # Feature counts accumulated over the visited states.
        feature_counts = compute_feature_counts(state_feature_map,
                                                trajectory,
                                                discount)
        print(feature_counts)