Пример #1
0
# * Build the state/action grids, compute the transition map over them, then
# * derive the viable set and its measure and print two summary ratios.

# * second state axis: 7 evenly spaced samples on [3, 8]
s_grid_velocity = np.linspace(3, 8, 7)
# * s_grid_height is defined before this excerpt
s_grid = (s_grid_height, s_grid_velocity)
# * first action axis: 21 samples from 0 to 70 degrees, expressed in radians
# * (presumably the angle of attack -- confirm against the model)
a_grid_aoa = np.linspace(00/180*np.pi, 70/180*np.pi, 21)
# * single-action variant, kept for reference:
# a_grid = (a_grid_aoa, )
# * second action axis: 11 evenly spaced samples on [0.9, 1.2]
# * (presumably an amplification coefficient -- confirm against the model)
a_grid_amp = np.linspace(0.9, 1.2, 11)
a_grid = (a_grid_aoa, a_grid_amp)

grids = {'states': s_grid, 'actions': a_grid}
# * time the transition-map computation
t = TicToc()
t.tic()
# * with keep_coords=True the call returns three values here
Q_map, Q_F, Q_reach = vibly.parcompute_Q_map(grids, p_map, keep_coords=True,
                                          verbose=2)
t.toc()
# * elapsed time divided by 60 (presumably seconds -> minutes; confirm the
# * unit of TicToc.elapsed)
print("time elapsed: " + str(t.elapsed/60))
# * viable sets in state-action space (Q_V) and state space (S_V)
Q_V, S_V = vibly.compute_QV(Q_map, grids)
# * project Q_V onto the state space with np.mean as the projection operator
S_M = vibly.project_Q2S(Q_V, grids, proj_opt=np.mean)
# * map the state-space result back onto state-action space
Q_M = vibly.map_S2Q(Q_map, S_M, s_grid, Q_V=Q_V)
# plt.scatter(Q_map[1], Q_map[0])
# * fraction of entries where Q_F is False, and fraction where Q_V is True
print("non-failing portion of Q: " + str(np.sum(~Q_F)/Q_F.size))
print("viable portion of Q: " + str(np.sum(Q_V)/Q_V.size))

import itertools as it
# Q0 = np.zeros((len(grids['states']), total_gridpoints))
# def create_x0(grids):
#     for idx, state_action in enumerate(np.array(list(
#             it.product(*grids['states'], *grids['actions'])))):


def color_generator(n=1):
    # colors = list()
    colors = np.zeros((n, 3))
Пример #2
0
filename = "./parsimonious0.pickle"
# * Load and unpack
# NOTE: unpickling can execute arbitrary code; only load files from a trusted
# source. The context manager closes the file even if pickle.load raises.
with open(filename, 'rb') as infile:
    data = pickle.load(infile)
# * unpack invariants
# Q_value = data["Q_value"]
R_value = data["R_value"]
grids = data["grids"]
S_M = data["S_M"]
# * boolean mask of states whose measure is strictly positive
XV = S_M > 0.0

# * project the reward array onto state space, using np.mean as the operator
RX_value = vibly.project_Q2S(R_value, grids, proj_opt=np.mean)

# * limit of reward
r_max = RX_value.max()
r_min = RX_value.min()
# * limit of values
alpha = 0.
# mynorm = colors.TwoSlopeNorm(vmin=0., vcenter=0., vmax=2.5)
# * two-slope colour normalisation centred on alpha, spanning [-0.5, 2.5]
mynorm = colors.TwoSlopeNorm(vmin=-0.5, vcenter=alpha, vmax=2.5)
mymap = vplot.get_vmap(2)

# * [first, last] of the second state grid followed by [first, last] of the
# * first state grid (presumably an imshow-style extent -- confirm at the
# * plotting call)
extent = [
    grids['states'][1][0], grids['states'][1][-1], grids['states'][0][0],
    grids['states'][0][-1]
]
Пример #3
0
# * Run value iteration once per failure penalty, warm-starting each run from
# * the previous result.
# for rdx, reward_functions in enumerate(reward_schemes):
Q_value = None  # no warm start available for the first penalty
for failure_penalty in failure_penalties:
    tictoc.tic()
    if Q_value is not None:
        # scale the previous solution in place by the current penalty before
        # handing it back in as the initial guess
        Q_value *= failure_penalty
    # 0.6 is the fourth positional argument (presumably the discount factor
    # gamma -- confirm against control.Q_value_iteration)
    Q_value, R_value = control.Q_value_iteration(Q_map,
                                                 grids,
                                                 reward_functions,
                                                 0.6,
                                                 Q_on_grid=Q_on_grid,
                                                 stopping_threshold=1e-6,
                                                 max_iter=1000,
                                                 output_R=True,
                                                 Q_values=Q_value)
    # project the Q-values onto state space, using np.max as the operator
    X_value = vibly.project_Q2S(Q_value, grids, proj_opt=np.max)

    # toc() return value divided by 60 and reported as minutes
    time_elapsed = tictoc.toc()
    print("time elapsed (minutes): " + str(time_elapsed / 60.0))

    # bundle the results of this run with the inputs they were computed from
    data2save = {
        "Q_value": Q_value,
        "R_value": R_value,
        "grids": grids,
        "Q_map": Q_map,
        "Q_F": Q_F,
        "Q_V": Q_V,
        "Q_M": Q_M,
        "S_M": S_M,
        "p": p
    }
Пример #4
0
def Q_value_iteration(Q_map, grids, reward_functions, gamma, Q_on_grid=None,
                      stopping_threshold=1e-5, max_iter=1000, output_R=False,
                      neighbor_option=np.mean,
                      Q_values=None):
    """
    Standard value iteration.

    Inputs:
    Q_map: transition map; an array indexed by (state indices..., action
        indices...) whose entries identify the next state
    grids: dict with keys 'states' and 'actions', each a sequence of 1-D grids
    reward_functions: iterable of callables rfunc(s, a); their results are
        summed to give the reward of each (s, a)
    gamma: discount factor
    Q_on_grid: only compared against None.
        None: each entry of Q_map is unravelled as a bin index (one more bin
            than grid points along each state dimension) and the value of a
            bin is neighbor_option over the maxima of Q_values at the indices
            returned by get_grid_indices.
        anything else: each entry of Q_map is used directly as an index into
            the flattened max-projection of Q_values onto the state space.
    stopping_threshold: iteration stops once the largest absolute update in a
        sweep drops below this value
    max_iter: maximum number of sweeps
    output_R: toggle true to also return array of reward for each (s, a)
    neighbor_option: how to combine the values of the grid points bordering a
        bin (used only when Q_on_grid is None)
    Q_values: initial guess for Q_values, can help speed things up. Must have
        the same shape as Q_map. It is updated IN PLACE and returned.

    Returns:
    Q_values, or the tuple (Q_values, R_values) when output_R is true.
    """

    if Q_values is None:
        Q_values = np.zeros_like(Q_map, dtype=float)
    else:
        assert (Q_values.shape == Q_map.shape), "initial_guess is bad"

    s_grid = grids['states']
    a_grid = grids['actions']
    n_states = len(s_grid)

    # * Pre-compute R. The reward depends only on (s, a), not on the current
    # * Q-values, so it is evaluated a single time for both update modes.
    R_values = np.zeros_like(Q_map, dtype=float)
    for qdx, _ in np.ndenumerate(Q_map):
        s = [grid[qdx[i]] for i, grid in enumerate(s_grid)]
        a = [grid[qdx[n_states + i]] for i, grid in enumerate(a_grid)]
        # pass the state-action pair through each reward function
        R_values[qdx] = sum((rfunc(s, a) for rfunc in reward_functions), 0.0)

    # Bound before the loops so the summary print below is well defined even
    # when max_iter is 0 and no sweep runs.
    max_change = 0.0

    if Q_on_grid is None:
        # one more bin than grid points per state dimension; loop invariant
        bin_shape = tuple(np.size(grid) + 1 for grid in s_grid)

        # iterate over each q
        for iteration in range(max_iter):
            max_change = 0.0  # for stopping
            for qdx, next_s in np.ndenumerate(Q_map):
                bin_idx = np.unravel_index(next_s, bin_shape)
                grid_indices = get_grid_indices(bin_idx, s_grid)
                # value of the bin: combine the best Q-value found at each
                # neighboring grid point
                bin_value = neighbor_option([Q_values[g].max()
                                             for g in grid_indices])

                # keep track of changes, for stopping condition
                diff = np.abs(Q_values[qdx] - R_values[qdx] - gamma*bin_value)
                if (diff > max_change):
                    max_change = diff
                Q_values[qdx] = R_values[qdx] + gamma*bin_value
            if max_change < stopping_threshold:
                print("Stopped early after ", iteration, " iterations.")
                break

    else:  # * Q_on_grid given
        # iterate over each q
        for iteration in range(max_iter):
            max_change = 0.0  # for stopping
            # state values are refreshed once per sweep, from the Q-values as
            # they stood at the start of the sweep
            X_val = project_Q2S(Q_values, grids, proj_opt=np.max)
            X_val = X_val.flatten()
            for qdx, next_s in np.ndenumerate(Q_map):
                q_value = X_val[next_s]
                # keep track of changes, for stopping condition
                diff = np.abs(Q_values[qdx] - R_values[qdx] - gamma*q_value)
                if (diff > max_change):
                    max_change = diff
                Q_values[qdx] = R_values[qdx] + gamma*q_value
            if max_change < stopping_threshold:
                print("Stopped early after ", iteration, " iterations.")
                break

    print("max change in value: ", max_change)

    if output_R:
        return Q_values, R_values
    else:
        return Q_values
Пример #5
0
    def compute_viability(x0, p, name, visualise=False):
        """
        Compute the viable set and its measure on a fixed grid, then pickle
        the results (and optionally save a figure of the state-space measure).

        Inputs:
        x0: initial state; a copy is assigned to the Poincare map
        p: parameters; assigned to the Poincare map and stored with the data
        name: output directory, and also the prefix of the output file names
        visualise: toggle true to also save an image of S_M as a PDF

        Returns nothing. Output goes to `<name>/<name>_<damping>.pickle`, and
        `.pdf` when visualise is true, with `damping` formatted to 4 decimals.

        NOTE: `damping` is not a parameter; it is read from the enclosing
        scope. model.poincare_map is configured in place, so the settings
        made here remain on that object after the call.
        """

        # * Solve for nominal open-loop limit-cycle

        # * Set-up P maps for computations
        p_map = model.poincare_map
        p_map.p = p
        p_map.x = x0.copy()

        # * choose high-level representation

        p_map.xp2s = model.xp2s_y_xdot
        # * this maps the full simulated state to the high-level representation
        # * in this case the relevant states at apex:
        # * (y, xdot), in other words the (height, velocity)

        p_map.sa2xp = model.sa2xp_y_xdot_timedaoa
        # * this maps the high-level representation of state and actions back
        # * to the full state and parameters used for the simulation

        # p_map.sa2xp = model.sa2xp_amp
        # * this representation includes an amplification coefficient `a' for
        # * the muscle activation a*f(t). It adds a dimension to the grids,
        # * which substantially increases computation time, and also makes
        # * visualization much less straightforward (due to the 4-dimensional
        # * state-action space).
        # * We have tried this out, and from a preliminary look, there does
        # * not seem to be much qualitative difference in the results for the
        # * nominal limit-cycle used in the paper. For other conditions, it may
        # * be important to include this (or other additional control inputs)
        # * in the model.

        # * set up grids for computing the viable set and measure
        # * a denser grid will yield more precision, but require more compute
        s_grid_height = np.linspace(0.05, 0.5, 91)
        s_grid_velocity = np.linspace(0, 10.0, 101)
        s_grid = (s_grid_height, s_grid_velocity)
        a_grid_aoa = np.linspace(0 / 180 * np.pi, 90 / 180 * np.pi, 91)
        a_grid = (a_grid_aoa, )

        # * if you use the representation `sa2xp_amp` (see above), the
        # * action grid also includes an extra dimension
        # a_grid_amp = np.linspace(0.75, 1.25, 11)
        # a_grid = (a_grid_aoa, a_grid_amp)

        grids = {'states': s_grid, 'actions': a_grid}

        # * compute transition matrix and boolean matrix of failures
        Q_map, Q_F = vibly.parcompute_Q_map(grids, p_map, verbose=1)

        # * compute viable sets
        Q_V, S_V = vibly.compute_QV(Q_map, grids)
        # * compute the measure in state-space
        S_M = vibly.project_Q2S(Q_V, grids, proj_opt=np.mean)
        # * map the measure to Q-space
        Q_M = vibly.map_S2Q(Q_map, S_M, s_grid, Q_V=Q_V)

        print("non-failing portion of Q: " + str(np.sum(~Q_F) / Q_F.size))
        print("viable portion of Q: " + str(np.sum(Q_V) / Q_V.size))

        # * save data
        # * exist_ok avoids the check-then-create race of testing for the
        # * directory first
        os.makedirs(name, exist_ok=True)
        filename = name + '/' + name + '_' + '{:.4f}'.format(damping)

        data2save = {
            "grids": grids,
            "Q_map": Q_map,
            "Q_F": Q_F,
            "Q_V": Q_V,
            "Q_M": Q_M,
            "S_M": S_M,
            "p": p,
            "x0": x0
        }
        # * the context manager closes the file even if pickling fails
        with open(filename + '.pickle', 'wb') as outfile:
            pickle.dump(data2save, outfile)

        if visualise:
            print("SAVING FIGURE")
            print(" ")
            plt.figure()
            plt.imshow(S_M, origin='lower', vmin=0, vmax=1, cmap='viridis')
            plt.title('bird ' + name)
            plt.savefig(filename + '.pdf', format='pdf')
            # plt.show()  # to just see it on the fly
            plt.close()
Пример #6
0
    Q_map = data['Q_map']

    Q_F = data['Q_F']
    x0 = data['x0']
    poincare_map = data['P_map']
    p = data['p']
    grids = data['grids']

    ################################################################################
    # Compute measure from grid for warm-start
    ################################################################################

    Q_V, S_V = vibly.compute_QV(Q_map, grids)

    S_M = vibly.project_Q2S(Q_V, grids, np.mean)
    # S_M = vibly.project_Q2S(Q_V, grids, np.mean)

    #S_M = S_M / grids['actions'][0].size
    Q_M = vibly.map_S2Q(Q_map, S_M, Q_V)
    plt.plot(S_M)
    plt.show()
    plt.imshow(Q_M, origin='lower')
    plt.show()

    ################################################################################
    # Create estimation object
    ################################################################################

    AS_grid = np.meshgrid(grids['actions'][0], grids['states'][0])
    estimation = MeasureEstimation(state_dim=1, action_dim=1, seed=1)