Example #1
0
def main():
    """ Run the multiple-cost-components BBO demo and plot the results. """
    # Output directory defaults to the system temp dir; the first
    # command-line argument overrides it.
    out_dir = Path(tempfile.gettempdir(), "dmpbbo",
                   "demoBboMultipleCostComponents")
    if len(sys.argv) > 1:
        out_dir = sys.argv[1]

    # Cost = distance to a target point + a regularization component.
    dim = 2
    target = np.full(dim, 2.0)
    reg_weight = 1.0
    cost_function = DemoCostFunctionDistanceToPoint(target, reg_weight)

    # Initial Gaussian search distribution, centered away from the target.
    init_mean = np.full(dim, 5.0)
    init_covar = 4.0 * np.eye(dim)
    distribution = DistributionGaussian(init_mean, init_covar)

    # Reward-weighted averaging with exponential covariance decay.
    updater = UpdaterCovarDecay(eliteness=10,
                                weighting_method="PI-BB",
                                covar_decay_factor=0.8)

    n_updates = 40
    n_samples_per_update = 20

    session = run_optimization(cost_function, distribution, updater,
                               n_updates, n_samples_per_update, out_dir)

    session.plot()
    plt.show()
Example #2
0
def main():
    """ Main function of the script.

    Runs the same optimization with three different distribution updaters
    (fixed exploration, covariance decay, covariance adaptation) and plots
    the learning curve of each in its own figure.
    """
    # Optional output directory from the command line.
    directory = sys.argv[1] if len(sys.argv) > 1 else None

    # Cost function: distance of a sample to a target point.
    n_dims = 2
    minimum = np.full(n_dims, 2.0)
    cost_function = DemoCostFunctionDistanceToPoint(minimum)

    # The three covariance-update strategies to compare.
    updaters = {}

    updaters["fixed_exploration"] = UpdaterMean(eliteness=10,
                                                weighting_method="PI-BB")

    updaters["covar_decay"] = UpdaterCovarDecay(eliteness=10,
                                                weighting_method="PI-BB",
                                                covar_decay_factor=0.8)

    updaters["covar_adaptation"] = UpdaterCovarAdaptation(
        eliteness=10,
        weighting_method="PI-BB",
        max_level=None,
        min_level=0.000001,
        diag_only=False,
        learning_rate=0.8,
    )

    for name, updater in updaters.items():
        print("Distribution updater: " + name)

        # Fresh initial distribution per updater so the runs are comparable.
        mean_init = np.full(n_dims, 5.0)
        covar_init = 1.0 * np.eye(n_dims)
        distribution = DistributionGaussian(mean_init, covar_init)

        n_samples_per_update = 20
        n_updates = 40

        cur_directory = None
        if directory:
            cur_directory = Path(directory, name)

        session = run_optimization(cost_function, distribution, updater,
                                   n_updates, n_samples_per_update,
                                   cur_directory)
        fig = session.plot()
        # canvas.set_window_title was deprecated in matplotlib 3.4 and
        # removed in 3.6; the window title is owned by the canvas manager.
        fig.canvas.manager.set_window_title(
            f"Optimization with covar_update={name}")

    plt.show()
Example #3
0
    def update_distribution(self, distribution, samples, costs):
        """ Update a distribution with reward-weighted averaging.

        @param distribution: Distribution before the update
        @param samples: Samples in parameter space.
        @param costs: The cost of each sample.
        @return: The updated distribution.
        """
        # Map costs to normalized weights; lower cost => higher weight.
        weights = costs_to_weights(costs, self.weighting_method,
                                   self.eliteness)

        # The new mean is the reward-weighted average of the samples
        # (n_samples x n_dims), averaged along axis 0.
        new_mean = np.average(samples, 0, weights)

        # This updater leaves the covariance matrix untouched.
        updated = DistributionGaussian(new_mean, distribution.covar)

        return updated, weights
Example #4
0
def run_optimization(stochastic_field, directory=None):
    """ Optimize DMP weights and gain schedules under a force field.

    @param stochastic_field: Whether the force field applied during the
        rollouts is stochastic (True) or constant (False).
    @param directory: Optional directory to save optimization results to.
    @return: The optimization session returned by run_optimization_task.
    """
    # Main parameter of the experiment: allowed gain range and start value.
    gain_min = 10.0
    gain_max = 1000.0
    gain_initial = 10.0

    # Some DMP parameters
    n_time_steps = 51
    tau = 1.0
    y_init = np.array([0.0])
    y_attr = np.array([1.0])
    n_dims = len(y_init)
    ts = np.linspace(0, tau, n_time_steps)

    # Train the DMP from a min-jerk trajectory, and constant gains
    # (the gain schedule rides along in the trajectory's misc field).
    traj = Trajectory.from_min_jerk(ts, y_init, y_attr)
    schedule = np.full((n_time_steps, n_dims), gain_initial)
    traj.misc = schedule
    function_apps = [FunctionApproximatorRBFN(7, 0.95) for _ in range(n_dims)]
    function_apps_schedules = [
        FunctionApproximatorRBFN(5, 0.9) for _ in range(n_dims)
    ]
    dmp = DmpWithSchedules.from_traj_sched(traj,
                                           function_apps,
                                           function_apps_schedules,
                                           min_schedules=gain_min,
                                           max_schedules=gain_max)

    # Determine the size of the search space for DMP weights and gain
    # schedules separately, so the per-block sigmas below line up.
    dmp.set_selected_param_names(["weights"])
    n_search_traj = dmp.get_param_vector_size()
    dmp.set_selected_param_names(["sched_weights"])
    n_search_gains = dmp.get_param_vector_size()
    # We know the search spaces now: optimize both.
    dmp.set_selected_param_names(["weights", "sched_weights"])

    # Make the task
    viapoint = np.full((1, n_dims), 0.5)
    viapoint_time = 0.5
    task = TaskViapointWithGains(
        viapoint,
        viapoint_time,
        gain_min,
        gain_max,
        viapoint_weight=3.0,
        acceleration_weight=0.0,
        gain_weight=1.0,
    )

    # Make task solver, based on a Dmp
    dt = 0.05
    integrate_dmp_beyond_tau_factor = 1.2
    task_solver = TaskSolverDmpWithGainsAndForceField(
        dmp, dt, integrate_dmp_beyond_tau_factor, stochastic_field)

    # Initial covariance: wider exploration on the gain-schedule block
    # than on the trajectory-weight block.
    mean_init = dmp.get_param_vector()
    sigma_traj = 10.0
    sigma_gains = 50.0
    sigmas = np.concatenate(
        (np.full(n_search_traj, sigma_traj),
         np.full(n_search_gains, sigma_gains)))
    covar_init = np.diag(np.square(sigmas))
    distribution = DistributionGaussian(mean_init, covar_init)
    updater = UpdaterCovarDecay(eliteness=10,
                                weighting_method="PI-BB",
                                covar_decay_factor=0.98)

    n_samples_per_update = 10
    n_updates = 50
    session = run_optimization_task(task, task_solver, distribution, updater,
                                    n_updates, n_samples_per_update, directory)

    # Plot all rollouts in one axis, styled per update number.
    ax = plt.figure(figsize=(5, 5)).add_subplot(1, 1, 1)
    for i_update in range(n_updates):
        handle, _ = session.plot_rollouts_update(i_update, ax=ax)
        # NOTE(review): _set_style is a private member of the session;
        # consider exposing a public styling API instead.
        session._set_style(handle, i_update, n_updates)  # noqa

    window_label = "stochastic" if stochastic_field else "constant"
    # canvas.set_window_title was deprecated in matplotlib 3.4 and removed
    # in 3.6; set the title through the canvas manager instead.
    plt.gcf().canvas.manager.set_window_title(window_label + " force field")
    return session
Example #5
0
def run_demo(directory, n_dims):
    """ Run one demo for bbo_of_dmps.

    @param directory: Directory to save results to
    @param n_dims: Number of dimensions of the task (i.e. the viapoint)
    """

    # Some DMP parameters
    tau = 0.5
    y_init = np.linspace(1.8, 2.0, n_dims)
    y_attr = np.linspace(4.0, 3.0, n_dims)

    # Initialize function approximators with random weights.
    function_apps = []
    intersection_height = 0.8
    # NOTE(review): this always builds exactly two approximators (6 and 7
    # basis functions), independent of n_dims — presumably the demo is only
    # run with n_dims <= 2; confirm against the callers.
    for n_basis in [6, 7]:
        fa = FunctionApproximatorRBFN(n_basis, intersection_height)
        fa.train(np.linspace(0, 1, 100), np.zeros(100))

        fa.set_selected_param_names("weights")
        random_weights = 10.0 * np.random.normal(0, 1, n_basis)
        fa.set_param_vector(random_weights)

        function_apps.append(fa)

    # Initialize Dmp; only its weights are part of the search space.
    dmp = Dmp(tau, y_init, y_attr, function_apps)
    dmp.set_selected_param_names("weights")

    # Make the task
    viapoint = 3 * np.ones(n_dims)
    viapoint_time = (
        0.3 if n_dims == 1 else None
    )  # None means: Do not pass through viapoint at a specific time,
    # but rather pass through it at any time.

    task = TaskViapoint(
        viapoint,
        viapoint_time=viapoint_time,
        viapoint_radius=0.1,
        goal=y_attr,
        goal_time=1.1 * tau,
        viapoint_weight=1.0,
        acceleration_weight=0.0001,
        goal_weight=0.0,
    )

    # Make task solver, based on a Dmp
    dt = 0.01
    integrate_dmp_beyond_tau_factor = 1.5
    task_solver = TaskSolverDmp(dmp, dt, integrate_dmp_beyond_tau_factor)

    n_search = dmp.get_param_vector_size()

    mean_init = np.full(n_search, 0.0)
    covar_init = 1000.0 * np.eye(n_search)
    distribution = DistributionGaussian(mean_init, covar_init)

    # Select one of three covariance-update strategies ("cma" is default;
    # "none" and "decay" branches are kept for easy switching).
    covar_update = "cma"
    if covar_update == "none":
        updater = UpdaterMean(eliteness=10, weighting_method="PI-BB")
    elif covar_update == "decay":
        updater = UpdaterCovarDecay(eliteness=10,
                                    weighting_method="PI-BB",
                                    covar_decay_factor=0.9)
    else:
        updater = UpdaterCovarAdaptation(
            eliteness=10,
            weighting_method="PI-BB",
            max_level=None,
            min_level=1.0,
            diag_only=False,
            learning_rate=0.5,
        )

    n_samples_per_update = 10
    n_updates = 40

    session = run_optimization_task(task, task_solver, distribution, updater,
                                    n_updates, n_samples_per_update, directory)
    fig = session.plot()
    # canvas.set_window_title was deprecated in matplotlib 3.4 and removed
    # in 3.6; the window title now lives on the canvas manager.
    fig.canvas.manager.set_window_title(
        f"Optimization with covar_update={covar_update}")
Example #6
0
def main():
    """ Main function that is called when executing the script.

    Loads a DMP from json, samples perturbed parameter vectors from a
    Gaussian around its current parameters, and saves (and optionally
    plots) the sampled DMPs and their trajectories.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("dmp", help="input dmp")
    parser.add_argument("output_directory",
                        help="directory to write results to")
    parser.add_argument("--sigma",
                        help="sigma of covariance matrix",
                        type=float,
                        default=3.0)
    parser.add_argument("--n", help="number of samples", type=int, default=10)
    parser.add_argument("--traj",
                        action="store_true",
                        help="integrate DMP and save trajectory")
    parser.add_argument("--show",
                        action="store_true",
                        help="show result plots")
    parser.add_argument("--save",
                        action="store_true",
                        help="save result plots to png")
    args = parser.parse_args()

    sigma_dir = "sigma_%1.3f" % args.sigma
    directory = Path(args.output_directory, sigma_dir)

    filename = args.dmp
    # The original f-strings were garbled ("(unknown)"); restore the
    # filename interpolation so the log messages are informative.
    print(f"Loading DMP from: {filename}")
    dmp = jc.loadjson(filename)
    ts = dmp.ts_train
    parameter_vector = dmp.get_param_vector()

    # Isotropic Gaussian around the loaded DMP's parameter vector.
    n_samples = args.n
    sigma = args.sigma
    covar_init = sigma * sigma * np.eye(parameter_vector.size)
    distribution = DistributionGaussian(parameter_vector, covar_init)

    filename = Path(directory, "distribution.json")
    print(f"Saving sampling distribution to: {filename}")
    os.makedirs(directory, exist_ok=True)
    jc.savejson(filename, distribution)

    samples = distribution.generate_samples(n_samples)

    if args.show or args.save:
        fig = plt.figure()

        ax1 = fig.add_subplot(121)  # noqa
        distribution.plot(ax1)
        # Scatter-plot the first two parameter dimensions of each sample;
        # assumes the parameter vector has at least 2 dims — TODO confirm.
        ax1.plot(samples[:, 0], samples[:, 1], "o", color="#BBBBBB")

        ax2 = fig.add_subplot(122)

        # Plot the mean (unperturbed) trajectory thick and green.
        xs, xds, _, _ = dmp.analytical_solution()
        traj_mean = dmp.states_as_trajectory(ts, xs, xds)
        lines, _ = traj_mean.plot([ax2])
        plt.setp(lines, linewidth=4, color="#007700")

    for i_sample in range(n_samples):

        dmp.set_param_vector(samples[i_sample, :])

        filename = Path(directory, f"{i_sample:02}_dmp")
        print(f"Saving sampled DMP to: {filename}.json")
        jc.savejson(str(filename) + ".json", dmp)
        jc.savejson_for_cpp(str(filename) + "_for_cpp.json", dmp)

        if args.show or args.save or args.traj:
            xs, xds, _, _ = dmp.analytical_solution()
            traj_sample = dmp.states_as_trajectory(ts, xs, xds)
            if args.traj:
                filename = Path(directory, f"{i_sample:02}_traj.txt")
                print(f"Saving sampled trajectory to: {filename}")
                traj_sample.savetxt(filename)
            if args.show or args.save:
                lines, _ = traj_sample.plot([ax2])  # noqa
                plt.setp(lines, color="#BBBBBB", alpha=0.5)

    if args.save:
        filename = "exploration_dmp_traj.png"
        plt.gcf().savefig(Path(directory, filename))

    if args.show:
        plt.show()
Example #7
0
    def update_distribution(self, distribution, samples, costs):
        """ Update a distribution with reward-weighted averaging.

        @param distribution: Distribution before the update
        @param samples: Samples in parameter space.
        @param costs: The cost of each sample.
        @return: The updated distribution.
        """
        prev_mean = distribution.mean
        prev_covar = distribution.covar
        n_dims = samples.shape[1]

        # Map costs to normalized weights; lower cost => higher weight.
        weights = costs_to_weights(costs, self.weighting_method,
                                   self.eliteness)

        # New mean is the reward-weighted average of the samples
        # (n_samples x n_dims) along axis 0.
        mean_new = np.average(samples, 0, weights)

        # Weighted sample covariance around the *previous* mean
        # (broadcasting replaces the explicit tiling).
        eps = samples - prev_mean
        weighted_eps = np.asarray(weights)[:, None] * eps
        covar_new = np.dot(weighted_eps.transpose(), eps)

        # Optionally discard the off-diagonal terms.
        if self.diag_only:
            covar_new = np.diag(np.diag(covar_new))

        # Low-pass filter: blend between the current and new covariance.
        if self.learning_rate < 1.0:
            rate = self.learning_rate
            covar_new = (1 - rate) * prev_covar + rate * covar_new

        # Cap the diagonal from above to avoid too much exploration.
        # diagonal_max is a standard deviation (scalar or per-dim), so the
        # comparison on the covariance diagonal is against its square.
        if self.diagonal_max is not None:
            if np.isscalar(self.diagonal_max):
                caps = [self.diagonal_max ** 2] * n_dims
            else:
                caps = [self.diagonal_max[ii] ** 2 for ii in range(n_dims)]
            for ii in range(n_dims):
                if covar_new[ii, ii] > caps[ii]:
                    covar_new[ii, ii] = caps[ii]

        # Floor the diagonal from below to avoid premature convergence.
        if self.diagonal_min is not None:
            if np.isscalar(self.diagonal_min):
                floors = [self.diagonal_min ** 2] * n_dims
            else:
                floors = [self.diagonal_min[ii] ** 2 for ii in range(n_dims)]
            for ii in range(n_dims):
                if covar_new[ii, ii] < floors[ii]:
                    covar_new[ii, ii] = floors[ii]

        # Build the updated distribution.
        distribution_new = DistributionGaussian(mean_new, covar_new)

        return distribution_new, weights
Example #8
0
def run_demo(directory, n_dims):
    """ Run one demo for bbo_of_dmps (with single updates)

    @param directory: Directory to save results to
    @param n_dims: Number of dimensions of the task (i.e. the viapoint)
    """
    # DMP timing and start/end points.
    tau = 0.5
    y_init = np.linspace(1.8, 2.0, n_dims)
    y_attr = np.linspace(4.0, 3.0, n_dims)

    # Function approximators, initialized with random weights.
    # NOTE(review): this builds exactly two approximators (6 and 7 basis
    # functions) regardless of n_dims — presumably n_dims <= 2 here.
    intersection_height = 0.8
    function_apps = []
    for n_basis in [6, 7]:
        approximator = FunctionApproximatorRBFN(n_basis, intersection_height)
        approximator.train(np.linspace(0, 1, 100), np.zeros(100))

        approximator.set_selected_param_names("weights")
        approximator.set_param_vector(10.0 * np.random.normal(0, 1, n_basis))

        function_apps.append(approximator)

    # The DMP whose weights form the search space.
    dmp = Dmp(tau, y_init, y_attr, function_apps)
    dmp.set_selected_param_names("weights")

    # Viapoint task. A viapoint_time of None means the viapoint may be
    # passed at any time; a fixed time is only enforced for 1-D tasks.
    viapoint = 3 * np.ones(n_dims)
    viapoint_time = 0.3 if n_dims == 1 else None

    task = TaskViapoint(
        viapoint,
        viapoint_time=viapoint_time,
        viapoint_radius=0.1,
        goal=y_attr,
        goal_time=1.1 * tau,
        viapoint_weight=1.0,
        acceleration_weight=0.0001,
        goal_weight=0.0,
    )

    # Task solver integrates the DMP beyond tau to let it settle.
    dt = 0.01
    integrate_dmp_beyond_tau_factor = 1.5
    task_solver = TaskSolverDmp(dmp, dt, integrate_dmp_beyond_tau_factor)

    # Initial search distribution over the DMP parameters.
    n_search = dmp.get_param_vector_size()
    distribution = DistributionGaussian(np.full(n_search, 0.0),
                                        1000.0 * np.eye(n_search))

    updater = UpdaterCovarDecay(eliteness=10,
                                weighting_method="PI-BB",
                                covar_decay_factor=0.9)

    n_samples_per_update = 10
    n_updates = 20

    session = prepare_optimization(
        directory, task, task_solver, distribution, n_samples_per_update,
        updater, dmp
    )

    # Step-by-step optimization: one evaluation rollout plus a batch of
    # exploration rollouts per update, then a distribution update.
    for update_idx in range(n_updates):
        eval_dmp = session.ask("dmp", update_idx, "eval")
        eval_cost_vars = task_solver.perform_rollout_dmp(eval_dmp)
        session.tell(eval_cost_vars, "cost_vars", update_idx, "eval")

        for sample_idx in range(n_samples_per_update):
            sample_dmp = session.ask("dmp", update_idx, sample_idx)
            sample_cost_vars = task_solver.perform_rollout_dmp(sample_dmp)
            session.tell(sample_cost_vars, "cost_vars", update_idx, sample_idx)

        update_step(session, update_idx)

    return session.plot()