Exemplo n.º 1
0
 def generate_nn_polyhedral_guard(self, nn, chosen_action, output_flag):
     """Compute an octagon over the 2-d observations that pass the NN guard.

     A fresh Gurobi model is created with an unbounded 2-element
     ``observation`` variable, ``Experiment.generate_nn_guard`` adds the
     constraints for ``nn`` with ``action_ego=chosen_action``, and the model
     is then optimised along the directions of ``Experiment.octagon(2)``.

     Args:
         nn: Network handed to ``Experiment.generate_nn_guard``.
         chosen_action: Action passed as ``action_ego`` to the guard encoding.
         output_flag: Value for Gurobi's ``OutputFlag`` parameter.

     Returns:
         A tuple ``(observable_template, observable_result)``: the octagon
         template and whatever ``self.optimise`` returns for it.
     """
     gurobi_model = grb.Model()
     gurobi_model.setParam('OutputFlag', output_flag)
     gurobi_model.setParam('Threads', 2)
     observation = gurobi_model.addMVar(shape=(2, ),
                                        lb=float("-inf"),
                                        ub=float("inf"),
                                        name="observation")
     Experiment.generate_nn_guard(gurobi_model,
                                  observation,
                                  nn,
                                  action_ego=chosen_action)
     observable_template = Experiment.octagon(2)
     # env_input_size is switched to 2 only for the duration of optimise()
     # (presumably optimise() reads it -- confirm against its definition).
     self.env_input_size = 2
     try:
         observable_result = self.optimise(observable_template, gurobi_model,
                                           observation)
     finally:
         # Always put the size back, even when optimise() raises, so a failed
         # solve does not leave the experiment configured for 2 inputs.
         self.env_input_size = 6
     return observable_template, observable_result
Exemplo n.º 2
0
def run_parameterised_experiment(config):
    """Run one Cartpole experiment for a Tune trial and report the outcome.

    ``config["main_params"]`` must unpack into three items; only the third
    (a mapping with the keys ``"folder"``, ``"tau"`` and ``"template"``) is
    used here. ``config["n_workers"]`` is forwarded to the experiment.

    The analysis template is an octagon when ``"template"`` is 2, a box when
    it is 0, and otherwise the second item returned by
    ``experiment.get_template(1)``.

    The verdict is reported through ``tune.report`` as ``safe``: 1 when the
    run returned a truthy value, -1 for a falsy one, and 0 when it returned
    ``None``.
    """
    save_dir = tune.get_trial_dir()
    _problem, _method, settings = config["main_params"]
    worker_count = config["n_workers"]

    experiment = CartpoleExperiment()
    # The checkpoint location is taken directly from the "folder" entry.
    experiment.nn_path = settings["folder"]
    experiment.tau = settings["tau"]

    template_kind = settings["template"]
    input_size = experiment.env_input_size
    if template_kind == 2:  # octagon
        experiment.analysis_template = Experiment.octagon(input_size)
    elif template_kind == 0:  # box
        experiment.analysis_template = Experiment.box(input_size)
    else:  # standard
        experiment.analysis_template = experiment.get_template(1)[1]

    experiment.n_workers = worker_count
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = save_dir
    experiment.update_progress_fn = update_progress
    elapsed_seconds, safe, max_t = experiment.run_experiment()

    # Encode the tri-state verdict as an integer for reporting.
    if safe is None:
        verdict = 0
    else:
        verdict = 1 if safe else -1
    tune.report(elapsed_seconds=elapsed_seconds,
                safe=verdict,
                max_t=max_t,
                done=True)
Exemplo n.º 3
0
def run_parameterised_experiment(config):
    """Configure and run one experiment for a Tune trial, then report it.

    ``config["main_params"]`` unpacks into ``(problem, method, other_config)``
    and ``config["n_workers"]`` is forwarded to the experiment.

    * ``problem`` selects the experiment family: ``"bouncing_ball"``,
      ``"stopping_car"``, or anything else for Cartpole.
    * ``method == "ora"`` selects the ``ORA*`` class of that family.
    * ``other_config["nn_path"]`` is a key into the family's ``nn_paths_*``
      table; the entry is joined onto ``utils.get_save_dir()``.
    * ``other_config["epsilon_input"]`` is read for stopping_car, while
      ``other_config["tau"]`` is read for the other two families.
    * ``other_config["template"]``: 2 selects an octagon, 0 a box, and any
      other value the second item of ``experiment.get_template(1)``.

    The verdict is reported through ``tune.report`` as ``safe``: 1 when the
    run returned a truthy value, -1 for a falsy one, and 0 for ``None``.

    Raises:
        NotImplementedError: for bouncing_ball with a template other than
            0 or 2.
    """
    trial_dir = tune.get_trial_dir()
    problem, method, other_config = config["main_params"]
    n_workers = config["n_workers"]
    use_ora = method == "ora"

    # Pick the experiment class and the matching checkpoint table.
    if problem == "bouncing_ball":
        experiment = ORABouncingBallExperiment() if use_ora else BouncingBallExperiment()
        nn_paths = nn_paths_bouncing_ball
    elif problem == "stopping_car":
        experiment = ORAStoppingCarExperiment() if use_ora else StoppingCarExperiment()
        nn_paths = nn_paths_stopping_car
    else:
        experiment = ORACartpoleExperiment() if use_ora else CartpoleExperiment()
        nn_paths = nn_paths_cartpole

    experiment.nn_path = os.path.join(utils.get_save_dir(), nn_paths[other_config["nn_path"]])
    # Stopping car is parameterised by an input epsilon, the others by tau.
    if problem == "stopping_car":
        experiment.input_epsilon = other_config["epsilon_input"]
    else:
        experiment.tau = other_config["tau"]

    template_id = other_config["template"]
    if template_id == 2:  # octagon
        experiment.analysis_template = Experiment.octagon(experiment.env_input_size)
    elif template_id == 0:  # box
        experiment.analysis_template = Experiment.box(experiment.env_input_size)
    elif problem == "bouncing_ball":
        # The bouncing ball family has no "standard" template branch.
        raise NotImplementedError(
            "bouncing_ball only supports the box (0) and octagon (2) templates")
    else:
        _, template = experiment.get_template(1)
        experiment.analysis_template = template  # standard

    experiment.n_workers = n_workers
    experiment.show_progressbar = False
    experiment.show_progress_plot = False
    experiment.save_dir = trial_dir
    experiment.update_progress_fn = update_progress
    elapsed_seconds, safe, max_t = experiment.run_experiment()

    # Encode the tri-state verdict as an integer for reporting.
    if safe is None:
        safe_value = 0
    else:
        safe_value = 1 if safe else -1
    tune.report(elapsed_seconds=elapsed_seconds, safe=safe_value, max_t=max_t, done=True)
    def get_nn(self):
        """Restore the PPO checkpoint at ``self.nn_path`` as a sequential net.

        A ``ppo.PPOTrainer`` is built from ``get_PPO_config(1234)``, restored
        from ``self.nn_path``, and its policy is converted with
        ``convert_ray_policy_to_sequential``; ``.cpu()`` is called on the
        result before it is returned.
        """
        trainer = ppo.PPOTrainer(config=get_PPO_config(1234))
        trainer.restore(self.nn_path)
        # NOTE: an earlier, disabled variant prepended a fixed bias-free
        # Linear(6, 2) layer to the network; the converted policy is returned
        # unchanged here.
        return convert_ray_policy_to_sequential(trainer.get_policy()).cpu()


if __name__ == '__main__':
    # Script entry point: run a single StoppingCarExperiment with an octagon
    # analysis template, showing the progress bar but no progress plot.
    ray.init(local_mode=False, log_to_driver=False)
    experiment = StoppingCarExperiment()
    experiment.plotting_time_interval = 2 * 60
    experiment.show_progressbar = True
    experiment.show_progress_plot = False
    experiment.analysis_template = Experiment.octagon(experiment.env_input_size)  # standard
    # NOTE(review): this list has 13 entries; confirm that is the length the
    # experiment expects for the chosen template.
    experiment.input_boundaries = [
        40, -30,
        10, -0,
        28, -28,
        36, -36,
        0, -0,
        0, -0,
        0,
    ]
    experiment.time_horizon = 150
    experiment.run_experiment()