def post_milp(self, x, x_label, nn, output_flag, t, template) -> List[Experiment.SuccessorInfo]:
    """Compute one successor of ``x`` for each of the two actions.

    A fresh Gurobi model is built per action: the input region described by
    ``template`` and ``x`` is encoded, the stopping-car dynamic for that action
    is applied, and ``optimise`` is called on the solved model to obtain the
    successor. No network guard is encoded here; ``nn`` is only handed to
    ``create_range_bounds_model``, whose per-action ``[0]``/``[1]`` entries are
    copied onto each successor as ``lb``/``ub``.

    Args:
        x: parent polytope (stored as ``successor_info.parent``).
        x_label: label of the parent (stored as ``parent_lbl``).
        nn: network passed to ``create_range_bounds_model``.
        output_flag: value for Gurobi's ``OutputFlag`` parameter.
        t: timestep of the parent; successors are stamped ``t + 1``.
        template: template passed to the region and optimisation helpers.

    Returns:
        A list with one ``Experiment.SuccessorInfo`` per action.

    Raises:
        AssertionError: if ``optimise`` returns ``None`` for an action.
    """
    ranges_probs = self.create_range_bounds_model(template, x, self.env_input_size, nn)
    post = []
    for chosen_action in range(2):
        gurobi_model = grb.Model()
        gurobi_model.setParam('OutputFlag', output_flag)
        # With dual reductions disabled Gurobi reports a definitive status
        # instead of the ambiguous "infeasible or unbounded".
        gurobi_model.setParam('DualReductions', 0)
        # Named ``state`` rather than ``input`` to avoid shadowing the builtin.
        state = generate_input_region(gurobi_model, template, x, self.env_input_size)
        x_prime = StoppingCarExperimentProbabilistic.apply_dynamic(
            state, gurobi_model, action=chosen_action, env_input_size=self.env_input_size)
        gurobi_model.update()
        gurobi_model.optimize()
        x_prime_results = optimise(template, gurobi_model, x_prime)
        if x_prime_results is None:
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O``; the exception type is kept as AssertionError so
            # callers observe the same failure as before.
            raise AssertionError(
                f"optimise() returned None for action {chosen_action}")
        successor_info = Experiment.SuccessorInfo()
        successor_info.successor = tuple(x_prime_results)
        successor_info.parent = x
        successor_info.parent_lbl = x_label
        successor_info.t = t + 1
        # Every successor is tagged "policy" rather than with the action index.
        successor_info.action = "policy"
        successor_info.lb = ranges_probs[chosen_action][0]
        successor_info.ub = ranges_probs[chosen_action][1]
        post.append(successor_info)
    return post
def post_milp(self, x, x_label, nn, output_flag, t, template):
    """Return the successors of ``x`` for every action the network guard allows.

    For each of the two actions a new Gurobi model is created. A 2-element
    observation is tied to the state variables within ``input_epsilon / 2``,
    the network guard for that action is encoded on the observation, and, if
    the guard reports the action as feasible, the stopping-car dynamic is
    applied and the successor is read back with ``h_repr_to_plot``.

    The ``lb``/``ub`` fields of the successors are not assigned by this
    variant.
    """
    successors = []
    for action in range(2):
        model = grb.Model()
        model.setParam('OutputFlag', output_flag)
        state = generate_input_region(model, template, x, self.env_input_size)

        obs = model.addMVar(shape=(2, ), lb=float("-inf"), ub=float("inf"), name="observation")
        slack = self.input_epsilon / 2
        # obs[1] tracks state[0] - state[1], within +/- slack
        model.addConstr(obs[1] <= state[0] - state[1] + slack, name="obs_constr21")
        model.addConstr(obs[1] >= state[0] - state[1] - slack, name="obs_constr22")
        # obs[0] tracks v_lead - state[2], within +/- slack
        model.addConstr(obs[0] <= self.v_lead - state[2] + slack, name="obs_constr11")
        model.addConstr(obs[0] >= self.v_lead - state[2] - slack, name="obs_constr12")

        # The guard is evaluated on the observation, not on the raw state.
        if not Experiment.generate_nn_guard(model, obs, nn, action_ego=action):
            continue

        next_state = StoppingCarExperiment.apply_dynamic(
            state, model, action=action, env_input_size=self.env_input_size)
        model.update()
        model.optimize()
        found, next_state_bounds = self.h_repr_to_plot(model, template, next_state)
        if not found:
            continue

        info = Experiment.SuccessorInfo()
        info.successor = tuple(next_state_bounds)
        info.parent = x
        info.parent_lbl = x_label
        info.t = t + 1
        info.action = "policy"  # the action index itself is not recorded
        successors.append(info)
    return successors
def post_milp(self, x, x_label, nn, output_flag, t, template):
    """Return the successors of ``x`` for each of the ``self.n_actions`` actions.

    Per action a new Gurobi model (limited to 2 threads) is built from the
    input region; theta bounds taken from the model select a sin/cos table,
    which ``generate_angle_milp`` turns into the new angle variables fed to
    ``apply_dynamic``. Actions whose model does not end with status ``2`` are
    skipped. ``lb``/``ub`` of each successor come from
    ``create_range_bounds_model``.
    """
    action_ranges = self.create_range_bounds_model(template, x, self.env_input_size, nn)
    successors = []
    for action in range(self.n_actions):
        model = grb.Model()
        model.setParam('OutputFlag', output_flag)
        model.setParam('Threads', 2)
        state = generate_input_region(model, template, x, self.env_input_size)

        theta_hi, theta_lo, theta_dot_hi, theta_dot_lo = self.get_theta_bounds(model, state)
        # No network guard is encoded here: every action is expanded.
        table = self.get_sin_cos_table(theta_hi, theta_lo, theta_dot_hi, theta_dot_lo, action=action)
        new_theta_dot, new_theta = PendulumExperiment.generate_angle_milp(model, state, table)

        next_state = self.apply_dynamic(
            state,
            model,
            newthdot=new_theta_dot,
            newtheta=new_theta,
            env_input_size=self.env_input_size,
            action=action,
        )
        model.update()
        model.optimize()

        # NOTE(review): 2 is presumably gurobipy's GRB.OPTIMAL -- confirm.
        if model.status == 2:
            found, next_state_bounds = self.h_repr_to_plot(model, template, next_state)
            if found:
                info = Experiment.SuccessorInfo()
                info.successor = tuple(next_state_bounds)
                info.parent = x
                info.parent_lbl = x_label
                info.t = t + 1
                info.action = "policy"  # the action index itself is not recorded
                info.lb = action_ranges[action][0]
                info.ub = action_ranges[action][1]
                successors.append(info)
    return successors
def post_milp(self, x, x_label, nn, output_flag, t, template) -> List[Experiment.SuccessorInfo]:
    """Return the successors of ``x`` for every action the network guard allows.

    For each of the two actions a new Gurobi model is created (with
    ``DualReductions`` disabled). A 2-element observation is tied to the state
    variables within ``input_epsilon / 2``, the network guard for that action
    is encoded on the observation, and, if the guard reports the action as
    feasible, the stopping-car dynamic is applied and the successor is read
    back with ``h_repr_to_plot``. ``lb``/``ub`` of each successor come from
    ``create_range_bounds_model``.
    """
    action_ranges = self.create_range_bounds_model(template, x, self.env_input_size, nn)
    successors = []
    for action in range(2):
        model = grb.Model()
        model.setParam('OutputFlag', output_flag)
        model.setParam('DualReductions', 0)
        state = generate_input_region(model, template, x, self.env_input_size)

        # NOTE(review): the observation variable is registered under the name
        # "input"; kept as is, but confirm this is not a copy-paste leftover.
        obs = model.addMVar(shape=(2, ), lb=float("-inf"), ub=float("inf"), name="input")
        slack = self.input_epsilon / 2
        # obs[1] tracks state[0] - state[1], within +/- slack
        model.addConstr(obs[1] <= state[0] - state[1] + slack, name="obs_constr21")
        model.addConstr(obs[1] >= state[0] - state[1] - slack, name="obs_constr22")
        # obs[0] tracks v_lead - state[2], within +/- slack
        model.addConstr(obs[0] <= self.v_lead - state[2] + slack, name="obs_constr11")
        model.addConstr(obs[0] >= self.v_lead - state[2] - slack, name="obs_constr12")

        # The guard is evaluated on the observation, not on the raw state.
        if not Experiment.generate_nn_guard(model, obs, nn, action_ego=action):
            continue

        next_state = StoppingCarExperiment.apply_dynamic(
            state, model, action=action, env_input_size=self.env_input_size)
        model.update()
        model.optimize()
        found, next_state_bounds = self.h_repr_to_plot(model, template, next_state)
        if not found:
            continue

        info = Experiment.SuccessorInfo()
        info.successor = tuple(next_state_bounds)
        info.parent = x
        info.parent_lbl = x_label
        info.t = t + 1
        info.action = "policy"  # the action index itself is not recorded
        info.lb = action_ranges[action][0]
        info.ub = action_ranges[action][1]
        successors.append(info)
    return successors
def post_milp(self, x, x_label, nn, output_flag, t, template) -> List[Experiment.SuccessorInfo]:
    """Enumerate the successors of ``x`` over every guard case.

    Each branch starts from a fresh Gurobi model holding the input region and
    the result ``z`` of ``apply_dynamic``. A guard (``generate_guard``) selects
    the branch and ``apply_dynamic2`` produces the successor state:

    * case 0 ("bounce") and case 3 ("out of reach, no bounce") do not depend
      on the action; their successors get ``lb = ub = 1.0`` and are labelled
      ``"case0"`` / ``"case3"``.
    * guard cases 1 ("ball going down") and 2 ("ball going up") are expanded
      once per action: action 1 ("hit") uses the dynamic of the same case,
      action 0 ("no hit") uses dynamic case 3 (the normal dynamic). These
      successors are labelled ``"policy"`` and carry the bounds returned by
      ``create_range_bounds_model`` for that action.

    Actions whose upper bound is at most ``1e-6`` are skipped entirely.

    Args:
        x: parent polytope (stored as ``parent`` on each successor).
        x_label: label of the parent (stored as ``parent_lbl``).
        nn: network passed to ``create_range_bounds_model``.
        output_flag: value for Gurobi's ``OutputFlag`` parameter.
        t: timestep of the parent; successors are stamped ``t + 1``.
        template: template passed to the region and plotting helpers.

    Returns:
        The list of ``Experiment.SuccessorInfo`` found, in branch order.
    """
    ranges_probs = self.create_range_bounds_model(template, x, self.env_input_size, nn)
    post = []

    def fresh_model():
        """Build a new model with the input region and the first dynamic step."""
        model = grb.Model()
        model.setParam('OutputFlag', output_flag)
        region = self.generate_input_region(model, template, x, self.env_input_size)
        return model, self.apply_dynamic(region, model, self.env_input_size)

    def append_successor(model, z, dynamic_case, action_label, bounds):
        """Apply ``dynamic_case`` to ``z`` and record the successor, if one is found."""
        x_prime = self.apply_dynamic2(
            z, model, case=dynamic_case, env_input_size=self.env_input_size)
        found_successor, x_prime_results = self.h_repr_to_plot(model, template, x_prime)
        if not found_successor:
            return
        successor_info = Experiment.SuccessorInfo()
        successor_info.successor = tuple(x_prime_results)
        successor_info.parent = x
        successor_info.parent_lbl = x_label
        successor_info.t = t + 1
        successor_info.action = action_label
        successor_info.lb = bounds[0]
        successor_info.ub = bounds[1]
        post.append(successor_info)

    # (guard case, dynamic case, action that takes this branch)
    policy_branches = (
        (1, 1, 1),  # ball going down and hit
        (2, 2, 1),  # ball going up and hit
        (1, 3, 0),  # ball going down and NO hit -> normal dynamic
        (2, 3, 0),  # ball going up and NO hit -> normal dynamic
    )

    # case 0: bounce -- the action is irrelevant
    gurobi_model, z = fresh_model()
    if self.generate_guard(gurobi_model, z, case=0):
        append_successor(gurobi_model, z, 0, "case0", (1.0, 1.0))

    for chosen_action in range(2):
        if ranges_probs[chosen_action][1] <= 1e-6:
            # ignore actions with a very small probability of happening
            continue
        for guard_case, dynamic_case, required_action in policy_branches:
            # The guard is built before the action test, exactly as the
            # hand-unrolled version did, so the sequence of solver calls is
            # unchanged.
            gurobi_model, z = fresh_model()
            if not self.generate_guard(gurobi_model, z, case=guard_case):
                continue
            if chosen_action != required_action:
                continue
            append_successor(
                gurobi_model, z, dynamic_case, "policy", ranges_probs[chosen_action])

    # case 3: ball out of reach and no bounce -- the action is irrelevant
    gurobi_model, z = fresh_model()
    if self.generate_guard(gurobi_model, z, case=3):
        append_successor(gurobi_model, z, 3, "case3", (1.0, 1.0))

    return post
def inner_loop_step(self, stats: Experiment.LoopStats, template_2d, template, nn, bar_main):
    """Run one scheduling step of the exploration loop.

    The step has two phases:

    1. While there are free worker slots (``len(stats.proc_ids) <
       self.n_workers``) and the frontier is not empty, pop an item and either
       discard it (time horizon or wall-clock budget exceeded), drop it because
       it is contained in an already seen element, replace it by the fragments
       of a split, or submit it to ``self.post_fn_remote``.
    2. Plot if the plotting interval elapsed, report progress, collect finished
       results into the frontier and optionally persist the graph and stats.

    Args:
        stats: mutable loop state (frontier, seen list, pending job ids, ...).
        template_2d: template forwarded to plotting and ``check_split``.
        template: template forwarded to the successor/split helpers.
        nn: network forwarded to the successor/split helpers.
        bar_main: progress bar; only used when ``self.show_progressbar`` is set.
    """

    def submit_fragments(parent, parent_label, timestep, polytopes):
        """Replace a split parent by its fragments and queue them in ``proc_ids``."""
        # Remove the parent from seen so later containment checks do not loop
        # back onto a node that has been replaced by its fragments.
        stats.seen.remove((parent, parent_label))
        fragments = []
        for i, polytope in enumerate(polytopes):
            successor_info = Experiment.SuccessorInfo()
            successor_info.successor = tuple(polytope)
            successor_info.parent = parent
            successor_info.parent_lbl = parent_label
            successor_info.t = timestep  # fragments keep the parent's timestep
            successor_info.action = f"split{i}"
            fragments.append(successor_info)
        # Stored through ray.put so the fragments sit in proc_ids next to the
        # ids of the remote successor computations.
        stats.proc_ids.append(ray.put(fragments))

    # fills up the worker threads
    while len(stats.proc_ids) < self.n_workers and len(stats.frontier) != 0:
        t, (x, x_label) = heapq.heappop(stats.frontier) if self.use_bfs else stats.frontier.pop()
        # total_seconds() is the full elapsed time; timedelta.seconds is only
        # the seconds component and wraps around after one day.
        if (t >= self.time_horizon
                or (datetime.datetime.now() - stats.start_time).total_seconds() > self.max_elapsed_time):
            print(f"Discard timestep t={t}")
            stats.discarded.append((x, x_label))
            continue
        stats.max_t = max(stats.max_t, t)

        if self.use_contained:
            contained_flag = False
            to_remove = []
            for (s, s_label) in stats.seen:
                if s_label == x_label:
                    if contained(x, s):
                        # ensures that if there was a split it doesn't count as contained
                        if not self.graph.has_predecessor((x, x_label), (s, x_label)):
                            self.graph.add_edge((x, x_label), (s, x_label), action="contained", lb=1.0, ub=1.0)
                            contained_flag = True
                            break
                    if contained(s, x):
                        # x covers s, so s becomes redundant in the seen list
                        to_remove.append((s, s_label))
            for rem in to_remove:
                stats.num_already_visited += 1
                stats.seen.remove(rem)
            if contained_flag:
                stats.num_already_visited += 1
                continue
        stats.seen.append((x, x_label))

        if self.show_progressbar:
            bar_main.update(value=bar_main.value + 1,
                            n_workers=len(stats.proc_ids),
                            seen=len(stats.seen),
                            frontier=len(stats.frontier),
                            num_already_visited=stats.num_already_visited,
                            max_t=stats.max_t)

        if self.use_split:
            # a negative max_t_split means "no limit"; otherwise splitting is
            # restricted to timesteps below it
            if self.max_t_split < 0 or t < self.max_t_split:
                if self.use_abstract_mapping:
                    splitted_elements = self.split_item_abstract_mapping(
                        x, [m[0] for m in self.abstract_mapping])
                    if len(splitted_elements) > 1:
                        submit_fragments(x, x_label, t, splitted_elements)
                        continue
                else:  # split on the go
                    in_edges = list(self.graph.in_edges((x, x_label)))
                    # Only split nodes whose first incoming edge is not itself
                    # a split edge.
                    if len(in_edges) == 0 or "split" not in self.graph.edges[in_edges[0]].get("action"):
                        if self.can_be_splitted(template, x):
                            splitted_elements = self.check_split(
                                t, x, x_label, nn, bar_main, stats, template, template_2d)
                            if len(splitted_elements) > 1:
                                # check_split yields (polytope, probs_range) pairs
                                submit_fragments(
                                    x, x_label, t,
                                    [splitted_polytope for splitted_polytope, probs_range in splitted_elements])
                                continue

        if self.use_split_with_seen:
            # split according to the elements of the seen list
            splitted_elements2 = self.split_item_abstract_mapping(x, [m[0] for m in stats.seen])
            if len(splitted_elements2) > 1:
                submit_fragments(x, x_label, t, splitted_elements2)
                continue

        # if nothing else applies, compute the successors remotely
        stats.proc_ids.append(
            self.post_fn_remote.remote(self, x, x_label, nn, self.output_flag, t, template))

    if stats.last_time_plot is None or time.time() - stats.last_time_plot >= self.plotting_time_interval:
        # the very first pass only starts the timer, it does not plot
        if stats.last_time_plot is not None:
            self.plot_fn(stats.vertices_list, template, template_2d)
        stats.last_time_plot = time.time()

    if self.update_progress_fn is not None:
        self.update_progress_fn(n_workers=len(stats.proc_ids),
                                seen=len(stats.seen),
                                frontier=len(stats.frontier),
                                num_already_visited=stats.num_already_visited,
                                max_t=stats.max_t)

    # process the results (if any)
    new_frontier = self.collect_results(stats, template)
    if self.avoid_irrelevants:
        # update prism
        self.update_prism_step(stats.frontier, new_frontier, stats.root, stats)
    elif self.use_bfs:
        for element in new_frontier:
            heapq.heappush(stats.frontier, element)
    else:
        stats.frontier.extend(new_frontier)
    # todo go through the tree and decide if we want to split already visited
    # nodes based on the max and min probability of encountering a terminal state
    stats.new_frontier = []  # resets the new_frontier

    if self.save_graph:
        stats.last_time_save = datetime.datetime.now()
        # NOTE(review): networkx.write_gpickle was removed in NetworkX 3.0;
        # confirm the pinned networkx version before upgrading.
        networkx.write_gpickle(self.graph, os.path.join(self.save_dir, "graph.p"))
        # Context manager so the file handle is closed even if pickling fails.
        with open(os.path.join(self.save_dir, "stats.p"), "wb") as stats_file:
            pickle.dump(stats, stats_file)