def value_iteration(nnet, device, env: Environment, states: List[State]) -> List[float]:
    """Compute one-step value-iteration targets for ``states``.

    Each state is expanded with ``env.expand``. Every child is scored by
    ``nnet`` and the target of a state is the minimum, over its children, of
    ``transition_cost + nnet(child)``. States that ``env.is_solved`` reports as
    solved get a target of zero.

    Args:
        nnet: Callable network. It receives a float tensor built from
            ``env.state_to_nnet_input`` and column 0 of its output is used.
        device: Not used by this function; the input tensor is not moved to
            any device here. NOTE(review): presumably ``nnet`` is expected to
            live on the CPU when this is called -- confirm against callers.
        env: Environment providing ``expand``, ``state_to_nnet_input`` and
            ``is_solved``.
        states: States to compute targets for.

    Returns:
        A NumPy array with one target per state. (The ``List[float]``
        annotation is kept unchanged for interface compatibility.)
    """
    children, transition_costs = env.expand(states)
    flat_children, index_children = flatten(children)

    inputs_tensor = torch.from_numpy(
        env.state_to_nnet_input(flat_children)).float()
    # .cpu() is a no-op for CPU tensors and keeps .numpy() valid otherwise.
    output_tensor = nnet(inputs_tensor).cpu().data.numpy()[:, 0]

    # The original wrote ``targets_list = np.ndarray = ...``, a chained
    # assignment that rebinds ``np.ndarray`` on the numpy module itself.
    # An annotation was intended.
    targets_list: np.ndarray = np.array(transition_costs) + np.array(
        unflatten(list(output_tensor), index_children))

    # NOTE(review): axis=1 assumes every state has the same number of
    # children so that targets_list is a 2-D array -- confirm.
    targets = np.min(targets_list, axis=1)

    # Solved states have zero cost-to-go.
    is_solved: np.ndarray = np.array(env.is_solved(states))
    targets = targets * np.logical_not(is_solved)

    return targets
def generate_plot(nnet: nn.Module, device, env: Environment,
                  states: List[State], outputs: np.ndarray,
                  num_samples: int = 100):
    """Plot network outputs against expected values for a random sample.

    Two scatter plots are shown one after the other with ``plt.show()``:
    the raw network output versus the expected value, and the output of
    ``approx_admissible_conv`` versus the expected value. Each plot also draws
    a green reference line through (0, 0) and (30, 30).

    Args:
        nnet: Network to evaluate; it is switched to eval mode.
        device: Passed to ``states_nnet_to_pytorch_input``.
        env: Environment used for ``state_to_nnet_input`` and forwarded to
            ``approx_admissible_conv``.
        states: States whose network outputs are plotted.
        outputs: Expected values for ``states``; flattened with ``flatten``.
        num_samples: Maximum number of randomly chosen points to plot. When
            fewer states are available, all of them are used. (The original
            hard-coded 100 and raised IndexError for smaller inputs.)
    """
    nnet.eval()

    states_targ_nnet: np.ndarray = env.state_to_nnet_input(states)
    out_nnet = nnet(
        states_nnet_to_pytorch_input(states_targ_nnet, device).float()
    ).cpu().data.numpy()

    out_nnet, _ = flatten(out_nnet)
    outputs, _ = flatten(outputs)
    out_nnet_array = np.array(out_nnet)
    outputs_array = np.array(outputs)

    # Shuffle all indices and keep a prefix; slicing clamps to the available
    # length, so short inputs no longer index past the end.
    shuffled_idxs = list(range(len(out_nnet_array)))
    random.shuffle(shuffled_idxs)
    sample_idxs = shuffled_idxs[:max(num_samples, 0)]

    random_states: List[State] = [states[idx] for idx in sample_idxs]
    sample_expected: list = [outputs_array[idx] for idx in sample_idxs]
    sample_outputs: list = [out_nnet_array[idx] for idx in sample_idxs]

    h_new: np.ndarray = approx_admissible_conv(
        env, nnet, out_nnet_array, outputs_array, states, random_states,
        sample_outputs, sample_expected)

    _scatter_against_expected(sample_expected, sample_outputs,
                              'NNet output', "Output vs Expected")
    _scatter_against_expected(sample_expected, h_new,
                              'Converted output',
                              "Converted Output vs Expected")


def _scatter_against_expected(expected, values, ylabel, title):
    """Show one scatter plot of ``values`` over ``expected`` with a reference line."""
    plt.scatter(expected, values, c='000000', linewidths=0.1)
    plt.axline([0, 0], [30, 30], linewidth=3, c='g')
    plt.ylabel(ylabel)
    plt.xlabel('Expected value')
    plt.title(title)
    plt.show()
def bellman(
    states: List, heuristic_fn, env: Environment
) -> Tuple[np.ndarray, List[np.ndarray], List[List[State]]]:
    """Perform a one-step Bellman backup of the cost-to-go for ``states``.

    Args:
        states: States to back up.
        heuristic_fn: Callable mapping a flat list of states to an array of
            cost-to-go estimates.
        env: Environment providing ``expand`` and ``is_solved``.

    Returns:
        A tuple ``(ctg_backup, ctg_next_p_tc_l, states_exp)``:
        ``ctg_backup`` is the per-state minimum of transition cost plus child
        cost-to-go, zeroed for solved states; ``ctg_next_p_tc_l`` is the
        unmodified result of ``np.split`` on those sums; ``states_exp`` holds
        the children returned by ``env.expand``.
    """
    # expand states
    states_exp, tc_l = env.expand(states)
    tc = np.concatenate(tc_l, axis=0)

    # get cost-to-go of expanded states
    states_exp_flat, split_idxs = misc_utils.flatten(states_exp)
    ctg_next: np.ndarray = heuristic_fn(states_exp_flat)

    # backup cost-to-go
    ctg_next_p_tc = tc + ctg_next
    ctg_next_p_tc_l = np.split(ctg_next_p_tc, split_idxs)

    is_solved = env.is_solved(states)

    # np.split returns len(split_idxs) + 1 chunks. When split_idxs ends with
    # the total element count, the last chunk is empty and np.min on it
    # raises. Only the first len(states) chunks correspond to input states,
    # so the minimum is restricted to those. This changes nothing when the
    # chunk count already equals len(states).
    per_state_chunks = ctg_next_p_tc_l[:len(states)]
    ctg_backup = np.array(
        [np.min(chunk) for chunk in per_state_chunks]
    ) * np.logical_not(is_solved)

    return ctg_backup, ctg_next_p_tc_l, states_exp
def patchIt(self, testInst, config=False):
    """Patch ``testInst`` toward the closest acceptable leaf of the tree.

    The instance is located in ``self.tree``, the tree root is reached by
    following ``up`` links, and all leaves below the root are collected. The
    target leaf is the candidate with the smallest ``self.howfar`` distance
    from the current node. Candidates are leaves with
    ``score <= 0.9 * current.score`` when ``self.config`` is truthy, and
    leaves with ``score == 0`` otherwise. Every branch entry of the target
    that is not on the current node's branch overwrites the matching column
    with the string form of its new value. Finally the last column is set
    to 1.

    Args:
        testInst: The instance to patch; it is wrapped in a one-row DataFrame.
        config: Not read by this method -- the behaviour is driven by
            ``self.config``. NOTE(review): confirm whether this argument was
            meant to override ``self.config``.

    Returns:
        The (possibly patched) row as a plain list. If no target leaf can be
        selected, the unmodified row is returned.
    """
    C = Changes()  # Record changes (unused while the save/log calls are disabled)
    testInst = pd.DataFrame(testInst).transpose()
    current = self.find(testInst, self.tree)

    # Move to tree root
    node = current
    while node.lvl > -1:
        node = node.up

    leaves = flatten([self.leaves(_k) for _k in node.kids])

    try:
        if self.config:
            candidates = [
                leaf for leaf in leaves
                if leaf.score <= 0.9 * current.score
            ]
        else:
            candidates = [leaf for leaf in leaves if leaf.score == 0]
        # min() returns the first smallest element, the same one that
        # sorted(...)[0] would pick, and raises on an empty candidate list.
        best = min(candidates, key=lambda leaf: self.howfar(current, leaf))
    except Exception:
        # Best effort: with no usable target the instance is left untouched.
        # ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt/SystemExit propagate.
        return testInst.values.tolist()[0]

    for step in best.branch:
        if step in current.branch:
            continue
        name, value = step[0], step[1]
        if self.config:
            now = value
        else:
            # The replacement is the absolute value of both bounds of the
            # range. The original also computed an unused distance from the
            # old cell value; that dead code is removed.
            now = (abs(value[0]), abs(value[1]))
        testInst[name] = str(now)

    testInst[testInst.columns[-1]] = 1
    return testInst.values.tolist()[0]
def astar_update(states: List[State], env: Environment, num_steps: int,
                 heuristic_fn):
    """Run A* from ``states`` and collect Bellman backups of popped nodes.

    An ``AStar`` search is created with one uniformly random weight per start
    state and stepped ``num_steps`` times with a batch size of 1. Every popped
    node then has ``compute_bellman()`` called on it.

    Returns:
        A tuple ``(states_update, cost_to_go_update, is_solved)``: the states
        of all popped nodes, an array of their ``bellman`` values, and an
        array built from ``astar.has_found_goal()``.
    """
    rand_weights: List[float] = list(np.random.rand(len(states)))
    search = AStar(states, env, heuristic_fn, rand_weights)

    for _ in range(num_steps):
        search.step(heuristic_fn, 1, verbose=False)

    popped_by_inst: List[List[Node]] = search.get_popped_nodes()
    flattened = misc_utils.flatten(popped_by_inst)
    popped: List[Node] = flattened[0]

    # All backups are computed before any node is read.
    for popped_node in popped:
        popped_node.compute_bellman()

    states_update: List[State] = []
    for popped_node in popped:
        states_update.append(popped_node.state)

    backups = [popped_node.bellman for popped_node in popped]
    cost_to_go_update: np.ndarray = np.array(backups)

    is_solved: np.ndarray = np.array(search.has_found_goal())

    return states_update, cost_to_go_update, is_solved
def gbfs_update(states: List[State], env: Environment, num_steps: int,
                heuristic_fn, eps_max: float):
    """Run greedy best-first search from ``states`` and collect its trajectories.

    A ``GBFS`` search is created with one random epsilon per start state,
    drawn as ``np.random.rand(...) * eps_max``, and stepped ``num_steps``
    times.

    Returns:
        A tuple ``(states_update, cost_to_go_update, is_solved)``: the first
        element of every trajectory entry, an array of the second elements,
        and an array built from ``gbfs.get_is_solved()``.
    """
    eps_per_state: List[float] = list(np.random.rand(len(states)) * eps_max)
    search = GBFS(states, env, eps=eps_per_state)

    for _ in range(num_steps):
        search.step(heuristic_fn)

    trajs_by_state: List[List[Tuple[State, float]]] = search.get_trajs()
    flattened = misc_utils.flatten(trajs_by_state)
    entries: List[Tuple[State, float]] = flattened[0]

    is_solved: np.ndarray = np.array(search.get_is_solved())

    # Split the (state, cost-to-go) entries into two parallel sequences.
    states_update: List = [entry[0] for entry in entries]
    costs: List[float] = [entry[1] for entry in entries]
    cost_to_go_update = np.array(costs)

    return states_update, cost_to_go_update, is_solved
def step(self, heuristic_fn: Callable, batch_size: int,
         include_solved: bool = False, verbose: bool = False):
    """Run one search iteration: pop, expand, score, filter and re-insert.

    Args:
        heuristic_fn: Heuristic passed to ``add_heuristic_and_cost`` for the
            generated children.
        batch_size: Passed to ``pop_from_open``.
        include_solved: When False (default), instances that already have a
            goal node are skipped for this iteration.
        verbose: When True, print heuristic statistics and timings.

    Side effects:
        Adds the elapsed time of each phase to ``self.timings`` (keys
        ``'pop'``, ``'expand'``, ``'check'``, ``'heur'``, ``'add'``,
        ``'itr'``) and increments ``self.step_num``.
    """
    start_time_itr = time.time()

    instances: List[Instance]
    if include_solved:
        instances = self.instances
        inst_weights = self.weights
    else:
        # ``self.weights`` is zipped per instance below, and astar_update
        # passes one weight per start state. The weights are therefore
        # filtered with the same indices as the instances. Zipping the
        # unfiltered weights against the filtered instances, as the original
        # did, pairs weights with the wrong instances once any instance is
        # solved.
        # NOTE(review): assumes len(self.weights) == len(self.instances) --
        # confirm in the constructor.
        unsolved_idxs = [
            idx for idx, instance in enumerate(self.instances)
            if len(instance.goal_nodes) == 0
        ]
        instances = [self.instances[idx] for idx in unsolved_idxs]
        inst_weights = [self.weights[idx] for idx in unsolved_idxs]

    # Pop from open
    start_time = time.time()
    popped_nodes_all: List[List[Node]] = pop_from_open(instances, batch_size)
    pop_time = time.time() - start_time

    # Expand nodes
    start_time = time.time()
    nodes_c_all: List[List[Node]] = expand_nodes(instances, popped_nodes_all,
                                                 self.env)
    expand_time = time.time() - start_time

    # Get heuristic of children, do heur before check so we can do backup
    start_time = time.time()
    nodes_c_all_flat, _ = misc_utils.flatten(nodes_c_all)
    weights, _ = misc_utils.flatten(
        [[weight] * len(nodes_c)
         for weight, nodes_c in zip(inst_weights, nodes_c_all)])
    path_costs, heuristics = add_heuristic_and_cost(nodes_c_all_flat,
                                                    heuristic_fn, weights)
    heur_time = time.time() - start_time

    # Check if children are in closed
    start_time = time.time()
    nodes_c_all = remove_in_closed(instances, nodes_c_all)
    check_time = time.time() - start_time

    # Add to open
    start_time = time.time()
    add_to_open(instances, nodes_c_all)
    add_time = time.time() - start_time

    itr_time = time.time() - start_time_itr

    # Print to screen
    if verbose:
        if heuristics.shape[0] > 0:
            min_heur = np.min(heuristics)
            min_heur_pc = path_costs[np.argmin(heuristics)]
            max_heur = np.max(heuristics)
            max_heur_pc = path_costs[np.argmax(heuristics)]

            print("Itr: %i, Added to OPEN - Min/Max Heur(PathCost): "
                  "%.2f(%.2f)/%.2f(%.2f) " %
                  (self.step_num, min_heur, min_heur_pc, max_heur,
                   max_heur_pc))

        print("Times - pop: %.2f, expand: %.2f, check: %.2f, heur: %.2f, "
              "add: %.2f, itr: %.2f" %
              (pop_time, expand_time, check_time, heur_time, add_time,
               itr_time))
        print("")

    # Update timings
    self.timings['pop'] += pop_time
    self.timings['expand'] += expand_time
    self.timings['check'] += check_time
    self.timings['heur'] += heur_time
    self.timings['add'] += add_time
    self.timings['itr'] += itr_time

    self.step_num += 1
def expand_nodes(instances: List[Instance],
                 popped_nodes_all: List[List[Node]], env: Environment):
    """Expand all popped nodes in one batch and build their child nodes.

    The popped nodes of all instances are flattened so that ``env.expand``
    and ``env.is_solved`` are each called once. The results are then regrouped
    per parent node and per instance.

    Args:
        instances: Search instances; ``popped_nodes_all[i]`` holds the nodes
            popped for ``instances[i]``.
        popped_nodes_all: Popped nodes grouped by instance.
        env: Environment providing ``expand`` and ``is_solved``.

    Returns:
        One list of newly created child nodes per instance.

    Side effects:
        Each child is appended to its parent's ``children``, the parent's
        ``transition_costs`` is extended with the costs of its children, and
        every instance's ``num_nodes_generated`` grows by the number of
        children created for it.
    """
    # Get children of all nodes at once (for speed)
    popped_nodes_flat: List[Node]
    split_idxs: List[int]
    popped_nodes_flat, split_idxs = misc_utils.flatten(popped_nodes_all)

    if len(popped_nodes_flat) == 0:
        # Keep the return shape of the normal path: one (empty) child list
        # per instance. The original returned [[]] regardless of how many
        # instances were passed.
        return [[] for _ in instances]

    states: List[State] = [node.state for node in popped_nodes_flat]

    states_c_by_node: List[List[State]]
    tcs_np: List[np.ndarray]
    states_c_by_node, tcs_np = env.expand(states)
    tcs_by_node: List[List[float]] = [list(tcs) for tcs in tcs_np]

    # Get is_solved on all states at once (for speed)
    states_c_flat, split_idxs_c = misc_utils.flatten(states_c_by_node)
    is_solved_c_flat: List[bool] = list(env.is_solved(states_c_flat))
    is_solved_c_by_node: List[List[bool]] = misc_utils.unflatten(
        is_solved_c_flat, split_idxs_c)

    # Update path costs for all states at once (for speed)
    # NOTE(review): the broadcast below assumes every node has the same
    # number of children, so that np.array(tcs_by_node) is rectangular.
    parent_path_costs = np.expand_dims(
        np.array([node.path_cost for node in popped_nodes_flat]), 1)
    path_costs_c_flat: List[float] = (
        parent_path_costs + np.array(tcs_by_node)).flatten().tolist()
    path_costs_c_by_node: List[List[float]] = misc_utils.unflatten(
        path_costs_c_flat, split_idxs_c)

    # Reshape lists: regroup the per-node data by instance
    tcs_by_inst_node: List[List[List[float]]] = misc_utils.unflatten(
        tcs_by_node, split_idxs)
    path_costs_c_by_inst_node: List[List[List[float]]] = misc_utils.unflatten(
        path_costs_c_by_node, split_idxs)
    states_c_by_inst_node: List[List[List[State]]] = misc_utils.unflatten(
        states_c_by_node, split_idxs)
    is_solved_c_by_inst_node: List[List[List[bool]]] = misc_utils.unflatten(
        is_solved_c_by_node, split_idxs)

    # Get child nodes
    nodes_c_by_inst: List[List[Node]] = []
    for inst_idx, instance in enumerate(instances):
        nodes_c_inst: List[Node] = []
        nodes_c_by_inst.append(nodes_c_inst)

        per_parent = zip(popped_nodes_all[inst_idx],
                         tcs_by_inst_node[inst_idx],
                         path_costs_c_by_inst_node[inst_idx],
                         states_c_by_inst_node[inst_idx],
                         is_solved_c_by_inst_node[inst_idx])

        for parent_node, tcs_node, path_costs_node, states_node, \
                solved_node in per_parent:
            for move_idx, state in enumerate(states_node):
                node_c: Node = Node(state, path_costs_node[move_idx],
                                    solved_node[move_idx], move_idx,
                                    parent_node)
                nodes_c_inst.append(node_c)
                parent_node.children.append(node_c)

            parent_node.transition_costs.extend(tcs_node)

        instance.num_nodes_generated += len(nodes_c_inst)

    return nodes_c_by_inst