def __find_closest_vertex(self, user_vertex: Vertex, goal: Vertex, to_add_empty: bool = False) -> Optional[Vertex]:
    """
    Pick the graph vertex which the user should move to on the way to the goal.

    1. Measure the distance from user_vertex to goal
    2. Go over the graph traversal and skip:
        2.1 vertices with the same canon_tree as the user one
        2.2 the empty vertex (unless to_add_empty is True)
    3. Consider a vertex as a candidate if its dist to goal <= user dist to goal
    4. Choose the best vertex from candidates using __choose_best_vertex()
       (it returns None if there are no candidates)
    """
    user_diffs_to_goal = goal.get_dist(user_vertex)
    log.info(f'User diff to goal is {user_diffs_to_goal}')

    def is_candidate(vertex: Vertex) -> bool:
        # We don't want to add to result the same vertex
        if are_asts_equal(user_vertex.serialized_code.canon_tree, vertex.serialized_code.canon_tree):
            return False
        # The emptiness check is only run when empty vertices are not allowed
        if not (to_add_empty or not self._graph.is_empty_vertex(vertex)):
            return False
        # Todo: calculate diffs to the nearest goal from each vertex or not???
        # Todo: think about empty tree
        return self._graph.get_dist_between_vertices(vertex, goal) <= user_diffs_to_goal

    candidates = [vertex for vertex in self._graph.get_traversal() if is_candidate(vertex)]
    return self.__choose_best_vertex(user_vertex, candidates)
def __find_closest_tree(self, user_anon_tree: AnonTree, user_canon_nodes_number: int,
                        canon_nodes_numbers_dict: Dict[int, list], candidates_file_name: str,
                        can_stop_earlier: bool = True, to_use_lower_bound=True) -> Optional[AnonTree]:
    """
    Find the anon tree which is the closest one to the user anon tree.

    1. Consider each vertex with similar nodes number as candidate (chose at least TOP_N_CANON = {1} candidates)
    2. Return None if there are no such vertices
    3. Choose at least TOP_N_ANON = {0} anon trees from canon candidates and run __choose_best_anon_tree

    can_stop_earlier and to_use_lower_bound are passed to __get_top_n_candidates on both steps.
    """
    # NOTE(review): the numbered placeholders in the docstring are presumably filled in
    # by a decorator that is not visible here - keep them intact.

    # Ids of the vertices whose canon trees have nodes number similar to user_canon_nodes_number
    canon_candidates_ids = self.__get_top_n_candidates(self.canon_top_n, user_canon_nodes_number,
                                                       canon_nodes_numbers_dict,
                                                       can_stop_earlier, to_use_lower_bound)
    log.info(f'CANON_TOP_N vertices ids are {canon_candidates_ids}')
    if not canon_candidates_ids:
        return None

    canon_candidates: List[Vertex] = [Vertex.get_item_by_id(vertex_id) for vertex_id in canon_candidates_ids]
    # All anon trees of the chosen vertices as one flat list
    pooled_anon_trees = [anon_tree
                         for vertex in canon_candidates
                         for anon_tree in vertex.serialized_code.anon_trees]
    anon_candidates = self.__get_top_n_candidates(self.anon_top_n, user_anon_tree.nodes_number,
                                                  self.__get_items_nodes_number_dict(pooled_anon_trees),
                                                  can_stop_earlier, to_use_lower_bound)
    return self.__choose_best_anon_tree(user_anon_tree, anon_candidates, candidates_file_name)
def __go_through_graph(self, user_anon: AnonTree, graph_anon: AnonTree, goal_anon: AnonTree) -> bool:
    """
    Decide if the user should go through the graph tree (True) or directly to the goal (False).

    1. Count diffs: empty tree -> user tree and user tree -> goal tree
    2. If __is_most_of_path_is_done, return False
    3. Return not __is_far_from_graph

    The __is_rate_worse check is not implemented yet (see Todo below).
    """
    empty_anon = self._graph.empty_vertex.serialized_code.anon_trees[0]
    empty_to_user = GumTreeDiff.get_diffs_number(empty_anon.tree_file, user_anon.tree_file)
    user_to_goal = GumTreeDiff.get_diffs_number(user_anon.tree_file, goal_anon.tree_file)

    # The whole path is measured as (empty -> user) + (user -> goal)
    whole_path = empty_to_user + user_to_goal
    if self.__is_most_of_path_is_done(whole_path, user_to_goal):
        log.info('Most of path is done')
        return False

    # Todo: add is_rate_worse

    user_to_graph = GumTreeDiff.get_diffs_number(user_anon.tree_file, graph_anon.tree_file)
    is_far = self.__is_far_from_graph(user_to_goal, user_to_graph)
    return not is_far
def __find_closest_tree(self, user_anon_tree: AnonTree, user_canon_nodes_number: int,
                        canon_nodes_numbers_dict: Dict[int, list],
                        candidates_file_name: str) -> Optional[AnonTree]:
    """
    Find the anon tree which is the closest one to the user anon tree.

    1. Consider each vertex with similar nodes number as candidate (chose at least TOP_N_CANON = {1} candidates)
    2. Choose at least TOP_N_ANON = {0} anon trees from canon candidates
    3. Write info about the anon candidates to the candidates file
    4. Run __choose_best_anon_tree
    """
    # NOTE(review): the numbered placeholders in the docstring are presumably filled in
    # by a decorator that is not visible here - keep them intact.

    # Ids of the vertices whose canon trees have nodes number similar to user_canon_nodes_number
    canon_candidates_ids = self.__get_top_n_candidates(self.canon_top_n, user_canon_nodes_number,
                                                       canon_nodes_numbers_dict)
    log.info(f'CANON_TOP_N vertices ids are {canon_candidates_ids}')

    canon_candidates: List[Vertex] = [Vertex.get_item_by_id(vertex_id) for vertex_id in canon_candidates_ids]
    # All anon trees of the chosen vertices as one flat list
    pooled_anon_trees = [anon_tree
                         for vertex in canon_candidates
                         for anon_tree in vertex.serialized_code.anon_trees]
    anon_candidates = self.__get_top_n_candidates(self.anon_top_n, user_anon_tree.nodes_number,
                                                  self.__get_items_nodes_number_dict(pooled_anon_trees))

    candidates_file = f'{self.candidates_file_prefix}_{candidates_file_name}'
    self.write_candidates_info_to_file(user_anon_tree, anon_candidates, candidates_file)
    return self.__choose_best_anon_tree(user_anon_tree, anon_candidates)
def __find_closest_goal(self, user_vertex: Vertex) -> Vertex:
    """
    Find the goal which is the closest one to the user vertex.

    1. Get list of all goals (parents of the graph end vertex)
    2. Find the closest using __choose_best_vertex()
    """
    all_goals = self._graph.end_vertex.parents
    log.info(f'Number of goals: {len(all_goals)}\nGoals ids are {[goal.id for goal in all_goals]}')
    closest_goal = self.__choose_best_vertex(user_vertex, all_goals)
    return closest_goal
def __choose_best_vertex(self, user_vertex: Vertex, vertices: List[Vertex]) -> Optional[Vertex]:
    """
    Choose the best vertex for the user vertex from the given ones.

    1. Return None if there are no candidates
    2. Wrap each candidate using get_measured_tree and sort the wrapped candidates
    3. Return the vertex of the first candidate
    """
    log.info(f'Number of candidates: {len(vertices)}\nCandidates ids are {[v.id for v in vertices]}')
    if not vertices:
        return None

    # The order is fully defined by the comparison of the measured objects
    ranked = sorted(self.get_measured_tree(user_vertex, candidate) for candidate in vertices)
    best_vertex = ranked[0].vertex
    log.info(f'The best vertex id is {best_vertex.id}')
    return best_vertex
def _is_close_to_goals(self, closest_tree: AnonTree) -> bool:
    """
    Check if the given tree is close to the goals.

    1. Use only nodes number info.
    2. Returns False, if goals median is empty
    3. Returns True, if closest_tree nodes number is not less, than {0} * median for goals nodes number
    """
    if not self.graph.is_goals_median_empty():
        # Todo: don't we want to use 0.8 here instead of 0.2??
        nodes_number_threshold = self.graph.goals_median * self.nodes_number_percent_close_to_goals
        return closest_tree.nodes_number >= nodes_number_threshold

    log.info('Cannot check if close to goals because goals median is empty')
    return False
def __find_closest_tree(self,
                        user_anon_tree: AnonTree,
                        user_canon_nodes_number: int,
                        canon_nodes_numbers_dict: Dict[int, list],
                        candidates_file_name: str,
                        can_stop_earlier: bool = True,
                        to_use_lower_bound: bool = True,
                        to_add_empty_tree: bool = False,
                        to_add_same_structure_trees: bool = False) -> Optional[AnonTree]:
    """
    Find the anon tree which is the closest one to the user anon tree.

    1. Consider each vertex with similar nodes number as candidate (chose at least TOP_N_CANON = {1} candidates)
    2. Choose at least TOP_N_ANON = {0} anon trees from canon candidates
    3. Add empty tree if needed
    4. If needed, consider each anon tree with same structure as candidate and
       choose at least {2} trees according to nodes number from same tree candidates
    5. Run __choose_best_anon_tree on all candidates
    """
    # NOTE(review): the numbered placeholders in the docstring are presumably filled in
    # by a decorator that is not visible here - keep them intact.

    # Ids of the vertices whose canon trees have nodes number similar to user_canon_nodes_number
    canon_candidates_ids = self.__get_top_n_candidates(self.canon_top_n, user_canon_nodes_number,
                                                       canon_nodes_numbers_dict,
                                                       can_stop_earlier, to_use_lower_bound)
    log.info(f'CANON_TOP_N vertices ids are {canon_candidates_ids}')

    anon_candidates = []
    if canon_candidates_ids:
        canon_candidates: List[Vertex] = [Vertex.get_item_by_id(vertex_id)
                                          for vertex_id in canon_candidates_ids]
        # All anon trees of the chosen vertices as one flat list
        pooled_anon_trees = [anon_tree
                             for vertex in canon_candidates
                             for anon_tree in vertex.serialized_code.anon_trees]
        anon_candidates = self.__get_top_n_candidates(self.anon_top_n, user_anon_tree.nodes_number,
                                                      self.__get_items_nodes_number_dict(pooled_anon_trees),
                                                      can_stop_earlier, to_use_lower_bound)

    if to_add_empty_tree:
        empty_anon_tree = self._graph.empty_vertex.serialized_code.anon_trees[0]
        anon_candidates.append(empty_anon_tree)

    if to_add_same_structure_trees:
        anon_candidates += self.__get_same_structure_trees(user_anon_tree, self.same_structure_top_n)

    return self.__choose_best_anon_tree(user_anon_tree, anon_candidates, candidates_file_name)
def __go_through_graph(self, user_vertex: Vertex, graph_vertex: Vertex, goal: Vertex) -> bool:
    """
    Decide if the user should go through the graph vertex (True) or directly to the goal (False).

    1. Get dists: user -> goal and empty vertex -> user
    2. If __is_most_of_path_is_done, return False
    3. Return not __is_far_from_graph
    """
    user_to_goal = user_vertex.get_dist(goal)
    empty_to_user = self._graph.empty_vertex.get_dist(user_vertex)

    # The whole path is measured as (empty -> user) + (user -> goal)
    whole_path = empty_to_user + user_to_goal
    if self.__is_most_of_path_is_done(whole_path, user_to_goal):
        log.info('Most of path is done')
        return False

    user_to_graph = user_vertex.get_dist(graph_vertex)
    is_far = self.__is_far_from_graph(user_to_goal, user_to_graph)
    return not is_far
def __get_same_structure_trees(self, user_anon_tree: AnonTree, trees_number: int) -> List[AnonTree]:
    """
    Get anon trees which have the same ast structure as the user anon tree.

    1. Get anon trees by ids, which are stored in graph.anon_structure_dict for the user ast_structure
    2. Choose trees_number trees with the closest nodes number using __get_top_n_candidates
       (without stopping earlier, with lower bound)
    """
    # NOTE(review): a plain dict would raise KeyError here for an unseen structure -
    # presumably anon_structure_dict handles missing keys; verify against its definition.
    same_structure_ids = self.graph.anon_structure_dict[user_anon_tree.ast_structure]
    same_structure_trees = [AnonTree.get_item_by_id(anon_tree_id) for anon_tree_id in same_structure_ids]

    nodes_number_dict = self.__get_items_nodes_number_dict(same_structure_trees)
    candidates = self.__get_top_n_candidates(trees_number,
                                             user_anon_tree.nodes_number,
                                             nodes_number_dict,
                                             can_stop_earlier=False,
                                             to_use_lower_bound=True)
    log.info(f'Found trees with same structure: {[candidate.id for candidate in candidates]}')
    return candidates
def __choose_best_anon_tree(self, user_anon_tree: AnonTree, anon_trees: List[AnonTree]) -> Optional[AnonTree]:
    """
    Choose the best anon tree for the user anon tree from the given ones.

    1. Return None if there are no candidates
    2. Wrap each candidate using get_measured_tree and sort the wrapped candidates
    3. Return the candidate_tree of the first candidate
    """
    log.info(f'Number of candidates: {len(anon_trees)}\nCandidates ids are {[tree.id for tree in anon_trees]}')
    if not anon_trees:
        return None

    # The order is fully defined by the comparison of the measured objects
    ranked = sorted(self.get_measured_tree(user_anon_tree, candidate) for candidate in anon_trees)
    best_tree = ranked[0].candidate_tree
    log.info(f'The best vertex id is {best_tree.id}')
    return best_tree
def _is_close_to_goals(self, closest_tree: AnonTree) -> bool:
    """
    Check if the given tree is close to the goals.

    1. Use only nodes number info.
    2. Returns False if goals median is empty or there are no goals at all
    3. Returns True if percent of goals with similar nodes number
       (with indent no more than {1}) is more than {0}
    """
    # NOTE(review): the numbered placeholders in the docstring are presumably filled in
    # by a decorator that is not visible here. The code compares the share of similar
    # goals with 1 - nodes_number_percent_close_to_goals.
    if self.graph.is_goals_median_empty():
        log.info('Cannot check if close to goals because goals median is empty')
        return False

    tree_nodes_number = closest_tree.nodes_number
    max_indent = self.max_goal_nodes_number_indent

    # Each dict key is a nodes number and its value holds the goals with this nodes number,
    # so goals are counted per key without building a flat list of all of them
    goals_count = 0
    similar_goals_count = 0
    for goal_nodes_number, goals in self.graph.goals_nodes_number_dict.items():
        goals_count += len(goals)
        if abs(goal_nodes_number - tree_nodes_number) <= max_indent:
            similar_goals_count += len(goals)

    if goals_count == 0:
        # Nothing to compare with; answer the same way as for the empty median
        log.info('Cannot check if close to goals because there are no goals')
        return False

    return similar_goals_count / goals_count >= 1 - self.nodes_number_percent_close_to_goals
def find_next_anon_tree(self, user_vertex: Vertex) -> Vertex:
    """
    Find the next vertex for the user vertex.

    1. Find the closest goal (__find_closest_goal)
    2. Find the closest graph_vertex (__find_closest_vertex), it can be None
    3. Choose between them using __go_through_graph:
       return graph_vertex if it was found and we should go through graph, otherwise return goal
    """
    log.info(f'{self.__class__.__name__}\n'
             f'Start finding the next code state for '
             f'the user code:\n{get_code_from_tree(user_vertex.serialized_code.anon_trees[0])}\nand '
             f'the user:\n{user_vertex.code_info_list[0].user}')

    # NOTE(review): __find_closest_goal gets its result from __choose_best_vertex, which
    # returns None for an empty list - presumably the graph always has a goal; confirm.
    goal = self.__find_closest_goal(user_vertex)
    log.info(f'Chosen goal is vertex {goal.id}')

    graph_vertex = self.__find_closest_vertex(user_vertex, goal)
    # We can have graph_vertex = None, so its id must not be read before this check
    if graph_vertex is None:
        log.info('Graph vertex was not found')
    else:
        log.info(f'Chosen graph_vertex is vertex {graph_vertex.id}')

    if graph_vertex is not None and self.__go_through_graph(user_vertex, graph_vertex, goal):
        log.info('We are going through graph')
        return graph_vertex

    log.info('We are going directly to the goal')
    return goal
def __find_closest_vertex(self, user_vertex: Vertex, goal: Vertex) -> Optional[Vertex]:
    """
    Pick the graph vertex which the user should move to.

    1. If there is a vertex in the graph with same canon_tree:
        1.1 Return __choose_best_vertex on vertex children
    2. Consider each vertex with small __get_rollback_probability as candidate
    3. Choose the best vertex from candidates using __choose_best_vertex()

    The goal argument is not used in the current implementation.
    """
    # Todo: 14/04 test vertex from graph
    same_vertex = self._graph.find_vertex(user_vertex.canon_tree)
    if same_vertex:
        log.info('Choosing best vertex from found vertex children')
        return self.__choose_best_vertex(user_vertex, same_vertex.children)

    candidates = [
        vertex for vertex in self._graph.get_traversal()
        if self.__get_rollback_probability(user_vertex.canon_tree, vertex.canon_tree) <= ROLLBACK_PROBABILITY
    ]
    return self.__choose_best_vertex(user_vertex, candidates)
def find_next_anon_tree(self,
                        user_anon_tree: AnonTree,
                        user_canon_tree: ast.AST,
                        candidates_file_id: Optional[str] = None) -> AnonTree:
    """
    Find the next anon tree for the user anon tree.

    1. Find the same tree SAME_TREE in the graph and get the best tree from next trees
       (__find_same_tree_in_graph)
    2. If SAME_TREE is not None, return SAME_TREE
    3. Find the closest tree CLOSEST_TREE in graph (__find_closest_tree with graph.canon_trees_nodes_number,
       can_stop_earlier={0}, to_use_lower_bound={1})
    4. If CLOSEST_TREE is found and not _is_close_to_goals, return CLOSEST_TREE
    5. Find the closest goal CLOSEST_GOAL in graph (__find_closest_goal_tree,
       can_stop_earlier={2}, to_use_lower_bound={3})
    6. Choose between CLOSEST_TREE and CLOSEST_GOAL using __go_through_graph
    """
    # NOTE(review): the numbered placeholders in the docstring are presumably filled in
    # by a decorator that is not visible here - keep them intact.
    log.info(f'{self.__class__.__name__}\n'
             f'Start finding the next code state for '
             f'the user code:\n{get_code_from_tree(user_anon_tree.tree)}\nand '
             f'the user:\n{user_anon_tree.code_info_list[0].user}')
    self.candidates_file_prefix = f'{self.get_file_prefix_by_user_tree(candidates_file_id)}'

    same_tree = self.__find_same_tree_in_graph(user_anon_tree, user_canon_tree)
    if same_tree is not None:
        log.info(f'Found the same tree. Chosen anon tree:\n{get_code_from_tree(same_tree.tree)}')
        return same_tree
    log.info('Same tree not found')

    canon_nodes_number = AstStructure.get_nodes_number_in_ast(user_canon_tree)
    graph_anon_tree = self.__find_closest_tree(user_anon_tree,
                                               canon_nodes_number,
                                               self.graph.canon_nodes_number_dict,
                                               candidates_file_name='graph_candidates',
                                               can_stop_earlier=self.graph_tree_stop_earlier,
                                               to_use_lower_bound=self.graph_tree_lower_bound)

    # We can have graph_anon_tree = None
    if graph_anon_tree:
        log.info(f'Chosen anon tree in graph:\n{get_code_from_tree(graph_anon_tree.tree)}')
        if not self._is_close_to_goals(graph_anon_tree):
            log.info('The most of path is not done. Go through graph')
            return graph_anon_tree

    goal_anon_tree = self.__find_closest_goal_tree(user_anon_tree, canon_nodes_number)
    log.info(f'Chosen goal anon tree:\n{get_code_from_tree(goal_anon_tree.tree)}')

    # We can have graph_anon_tree = None
    if graph_anon_tree and self.__go_through_graph(user_anon_tree, graph_anon_tree, goal_anon_tree):
        log.info('We are going through graph')
        return graph_anon_tree

    log.info('We are going directly to the goal')
    return goal_anon_tree
def __get_top_n_candidates(top_n: int, nodes_number: int,
                           nodes_numbers_dict: Dict[int, List[Any]]) -> List[Any]:
    """
    We want to have top_n trees with nodes number, that is close to the given nodes_number.
    So we consequently add vertices with node numbers equal:
    1. nodes_number
    2. nodes_number - 1, nodes_number + 1
    3. nodes_number - 2, nodes_number + 2
    4. ....
    until we reach top_n or have no more node_numbers to add.

    All items stored for one nodes number are added at once, so the result can have
    more than top_n items. If nodes_numbers_dict is empty, the result is an empty list.
    """
    # NOTE(review): there is no self argument - presumably this is declared as a static
    # method by a decorator that is not visible here.
    log.info(f'Start getting top_n candidates, top_n is {top_n}, nodes number is {nodes_number}')
    candidates: List[Any] = []

    if not nodes_numbers_dict:
        # max() and min() raise ValueError on an empty dict, and there is nothing to choose from anyway
        log.info(f'Finish getting top_n candidates, top_n is {top_n}, candidates len is {len(candidates)}')
        return candidates

    nodes_numbers_queue = collections.deque([nodes_number])
    lower_bound = nodes_number
    upper_bound = nodes_number
    max_nodes_number = max(nodes_numbers_dict)
    min_nodes_number = min(nodes_numbers_dict)

    while len(candidates) < top_n and nodes_numbers_queue:
        log.info(f'Start adding candidates.\n'
                 f'Candidates len is {len(candidates)}, queue have {len(nodes_numbers_queue)} nodes numbers')
        # The queue is emptied from the right, so the upper bound is handled before the lower one
        while nodes_numbers_queue:
            current_nodes_number = nodes_numbers_queue.pop()
            candidates += nodes_numbers_dict.get(current_nodes_number, [])
        log.info(f'Finish adding candidates.\n'
                 f'Candidates len is {len(candidates)}, queue have {len(nodes_numbers_queue)} nodes numbers')

        # Move one step away from nodes_number in both directions,
        # but do not leave the range of the nodes numbers we have
        lower_bound -= 1
        if lower_bound >= min_nodes_number:
            log.info(f'Append lower_bound to queue: {lower_bound}, min nodes number is {min_nodes_number}')
            nodes_numbers_queue.append(lower_bound)

        upper_bound += 1
        if upper_bound <= max_nodes_number:
            log.info(f'Append upper_bound to queue: {upper_bound}, max nodes number is {max_nodes_number}')
            nodes_numbers_queue.append(upper_bound)

    log.info(f'Finish getting top_n candidates, top_n is {top_n}, candidates len is {len(candidates)}')
    return candidates
def __get_top_n_candidates(top_n: int, nodes_number: int,
                           nodes_numbers_dict: Dict[int, List[Any]],
                           can_stop_earlier: bool = True,
                           to_use_lower_bound: bool = False) -> List[Any]:
    """
    We want to have top_n trees with nodes number, that is close to the given nodes_number.
    So we consequently add vertices with node numbers equal:
    1. nodes_number
    2. nodes_number - 1 (if lower bound is on), nodes_number + 1
    3. nodes_number - 2 (if lower bound is on), nodes_number + 2
    4. ....
    until we reach top_n or have no more node_numbers to add.
    If can_stop_earlier is True, we stop as soon as we far from user nodes number at {0} nodes numbers

    All items stored for one nodes number are added at once, so the result can have
    more than top_n items. If nodes_numbers_dict is empty, the result is an empty list.
    """
    # NOTE(review): there is no self argument - presumably this is declared as a static
    # method by a decorator that is not visible here; the numbered placeholder in the
    # docstring is presumably filled in the same way.
    log.info(f'Start getting top_n candidates, top_n is {top_n}, nodes number is {nodes_number}')
    candidates: List[Any] = []

    if not nodes_numbers_dict:
        # max() and min() raise ValueError on an empty dict, and there is nothing to choose from anyway
        log.info(f'Finish getting top_n candidates, top_n is {top_n}, candidates len is {len(candidates)}')
        return candidates

    nodes_numbers_queue = collections.deque([nodes_number])
    # Current distance from nodes_number to the nodes numbers in the queue
    indent = 0
    max_nodes_number = max(nodes_numbers_dict)
    min_nodes_number = min(nodes_numbers_dict)

    while len(candidates) < top_n and nodes_numbers_queue and \
            (not can_stop_earlier or indent <= PathFinderV4.max_tree_nodes_number_indent):
        log.info(f'Start adding candidates.\n'
                 f'Candidates len is {len(candidates)}, queue have {len(nodes_numbers_queue)} nodes numbers')
        # The queue is emptied from the right, so the upper bound is handled before the lower one
        while nodes_numbers_queue:
            new_nodes_number = nodes_numbers_queue.pop()
            candidates += nodes_numbers_dict.get(new_nodes_number, [])
        log.info(f'Finish adding candidates.\n'
                 f'Candidates len is {len(candidates)}, queue have {len(nodes_numbers_queue)} nodes numbers')

        # Move one step away from nodes_number, but do not leave the range of the nodes numbers we have
        indent += 1
        if to_use_lower_bound:
            lower_bound = nodes_number - indent
            if lower_bound >= min_nodes_number:
                log.info(f'Append lower_bound to queue: {lower_bound}, min nodes number is {min_nodes_number}')
                nodes_numbers_queue.append(lower_bound)

        upper_bound = nodes_number + indent
        if upper_bound <= max_nodes_number:
            log.info(f'Append upper_bound to queue: {upper_bound}, max nodes number is {max_nodes_number}')
            nodes_numbers_queue.append(upper_bound)

    log.info(f'Finish getting top_n candidates, top_n is {top_n}, candidates len is {len(candidates)}')
    return candidates