def test_empty_ast(self, not_empty_ast: ast.AST) -> None:
    """Check that two empty ASTs are equal and that an empty AST differs from a non-empty one."""
    first_empty, second_empty = ast.parse(''), ast.parse('')

    # Two independently parsed empty sources must compare as equal
    assert are_asts_equal(first_empty, second_empty)

    # An empty tree must never be equal to the non-empty tree passed in
    assert not are_asts_equal(first_empty, not_empty_ast), \
        f'\n\n{get_code_from_tree(first_empty)} \n\n\n{get_code_from_tree(not_empty_ast)}'
def are_equal(expected_out: Tuple[int, List[AtiItem], ast.AST, ast.AST],
              actual_out: Tuple[int, List[AtiItem], ast.AST, ast.AST]) -> bool:
    """
    Compare two (index, ati items, anon tree, canon tree) tuples component by component.

    The checks run in order and stop at the first mismatch: indices, then ati items lists,
    then anon trees, then canon trees.
    """
    exp_index, exp_items, exp_anon, exp_canon = expected_out
    act_index, act_items, act_anon, act_canon = actual_out
    return (
        exp_index == act_index
        and __are_equal_ati_items_lists(exp_items, act_items)
        and are_asts_equal(exp_anon, act_anon)
        and are_asts_equal(exp_canon, act_canon)
    )
def drop_same_anon_trees(df: pd.DataFrame) -> pd.DataFrame:
    """
    Drop every row whose anon tree is equal to the anon tree of an earlier row.

    The data frame is modified in place: its index is reset to 0..len(df)-1 before and
    after dropping, and the same (mutated) data frame is returned.

    :param df: data frame with a fragment column (CODE_TRACKER_COLUMN.FRAGMENT)
    :return: the same data frame with only the first row of each group of equal anon trees
    """
    log.info(f'Start dropping same anon trees, df size is {len(df)}')
    df.index = np.arange(0, len(df))
    df_anon_trees = df[CODE_TRACKER_COLUMN.FRAGMENT.value].apply(
        lambda f: get_trees(f, {TREE_TYPE.ANON})[0]).to_list()

    # Keep the original row position together with each tree. Reconstructing the position
    # from the shrinking list (j + number of deletions) is wrong whenever an already deleted
    # tree was located AFTER the current one, e.g. for trees [A, B, C, B, A].
    remaining = list(enumerate(df_anon_trees))
    indices_to_drop = []
    i = 0
    while i < len(remaining):
        log.info(f'Handling {i}/{len(remaining)} anon tree')
        current_anon_tree = remaining[i][1]
        # Everything up to and including the current tree is already known to be unique
        kept = remaining[:i + 1]
        for position, next_anon_tree in remaining[i + 1:]:
            if are_asts_equal(current_anon_tree, next_anon_tree):
                log.info(f'Dropping {position} anon tree')
                indices_to_drop.append(position)
            else:
                kept.append((position, next_anon_tree))
        remaining = kept
        i += 1

    df.drop(df.index[indices_to_drop], inplace=True)
    df.index = np.arange(0, len(df))
    log.info(f'Stop dropping same anon trees, df size is {len(df)}')
    return df
def __find_closest_vertex(self, user_vertex: Vertex, goal: Vertex,
                          to_add_empty: bool = False) -> Optional[Vertex]:
    """
    Pick the graph vertex that is closest to the user's one.

    A vertex from the graph traversal becomes a candidate when:
      - its canon tree differs from the user's canon tree;
      - it is not an empty vertex (unless to_add_empty is True);
      - its distance to the goal is not greater than the user's distance to the goal.
    The final choice among the candidates is delegated to __choose_best_vertex().
    """
    user_diffs_to_goal = goal.get_dist(user_vertex)
    log.info(f'User diff to goal is {user_diffs_to_goal}')

    user_canon_tree = user_vertex.serialized_code.canon_tree
    candidates = []
    for graph_vertex in self._graph.get_traversal():
        # The vertex with the same canon tree as the user's one must not get into the result;
        # empty vertices are skipped too unless they were explicitly requested
        if are_asts_equal(user_canon_tree, graph_vertex.serialized_code.canon_tree) or \
                (not to_add_empty and self._graph.is_empty_vertex(graph_vertex)):
            continue

        # Todo: calculate diffs to the nearest goal from each vertex or not???
        # Todo: think about empty tree
        if self._graph.get_dist_between_vertices(graph_vertex, goal) <= user_diffs_to_goal:
            candidates.append(graph_vertex)

    return self.__choose_best_vertex(user_vertex, candidates)
def does_lead_to_full_solution(
        self, test_inputs_number: int) -> List[Tuple[bool, int]]:
    """
    For every test input, follow the hints step by step and report whether they lead to the full solution.

    :param test_inputs_number: kept for interface compatibility; not used by this method
    :return: one (reached_full_solution, steps_number) tuple per test input, where
             reached_full_solution is False if the same anon tree was produced twice (a loop)
    """
    results = []
    for input_index, t_i in enumerate(self._test_inputs):
        log.info(
            f'Run {input_index}/{len(self._test_inputs)} test input, len: {len(t_i[TEST_INPUT.SOURCE_CODE])}'
        )
        anon_tree, canon_tree = TestSystem.create_user_trees(
            self._hint_handler, t_i)
        prev_anon_trees = [anon_tree.tree]
        steps_n = 0
        same_tree_in_loop = False
        # Keep getting hints until reaching the full solution or getting the same tree again (it means that
        # there is a loop, so we cannot lead to the full solution)
        while anon_tree.rate != TEST_RESULT.FULL_SOLUTION.value and not same_tree_in_loop:
            log.info(
                f'step: {steps_n}, anon tree:\n {get_code_from_tree(anon_tree.tree)}'
            )
            anon_tree = self._path_finder.find_next_anon_tree(
                anon_tree, canon_tree,
                f'test_result_{input_index}_step_{steps_n}')
            canon_tree = get_canon_tree_from_anon_tree(
                anon_tree.tree, get_imports(anon_tree.tree))
            steps_n += 1
            # Check for getting the same tree again.
            # The loop variable must not reuse the name of the test input index: it is still needed
            # to build the 'test_result_..._step_...' name on the next step.
            for prev_index, prev_anon_tree in enumerate(prev_anon_trees):
                # NOTE(review): assumes log is a standard logging logger, which does not accept
                # print-style keyword arguments such as end=''
                log.info(f'{prev_index} ')
                if are_asts_equal(anon_tree.tree, prev_anon_tree):
                    log.info(f'Get the same tree, step {steps_n}')
                    same_tree_in_loop = True
                    break
            prev_anon_trees.append(anon_tree.tree)
        results.append((not same_tree_in_loop, steps_n))
    log.info(f'results: {results}')
    return results
def find_anon_tree(self, anon_tree: ast.AST) -> Optional[AnonTree]:
    """
    Return the first stored anon tree that is equal to the given AST, or None if there is no such tree.

    The nodes number is compared first, so the AST comparison itself only runs for
    stored trees with the same nodes number as the given one.
    """
    target_nodes_number = AstStructure.get_nodes_number_in_ast(anon_tree)
    for stored_tree in self._anon_trees:
        if target_nodes_number == stored_tree.nodes_number and are_asts_equal(stored_tree.tree, anon_tree):
            return stored_tree
    return None
def _IMeasuredTree__init_diffs_number_and_rollback_probability(
        self) -> None:
    """
    Initialize the diffs number and the rollback probability between the user and the candidate trees.

    If the trees are equal, or GumTreeDiff reports zero diffs, the diffs number is set to
    infinity and the rollback probability to 0. Otherwise the rollback probability is the
    share of delete edits among all diffs.
    """
    if are_asts_equal(self._user_tree.tree, self._candidate_tree.tree):
        self._diffs_number, self._rollback_probability = math.inf, 0
        return

    diffs_number, delete_edits = GumTreeDiff.get_diffs_and_delete_edits_numbers(
        self.user_tree.tree_file, self.candidate_tree.tree_file)
    if diffs_number == 0:
        # Zero diffs is handled in the same way as equal trees
        self._diffs_number = math.inf
        self._rollback_probability = 0
    else:
        self._diffs_number = diffs_number
        self._rollback_probability = delete_edits / diffs_number
def test_same_canon_trees_in_same_vertices(self, vertex: VERTEX, subtests):
    """Check that all fragments listed for the given vertex have pairwise equal canon trees."""
    fragments_of_vertex = []
    for fragment_index in INDICES_BY_VERTEX[vertex]:
        fragments_of_vertex.append(all_fragments[fragment_index])

    canon_trees = []
    for fragment in fragments_of_vertex:
        canon_trees.append(get_trees(fragment, {TREE_TYPE.CANON})[0])

    # Compare every ordered pair, including each tree with itself
    for first_tree in canon_trees:
        for second_tree in canon_trees:
            with subtests.test():
                assert are_asts_equal(first_tree, second_tree)
def test_different_canon_trees_in_different_vertices(self, subtests):
    """Check that fragments taken from different vertices have different canon trees."""
    # The first fragment of each vertex represents that vertex
    representatives = []
    for vertex in VERTEX:
        first_fragment_index = INDICES_BY_VERTEX[vertex][0]
        representatives.append(all_fragments[first_fragment_index])

    canon_trees = []
    for fragment in representatives:
        canon_trees.append(get_trees(fragment, {TREE_TYPE.CANON})[0])

    # Each tree is paired with its predecessor (the first one with the last one)
    shifted_canon_trees = np.roll(canon_trees, 1)
    for current_tree, neighbour_tree in zip(canon_trees, shifted_canon_trees):
        with subtests.test():
            assert not are_asts_equal(current_tree, neighbour_tree)
def __remove_loops(code_info_chain: List[Tuple[Code, CodeInfo]],
                   user: User) -> List[Tuple[Code, CodeInfo]]:
    """
    Remove loops from the chain: if an anon tree occurs again later, everything from its
    first occurrence up to (but not including) its last occurrence is cut out.

    An empty code is put in front of the chain first. Without it the chain
    Tree1 -> Tree2 -> Tree3 -> Empty Tree -> Tree5 would stay as it is, although
    Empty Tree -> Tree5 is expected. With the empty code in front the chain becomes
    Empty Tree -> Tree1 -> Tree2 -> Tree3 -> Empty Tree -> Tree5 and is reduced to
    Empty Tree -> Tree5.
    """
    empty_code = Code.from_source('', TEST_RESULT.CORRECT_CODE.value)
    code_info_chain = [(empty_code, CodeInfo(user))] + code_info_chain

    position = 0
    while position < len(code_info_chain):
        anchor_tree = code_info_chain[position][0].anon_tree

        # Only the last occurrence of the same tree matters for the cut
        last_same_position = None
        for later_position in range(position + 1, len(code_info_chain)):
            later_tree = code_info_chain[later_position][0].anon_tree
            if are_asts_equal(anchor_tree, later_tree):
                last_same_position = later_position

        if last_same_position is not None:
            # The last occurrence itself stays and takes the current position
            del code_info_chain[position:last_same_position]
        position += 1
    return code_info_chain
def test_equal_asts(self) -> None:
    """Check that every pair of ASTs built from equal_sources is equal."""
    asts = get_asts_from_sources(equal_sources)
    asts_number = len(asts)
    for i, first_ast in enumerate(asts):
        # Only pairs (i, j) with i < j are checked
        for j in range(i + 1, asts_number):
            second_ast = asts[j]
            assert are_asts_equal(first_ast, second_ast), \
                f'\nast {i}: \n{get_code_from_tree(first_ast)} \n\n\nast {j}: {get_code_from_tree(second_ast)}'
def test_same_ast(self, ast: ast.AST) -> None:
    """Check that an AST is equal to itself."""
    equal_to_itself = are_asts_equal(ast, ast)
    assert equal_to_itself
def __are_same_fragments(current_anon_tree: ast.AST, solutions: pd.DataFrame,
                         next_index: int) -> bool:
    """
    Check if the anon tree of the fragment stored at next_index in solutions
    is equal to current_anon_tree.
    """
    next_fragment = __get_column_value(solutions, next_index,
                                       consts.CODE_TRACKER_COLUMN.FRAGMENT)
    # Exactly one tree is expected, since only the anon tree type is requested
    [next_anon_tree] = get_trees(next_fragment, {TREE_TYPE.ANON})
    return are_asts_equal(current_anon_tree, next_anon_tree)