def test_increase_interaction_amount(self) -> None:
    """Checks that an interaction whose amount increased between two
    reports is shown in the diff."""
    report_diff = BlameReportDiff(self.reports[1], self.reports[0])
    func_entry = report_diff.get_blame_result_function_entry('bool_exec')

    # The changed function has to be listed in the diff under its own name
    self.assertEqual(func_entry.name, 'bool_exec')
    self.assertEqual(func_entry.demangled_name, 'bool_exec')
    self.assertEqual(len(func_entry.interactions), 2)

    # Inspect the second interaction, i.e., the one that was increased
    increased = func_entry.interactions[1]
    self.assertEqual(
        increased.base_taint.commit.commit_hash,
        FullCommitHash('48f8ed5347aeb9d54e7ea041b1f8d67ffe74db33')
    )

    taints = increased.interacting_taints
    self.assertEqual(len(taints), 2)
    self.assertEqual(
        taints[0].commit.commit_hash,
        FullCommitHash('a387695a1a2e52dcb1c5b21e73d2fd5a6aadbaf9')
    )
    self.assertEqual(
        taints[1].commit.commit_hash,
        FullCommitHash('e8999a84efbd9c3e739bff7af39500d14e61bfbc')
    )

    self.assertEqual(increased.amount, 2)
def test_remove_function_between_reports(self) -> None:
    """Checks if the diff contains functions that were removed between
    reports."""
    report_diff = BlameReportDiff(self.reports[2], self.reports[0])
    removed = report_diff.get_blame_result_function_entry('bool_exec')

    # The removed function must still be represented in the diff
    self.assertEqual(removed.name, 'bool_exec')
    self.assertEqual(removed.demangled_name, 'bool_exec')
    self.assertEqual(len(removed.interactions), 2)

    # First interaction: a single interacting taint
    first = removed.interactions[0]
    self.assertEqual(
        first.base_taint.commit.commit_hash,
        FullCommitHash('48f8ed5347aeb9d54e7ea041b1f8d67ffe74db33')
    )
    first_taints = first.interacting_taints
    self.assertEqual(len(first_taints), 1)
    self.assertEqual(
        first_taints[0].commit.commit_hash,
        FullCommitHash('a387695a1a2e52dcb1c5b21e73d2fd5a6aadbaf9')
    )
    self.assertEqual(first.amount, 22)

    # Second interaction: two interacting taints
    second = removed.interactions[1]
    self.assertEqual(
        second.base_taint.commit.commit_hash,
        FullCommitHash('48f8ed5347aeb9d54e7ea041b1f8d67ffe74db33')
    )
    second_taints = second.interacting_taints
    self.assertEqual(len(second_taints), 2)
    self.assertEqual(
        second_taints[0].commit.commit_hash,
        FullCommitHash('a387695a1a2e52dcb1c5b21e73d2fd5a6aadbaf9')
    )
    self.assertEqual(
        second_taints[1].commit.commit_hash,
        FullCommitHash('e8999a84efbd9c3e739bff7af39500d14e61bfbc')
    )
    self.assertEqual(second.amount, 5)
def test_num_instructions_diff_removed(self) -> None:
    """Checks if we correctly calculate the number of instructions in a
    diff for the 'bool_exec' entry of the third report."""
    report_diff = BlameReportDiff(self.reports[2], self.reports[0])
    func_entry = report_diff.get_blame_result_function_entry('bool_exec')

    # The entry must be the requested function and carry the expected
    # instruction count
    self.assertEqual(func_entry.name, 'bool_exec')
    self.assertEqual(func_entry.num_instructions, 42)
def test_num_instructions_diff_added(self) -> None:
    """Checks if we correctly calculate the number of instructions in a
    diff for the '_Z7doStuffdd' entry of the second report."""
    report_diff = BlameReportDiff(self.reports[1], self.reports[0])
    func_entry = report_diff.get_blame_result_function_entry(
        '_Z7doStuffdd'
    )

    # The entry must be the requested function and carry the expected
    # instruction count
    self.assertEqual(func_entry.name, '_Z7doStuffdd')
    self.assertEqual(func_entry.num_instructions, 42)
def test_add_function_between_reports(self) -> None:
    """Checks if the diff contains functions that were added between
    reports."""
    report_diff = BlameReportDiff(self.reports[1], self.reports[0])
    added = report_diff.get_blame_result_function_entry('_Z7doStuffdd')

    # The added function must be listed with mangled and demangled name
    self.assertEqual(added.name, '_Z7doStuffdd')
    self.assertEqual(added.demangled_name, 'doStuff(double, double)')
    self.assertEqual(len(added.interactions), 1)

    # Its only interaction has exactly one interacting taint
    interaction = added.interactions[0]
    self.assertEqual(
        interaction.base_taint.commit.commit_hash,
        FullCommitHash('48f8ed5347aeb9d54e7ea041b1f8d67ffe74db33')
    )
    taints = interaction.interacting_taints
    self.assertEqual(len(taints), 1)
    self.assertEqual(
        taints[0].commit.commit_hash,
        FullCommitHash('a387695a1a2e52dcb1c5b21e73d2fd5a6aadbaf9')
    )
    self.assertEqual(interaction.amount, 2)
def create_data_frame_for_report(
    report_paths: tp.Tuple[Path, Path]
) -> tp.Tuple[pd.DataFrame, str, str]:
    """
    Build the data frame for the diff between two blame reports.

    Args:
        report_paths: pair of report paths; index 0 is loaded as the head
                      report, index 1 as the predecessor report

    Returns:
        a tuple of the data frame (one row per base/interacting pair of
        the generated mapping), the result of ``id_from_paths`` and the
        result of ``timestamp_from_paths`` for ``report_paths``
    """
    head_report = load_blame_report(report_paths[0])
    pred_report = load_blame_report(report_paths[1])
    pair_mapping = gen_base_to_inter_commit_repo_pair_mapping(
        BlameReportDiff(head_report, pred_report)
    )

    rows: tp.List[tp.Dict[str, tp.Any]] = []
    for base_pair, inter_amounts in pair_mapping.items():
        for inter_pair, amount in inter_amounts.items():
            # The head-commit columns are evaluated per row, so nothing is
            # looked up in the commit map when the mapping is empty.
            rows.append({
                'revision': head_report.head_commit.hash,
                'time_id': commit_map.short_time_id(head_report.head_commit),
                'base_hash': base_pair.commit.commit_hash.hash,
                'base_lib': base_pair.commit.repository_name,
                'inter_hash': inter_pair.commit.commit_hash.hash,
                'inter_lib': inter_pair.commit.repository_name,
                'amount': amount
            })

    return (
        pd.DataFrame(rows), id_from_paths(report_paths),
        timestamp_from_paths(report_paths)
    )
def test_function_not_in_diff(self) -> None:
    """Checks that only functions that changed are in the diff."""
    # Second report against the first one
    second_diff = BlameReportDiff(self.reports[1], self.reports[0])
    for present in ('bool_exec', '_Z7doStuffii', '_Z7doStuffdd'):
        self.assertTrue(second_diff.has_function(present))
    self.assertFalse(second_diff.has_function('adjust_assignment_expression'))

    # Third and fourth report against the first one share the same
    # expectations
    for report_idx in (2, 3):
        later_diff = BlameReportDiff(
            self.reports[report_idx], self.reports[0]
        )
        self.assertTrue(later_diff.has_function('bool_exec'))
        for absent in ('adjust_assignment_expression', '_Z7doStuffii'):
            self.assertFalse(later_diff.has_function(absent))
def create_data_frame_for_report(
    report_paths: tp.Tuple[Path, Path]
) -> tp.Tuple[pd.DataFrame, str, str]:
    """
    Build a single-row data frame with interaction metrics for the diff
    between two blame reports.

    Args:
        report_paths: pair of report paths; index 0 is loaded as the head
                      report, index 1 as the predecessor report

    Returns:
        a tuple of the one-row data frame, the result of ``id_from_paths``
        and the result of ``timestamp_from_paths`` for ``report_paths``
    """
    # Function-scope import so the block brings its own dependency along.
    from datetime import timezone

    # Look-up commit and infos about the HEAD commit of the report
    head_report = load_blame_report(report_paths[0])
    pred_report = load_blame_report(report_paths[1])
    commit = repo.get(head_report.head_commit.hash)
    # ``datetime.utcfromtimestamp`` is deprecated since Python 3.12 and
    # yields a naive object; the timezone-aware call gives the same UTC
    # calendar date, and only ``.year`` is read from it below.
    commit_date = datetime.fromtimestamp(commit.commit_time, tz=timezone.utc)
    pred_commit = repo.get(pred_report.head_commit.hash)

    diff_between_head_pred = BlameReportDiff(head_report, pred_report)

    # Calculate the total churn between pred and base commit
    code_churn = calc_code_churn(
        Path(repo.path), FullCommitHash.from_pygit_commit(pred_commit),
        FullCommitHash.from_pygit_commit(commit),
        ChurnConfig.create_c_style_languages_config()
    )
    # presumably elements 1 and 2 are insertions and deletions -- confirm
    # against calc_code_churn
    total_churn = code_churn[1] + code_churn[2]

    def weighted_avg(tuples: tp.List[tp.Tuple[int, int]]) -> float:
        """Sum ``degree * amount`` over all tuples and divide it by the sum
        of the degrees (at least 1, so an empty list yields 0)."""
        # NOTE(review): the divisor is the sum of the degrees, not the sum
        # of the amounts -- confirm that this weighting is intended.
        total_sum = 0
        degree_sum = 0
        for degree, amount in tuples:
            degree_sum += degree
            total_sum += degree * amount

        return total_sum / max(1, degree_sum)

    def combine_max(tuples: tp.List[tp.Tuple[int, int]]) -> float:
        """Return the largest first element of the tuples, 0 if empty."""
        if not tuples:
            return 0
        return max(degree for degree, _ in tuples)

    return (
        pd.DataFrame({
            'revision':
                head_report.head_commit.hash,
            'time_id':
                commit_map.short_time_id(head_report.head_commit),
            'churn':
                total_churn,
            'num_interactions':
                count_interactions(diff_between_head_pred),
            'num_interacting_commits':
                count_interacting_commits(diff_between_head_pred),
            'num_interacting_authors':
                count_interacting_authors(
                    diff_between_head_pred, commit_lookup
                ),
            "ci_degree_mean":
                weighted_avg(generate_degree_tuples(diff_between_head_pred)),
            "author_mean":
                weighted_avg(
                    generate_author_degree_tuples(
                        diff_between_head_pred, commit_lookup
                    )
                ),
            "avg_time_mean":
                weighted_avg(
                    generate_avg_time_distribution_tuples(
                        diff_between_head_pred, commit_lookup, 1
                    )
                ),
            "ci_degree_max":
                combine_max(generate_degree_tuples(diff_between_head_pred)),
            "author_max":
                combine_max(
                    generate_author_degree_tuples(
                        diff_between_head_pred, commit_lookup
                    )
                ),
            "avg_time_max":
                combine_max(
                    generate_max_time_distribution_tuples(
                        diff_between_head_pred, commit_lookup, 1
                    )
                ),
            'year':
                commit_date.year,
        },
                     index=[0]), id_from_paths(report_paths),
        timestamp_from_paths(report_paths)
    )