def analyze_patch(self):
    """
    Analyze the loaded patch: record the added lines per modified file
    and report file/line counts as metrics.
    """
    assert self.patch is not None, "Missing patch"
    assert isinstance(self.patch, str), "Invalid patch type"

    # Parse the raw patch into per-file line statistics.
    patch_stats = rs_parsepatch.get_lines(self.patch)
    assert len(patch_stats) > 0, "Empty patch"

    # Map each modified file to the line numbers added in it.
    self.lines = {}
    for file_stats in patch_stats:
        self.lines[file_stats["filename"]] = file_stats["added_lines"]

    # Shortcut to the names of the modified files.
    self.files = self.lines.keys()

    # Report nb of files and lines analyzed.
    stats.add_metric("analysis.files", len(self.files))
    nb_lines = sum(len(added) for added in self.lines.values())
    stats.add_metric("analysis.lines", nb_lines)
def test_phabricator(mock_config, mock_revision):
    """
    Test a phabricator revision
    """
    # Basic revision attributes populated from the Phabricator payload.
    assert not hasattr(mock_revision, "mercurial")
    assert mock_revision.diff_id == 42
    assert mock_revision.diff_phid == "PHID-DIFF-test"
    assert mock_revision.url == "https://phabricator.test/D51"
    assert repr(mock_revision) == "PHID-DIFF-test"
    assert mock_revision.id == 51  # revision

    # Patch is automatically loaded from Phabricator
    assert mock_revision.patch is not None
    assert isinstance(mock_revision.patch, str)
    assert len(mock_revision.patch.split("\n")) == 15

    # Full per-file diffs, including line tuples (old_no, new_no, content).
    expected_diffs = [
        {
            "binary": False,
            "copied_from": None,
            "deleted": False,
            "filename": "test.txt",
            "lines": [(1, 1, b"Hello World"), (None, 2, b"Second line")],
            "modes": {},
            "new": False,
            "renamed_from": None,
        },
        {
            "binary": False,
            "copied_from": None,
            "deleted": False,
            "filename": "test.cpp",
            "lines": [(None, 1, b"Hello World")],
            "modes": {"new": 33188},
            "new": True,
            "renamed_from": None,
        },
    ]
    assert rs_parsepatch.get_diffs(mock_revision.patch) == expected_diffs

    # Summarized per-file line numbers (added/deleted), without contents.
    expected_lines = [
        {
            "added_lines": [2],
            "binary": False,
            "copied_from": None,
            "deleted": False,
            "deleted_lines": [],
            "filename": "test.txt",
            "modes": {},
            "new": False,
            "renamed_from": None,
        },
        {
            "added_lines": [1],
            "binary": False,
            "copied_from": None,
            "deleted": False,
            "deleted_lines": [],
            "filename": "test.cpp",
            "modes": {"new": 33188},
            "new": True,
            "renamed_from": None,
        },
    ]
    assert rs_parsepatch.get_lines(mock_revision.patch) == expected_lines
def transform(hg: hglib.client, repo_dir: str, commit: Commit):
    """
    Enrich a Commit with statistics derived from its patch: per-category
    (source/test/other) file and line counts, file sizes, detected file
    types, touched functions and rust-code-analysis code metrics.

    Returns the same Commit object, mutated in place.
    """
    hg_modified_files(hg, commit)

    # Skip analysis for ignored commits, backouts, or commits without a bug.
    if commit.ignored or len(commit.backsout) > 0 or commit.bug_id is None:
        return commit

    assert code_analysis_server is not None

    # File sizes (in lines) collected per category, for the aggregate
    # total/average/min/max statistics computed at the end.
    source_code_sizes = []
    other_sizes = []
    test_sizes = []
    # Number of files for which rust-code-analysis produced metrics; used
    # as the divisor for the average_* metrics below.
    metrics_file_count = 0

    # Export the commit as a git-style patch and parse it per file.
    patch = hg.export(revs=[commit.node.encode("ascii")], git=True)
    try:
        patch_data = rs_parsepatch.get_lines(patch)
    except Exception:
        logger.error(f"Exception while analyzing {commit.node}")
        raise

    for stats in patch_data:
        path = stats["filename"]

        # Binary files carry no line statistics; just tag the commit type.
        if stats["binary"]:
            if not is_test(path):
                commit.types.add("binary")
            continue

        size = None
        after = None
        if not stats["deleted"]:
            try:
                # Fetch the post-commit file contents to measure its size.
                after = hg.cat(
                    [os.path.join(repo_dir, path).encode("utf-8")],
                    rev=commit.node.encode("ascii"),
                )
                size = after.count(b"\n")
            except hglib.error.CommandError as e:
                # Tolerate files absent from the revision (e.g. moved);
                # re-raise anything else.
                if b"no such file in rev" not in e.err:
                    raise

        type_ = get_type(path)

        if is_test(path):
            commit.test_files_modified_num += 1
            commit.test_added += len(stats["added_lines"])
            commit.test_deleted += len(stats["deleted_lines"])
            if size is not None:
                test_sizes.append(size)
            # We don't have a 'test' equivalent of types, as most tests are JS,
            # so this wouldn't add useful information.
        elif type_ in SOURCE_CODE_TYPES_TO_EXT:
            commit.source_code_files_modified_num += 1
            commit.source_code_added += len(stats["added_lines"])
            commit.source_code_deleted += len(stats["deleted_lines"])
            if size is not None:
                source_code_sizes.append(size)

            if type_ != "IDL/IPDL/WebIDL":
                # NOTE(review): `after` may still be None here (deleted or
                # missing file) — presumably the metrics server tolerates
                # that; confirm against code_analysis_server.metrics.
                metrics = code_analysis_server.metrics(path, after, unit=False)
                if metrics.get("spaces"):
                    metrics_file_count += 1
                    error = get_metrics(commit, metrics["spaces"])
                    if error:
                        logger.debug(
                            f"rust-code-analysis error on commit {commit.node}, path {path}"
                        )

                    # Record which functions this commit touched in the file.
                    touched_functions = get_touched_functions(
                        metrics["spaces"],
                        stats["deleted_lines"],
                        stats["added_lines"],
                    )
                    if len(touched_functions) > 0:
                        commit.functions[path] = list(touched_functions)

                # Replace type with "Objective-C/C++" if rust-code-analysis detected this is an Objective-C/C++ file.
                if type_ == "C/C++" and metrics.get(
                        "language") == "obj-c/c++":
                    type_ = "Objective-C/C++"

            commit.types.add(type_)
        else:
            commit.other_files_modified_num += 1
            commit.other_added += len(stats["added_lines"])
            commit.other_deleted += len(stats["deleted_lines"])
            if size is not None:
                other_sizes.append(size)

            if type_:
                commit.types.add(type_)

    # Aggregate file-size statistics per category (0 when no files seen).
    commit.total_source_code_file_size = sum(source_code_sizes)
    commit.average_source_code_file_size = (
        commit.total_source_code_file_size / len(source_code_sizes)
        if len(source_code_sizes) > 0 else 0)
    commit.maximum_source_code_file_size = max(source_code_sizes, default=0)
    commit.minimum_source_code_file_size = min(source_code_sizes, default=0)

    commit.total_other_file_size = sum(other_sizes)
    commit.average_other_file_size = (commit.total_other_file_size /
                                      len(other_sizes)
                                      if len(other_sizes) > 0 else 0)
    commit.maximum_other_file_size = max(other_sizes, default=0)
    commit.minimum_other_file_size = min(other_sizes, default=0)

    commit.total_test_file_size = sum(test_sizes)
    commit.average_test_file_size = (commit.total_test_file_size /
                                     len(test_sizes)
                                     if len(test_sizes) > 0 else 0)
    commit.maximum_test_file_size = max(test_sizes,
                                        default=0)
    commit.minimum_test_file_size = min(test_sizes, default=0)

    if metrics_file_count:
        # Averages over the files that actually produced metrics.
        commit.average_cyclomatic = commit.total_cyclomatic / metrics_file_count
        commit.average_halstead_n2 = commit.total_halstead_n2 / metrics_file_count
        commit.average_halstead_N2 = commit.total_halstead_N2 / metrics_file_count
        commit.average_halstead_n1 = commit.total_halstead_n1 / metrics_file_count
        commit.average_halstead_N1 = commit.total_halstead_N1 / metrics_file_count
        commit.average_source_loc = commit.total_source_loc / metrics_file_count
        commit.average_logical_loc = commit.total_logical_loc / metrics_file_count
    else:
        # these values are initialized with sys.maxsize (because we take the min)
        # if no files, then reset them to 0 (it'd be stupid to have min > max)
        commit.minimum_cyclomatic = 0
        commit.minimum_halstead_N2 = 0
        commit.minimum_halstead_n2 = 0
        commit.minimum_halstead_N1 = 0
        commit.minimum_halstead_n1 = 0
        commit.minimum_source_loc = 0
        commit.minimum_logical_loc = 0

    return commit
def read(patch):
    """
    Cross-check the three parsepatch outputs for a patch (counts, diffs,
    lines) against each other, then check diffs against whatthepatch.
    """
    counts = pp.get_counts(patch)
    diffs = pp.get_diffs(patch)
    lines = pp.get_lines(patch)

    # compare counts & diffs: the counts entry must equal the diffs entry
    # with "lines" replaced by added/deleted tallies.
    assert len(diffs) == len(counts)
    for count, full_diff in zip(counts, diffs):
        rebuilt = copy.deepcopy(full_diff)
        del rebuilt["lines"]
        rebuilt["added_lines"] = 0
        rebuilt["deleted_lines"] = 0
        for old_no, new_no, _ in full_diff["lines"]:
            if old_no is None:
                rebuilt["added_lines"] += 1
            elif new_no is None:
                rebuilt["deleted_lines"] += 1
        assert rebuilt == count

    # compare lines & diffs: same idea, but with line-number lists.
    assert len(diffs) == len(lines)
    for line_entry, full_diff in zip(lines, diffs):
        rebuilt = copy.deepcopy(full_diff)
        del rebuilt["lines"]
        rebuilt["added_lines"] = []
        rebuilt["deleted_lines"] = []
        for old_no, new_no, _ in full_diff["lines"]:
            if old_no is None:
                rebuilt["added_lines"].append(new_no)
            elif new_no is None:
                rebuilt["deleted_lines"].append(old_no)
        assert rebuilt == line_entry

    # whatthepatch only accepts text patches.
    if not isinstance(patch, str):
        patch = patch.decode("utf-8")
    wp_diffs = list(wp.parse_patch(patch))
    assert len(wp_diffs) == len(diffs)

    # compare wtp and pp outputs
    for ppd, wpd in zip(diffs, wp_diffs):
        new_path = get_filename(wpd.header.new_path)
        old_path = get_filename(wpd.header.old_path)
        assert ppd["filename"] == new_path
        if new_path == old_path:
            assert ppd["renamed_from"] is None
        else:
            assert (ppd["renamed_from"] == old_path
                    or ppd["copied_from"] == old_path)

        if not wpd.changes:
            assert not ppd["lines"]
            continue

        changes = list(wpd.changes)
        assert len(ppd["lines"]) == len(changes)
        for pp_line, wp_line in zip(ppd["lines"], changes):
            w_first, w_second, w_content, _hunk = wp_line
            p_first, p_second, p_content = pp_line
            assert w_first == p_first
            assert w_second == p_second
            # parsepatch yields bytes, whatthepatch yields str.
            assert w_content == p_content.decode("utf-8")