def test_get_revs(fake_hg_repo):
    hg, local, remote = fake_hg_repo

    add_file(hg, local, "file1", "1\n2\n3\n4\n5\n6\n7\n")
    revision1 = commit(hg)

    revs = repository.get_revs(hg)

    assert len(revs) == 1, "There should be one revision now"
    assert revs[0].decode("ascii") == revision1

    add_file(hg, local, "file2", "1\n2\n3\n4\n5\n6\n7\n")
    revision2 = commit(hg)

    revs = repository.get_revs(hg)

    assert len(revs) == 2, "There should be two revisions now"
    assert revs[0].decode("ascii") == revision1
    assert revs[1].decode("ascii") == revision2

    add_file(hg, local, "file3", "1\n2\n3\n4\n5\n6\n7\n")
    revision3 = commit(hg)

    revs = repository.get_revs(hg)

    assert len(revs) == 3, "There should be three revisions now"
    assert revs[0].decode("ascii") == revision1
    assert revs[1].decode("ascii") == revision2
    assert revs[2].decode("ascii") == revision3

    revs = repository.get_revs(hg, revision2)

    assert len(revs) == 2, "There should be two revisions after the first"
    assert revs[0].decode("ascii") == revision2
    assert revs[1].decode("ascii") == revision3
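Every snippet below drives the same helper, repository.get_revs. As a point of reference, here is a minimal sketch of what it plausibly looks like, inferred only from the call sites in these examples (an hglib client plus optional rev_start/rev_end, returning node hashes as bytes); the exact options bugbug passes to "hg log" are an assumption:

import hglib

def get_revs(hg, rev_start=0, rev_end="tip"):
    # Sketch: forward the range to "hg log" and collect one node hash per line.
    # hglib returns raw bytes, which is why callers .decode("ascii") the results.
    args = hglib.util.cmdbuilder(
        b"log",
        template="{node}\n",
        rev=f"{rev_start}:{rev_end}",
    )
    return hg.rawcommand(args).splitlines()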
def get_commits_to_ignore(self):
    logger.info("Download previous commits to ignore...")
    if db.is_old_version(IGNORED_COMMITS_DB) or not db.exists(IGNORED_COMMITS_DB):
        db.download(IGNORED_COMMITS_DB, force=True)

    logger.info("Get previously classified commits...")
    prev_commits_to_ignore = list(db.read(IGNORED_COMMITS_DB))
    logger.info(f"Already found {len(prev_commits_to_ignore)} commits to ignore...")

    if len(prev_commits_to_ignore) > 0:
        rev_start = "children({})".format(prev_commits_to_ignore[-1]["rev"])
    else:
        rev_start = 0

    # 2 days more than the end date, so we can know if a commit was backed out.
    # We have to do this as recent commits might be missing in the mercurial <-> git map,
    # otherwise we could just use "tip".
    end_date = datetime.now() - RELATIVE_END_DATE + relativedelta(days=2)
    with hglib.open(self.mercurial_repo_dir) as hg:
        revs = repository.get_revs(
            hg, rev_start, "pushdate('{}')".format(end_date.strftime("%Y-%m-%d"))
        )

    # Given that we use the pushdate, there might be cases where the starting commit
    # is returned too (e.g. if we rerun the task on the same day).
    if len(prev_commits_to_ignore) > 0:
        found_prev = -1
        for i, rev in enumerate(revs):
            if rev.decode("utf-8") == prev_commits_to_ignore[-1]["rev"]:
                found_prev = i
                break
        revs = revs[found_prev + 1:]

    commits = repository.hg_log_multi(self.mercurial_repo_dir, revs)

    repository.set_commits_to_ignore(self.mercurial_repo_dir, commits)

    commits_to_ignore = []
    for commit in commits:
        if commit.ignored or commit.backedoutby:
            commits_to_ignore.append(
                {
                    "rev": commit.node,
                    "type": "backedout" if commit.backedoutby else "",
                }
            )

    logger.info(f"{len(commits_to_ignore)} new commits to ignore...")
    logger.info(
        "...of which {} are backed-out".format(
            sum(1 for commit in commits_to_ignore if commit["type"] == "backedout")
        )
    )

    db.append(IGNORED_COMMITS_DB, commits_to_ignore)
    zstd_compress(IGNORED_COMMITS_DB)

    return prev_commits_to_ignore + commits_to_ignore
def get_commits_to_ignore(self):
    logger.info("Download previous commits to ignore...")
    db.download(IGNORED_COMMITS_DB)

    logger.info("Get previously classified commits...")
    prev_commits_to_ignore = list(db.read(IGNORED_COMMITS_DB))
    logger.info(f"Already found {len(prev_commits_to_ignore)} commits to ignore...")

    # When we already have some analyzed commits, re-analyze the last 3500 to make sure
    # we didn't miss back-outs that happened since the last analysis.
    if len(prev_commits_to_ignore) > 0:
        first_commit_to_reanalyze = (
            -3500 if len(prev_commits_to_ignore) >= 3500 else 0
        )
        rev_start = "children({})".format(
            prev_commits_to_ignore[first_commit_to_reanalyze]["rev"]
        )
    else:
        rev_start = 0

    with hglib.open(self.mercurial_repo_dir) as hg:
        revs = repository.get_revs(hg, rev_start)

    commits = repository.hg_log_multi(self.mercurial_repo_dir, revs)

    with hglib.open(self.mercurial_repo_dir) as hg:
        repository.set_commits_to_ignore(hg, self.mercurial_repo_dir, commits)

    for commit in commits:
        commit.ignored |= commit.author_email == "*****@*****.**"

    chosen_commits = set()
    commits_to_ignore = []
    for commit in commits:
        if commit.ignored or commit.backedoutby:
            commits_to_ignore.append(
                {
                    "rev": commit.node,
                    "type": "backedout" if commit.backedoutby else "",
                }
            )
            chosen_commits.add(commit.node)

    logger.info(f"{len(commits_to_ignore)} new commits to ignore...")

    for prev_commit in prev_commits_to_ignore[::-1]:
        if prev_commit["rev"] not in chosen_commits:
            commits_to_ignore.append(prev_commit)
            chosen_commits.add(prev_commit["rev"])

    logger.info(f"{len(commits_to_ignore)} commits to ignore...")
    logger.info(
        "...of which {} are backed-out".format(
            sum(1 for commit in commits_to_ignore if commit["type"] == "backedout")
        )
    )

    db.write(IGNORED_COMMITS_DB, commits_to_ignore)
    zstd_compress(IGNORED_COMMITS_DB)
    db.upload(IGNORED_COMMITS_DB)
def retrieve_commits(self, limit):
    repository.clone(self.repo_dir)

    if limit:
        # Mercurial revset supports negative integers starting from tip
        rev_start = -limit
    else:
        db.download(repository.COMMITS_DB, support_files_too=True)

        rev_start = 0
        for commit in repository.get_commits():
            rev_start = f"children({commit['node']})"

    with hglib.open(self.repo_dir) as hg:
        revs = repository.get_revs(hg, rev_start)

    chunk_size = 70000

    for i in range(0, len(revs), chunk_size):
        repository.download_commits(self.repo_dir, revs=revs[i:(i + chunk_size)])

    logger.info("commit data extracted from repository")

    # Some commits that were already in the DB from the previous run might need
    # to be updated (e.g. coverage information).
    repository.update_commits()

    zstd_compress(repository.COMMITS_DB)
    create_tar_zst(os.path.join("data", repository.COMMIT_EXPERIENCES_DB))
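The two rev_start forms used above, a negative integer and children(<node>), are ordinary Mercurial revset expressions that get_revs presumably forwards to "hg log". A hypothetical standalone illustration via hglib (the repo path and node are placeholders):

import hglib

with hglib.open("/path/to/repo") as hg:
    # A negative integer counts back from tip: here, the three newest revisions.
    recent = [entry[1] for entry in hg.log(revrange="-3:tip")]

    # children(<node>) selects the direct descendants of a known commit; on a
    # linear history that is a single changeset, so "children(<node>):tip" yields
    # exactly the revisions pushed after the last commit already processed.
    newer = [entry[1] for entry in hg.log(revrange="children(abcdef012345):tip")]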
def go(repo_dir):
    with hglib.open(repo_dir) as hg:
        revs = repository.get_revs(hg, -1000, -500)

        commits = repository.hg_log(hg, revs)

    backouts = list(
        set(commit.backedoutby for commit in commits if commit.ever_backedout)
    )
    backedouts = list(
        set(commit.node for commit in commits if commit.ever_backedout)
    )

    likely_label_count = 0
    possible_label_count = 0
    likely_group_count = 0
    possible_group_count = 0

    backout_regressions = {}

    for backout in tqdm(backouts):
        p = Push(backout)

        label_regressions = p.get_regressions("label")
        likely_label_count += len(p.get_likely_regressions("label"))
        possible_label_count += len(p.get_possible_regressions("label"))

        group_regressions = p.get_regressions("group")
        likely_group_count += len(p.get_likely_regressions("group"))
        possible_group_count += len(p.get_possible_regressions("group"))

        if len(label_regressions) > 0 or len(group_regressions) > 0:
            backout_regressions[backout] = {
                "label": label_regressions,
                "group": group_regressions,
            }

    print(f"Likely labels for backouts: {likely_label_count}")
    print(f"Likely groups for backouts: {likely_group_count}")
    print(f"Possible labels for backouts: {possible_label_count}")
    print(f"Possible groups for backouts: {possible_group_count}")

    backedout_regressions = {}

    for backedout in tqdm(backedouts):
        p = Push(backedout)

        label_regressions = p.get_regressions("label")
        group_regressions = p.get_regressions("group")

        if (
            len(p.get_likely_regressions("label")) == 0
            or len(p.get_likely_regressions("group")) == 0
        ):
            backedout_regressions[backedout] = {
                "label": label_regressions,
                "group": group_regressions,
            }

    with open("backout_regressions.json", "w") as f:
        json.dump(backout_regressions, f)

    with open("backedout_regressions.json", "w") as f:
        json.dump(backedout_regressions, f)
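The go function above relies on names from its enclosing module; presumably the module-level imports look roughly like this (sourcing Push from mozci is an assumption based on the get_regressions/get_likely_regressions/get_possible_regressions calls):

import json

import hglib
from mozci.push import Push  # assumed origin of Push
from tqdm import tqdm

from bugbug import repository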
def test_get_revs(fake_hg_repo):
    hg, local, remote = fake_hg_repo

    add_file(hg, local, "file1", "1\n2\n3\n4\n5\n6\n7\n")
    revision1 = commit(hg)

    revs = repository.get_revs(hg)

    assert len(revs) == 1, "There should be one revision now"
    assert revs[0].decode("ascii") == revision1

    add_file(hg, local, "file2", "1\n2\n3\n4\n5\n6\n7\n")
    revision2 = commit(hg)

    revs = repository.get_revs(hg)

    assert len(revs) == 2, "There should be two revisions now"
    assert revs[0].decode("ascii") == revision1
    assert revs[1].decode("ascii") == revision2
def test_hg_modified_files(fake_hg_repo):
    hg, local, remote = fake_hg_repo

    add_file(hg, local, "f1", "1\n2\n3\n4\n5\n6\n7\n")
    revision1 = commit(hg, date=datetime(1991, 4, 16, tzinfo=timezone.utc))

    add_file(hg, local, "f2", "1\n2\n3\n4\n5\n6\n7\n")
    revision2 = commit(hg, "Bug 123 - Prova. r=moz,rev2")

    hg.copy(
        bytes(os.path.join(local, "f2"), "ascii"),
        bytes(os.path.join(local, "f2copy"), "ascii"),
    )
    revision3 = commit(hg, "Copy")

    hg.move(
        bytes(os.path.join(local, "f2copy"), "ascii"),
        bytes(os.path.join(local, "f2copymove"), "ascii"),
    )
    revision4 = commit(hg, "Move")

    hg.push(dest=bytes(remote, "ascii"))

    revs = repository.get_revs(hg, revision1)

    commits = repository.hg_log(hg, revs)

    repository.path_to_component = {}

    for c in commits:
        repository.hg_modified_files(hg, c)

    assert commits[0].node == revision1
    assert commits[0].files == ["f1"]
    assert commits[0].file_copies == {}

    assert commits[1].node == revision2
    assert commits[1].files == ["f2"]
    assert commits[1].file_copies == {}

    assert commits[2].node == revision3
    assert commits[2].files == ["f2copy"]
    assert commits[2].file_copies == {"f2": "f2copy"}

    assert commits[3].node == revision4
    assert commits[3].files == ["f2copy", "f2copymove"]
    assert commits[3].file_copies == {"f2copy": "f2copymove"}
def get_commits_to_ignore(repo_dir):
    # TODO: Make repository analyze all commits, even those to ignore, but add a field
    # "ignore" or a function should_ignore that analyzes the commit data. This way we
    # don't have to clone the Mercurial repository in this script.
    with hglib.open(repo_dir) as hg:
        revs = repository.get_revs(hg, -10000)

    commits = repository.hg_log_multi(repo_dir, revs)

    commits_to_ignore = []

    def append_commits_to_ignore(commits, type_):
        for commit in commits:
            commits_to_ignore.append(
                {
                    "mercurial_rev": commit.node,
                    "git_rev": vcs_map.mercurial_to_git(commit.node),
                    "type": type_,
                }
            )

    append_commits_to_ignore(
        list(repository.get_commits_to_ignore(repo_dir, commits)), ""
    )
    logger.info(
        f"{len(commits_to_ignore)} commits to ignore (excluding backed-out commits)"
    )

    append_commits_to_ignore(
        (commit for commit in commits if commit.backedoutby), "backedout"
    )
    logger.info(
        f"{len(commits_to_ignore)} commits to ignore (including backed-out commits)"
    )

    with open("commits_to_ignore.csv", "w") as f:
        writer = csv.DictWriter(f, fieldnames=["mercurial_rev", "git_rev", "type"])
        writer.writeheader()
        writer.writerows(commits_to_ignore)

    return commits_to_ignore
def test_hg_log(fake_hg_repo):
    hg, local, remote = fake_hg_repo

    add_file(hg, local, "file1", "1\n2\n3\n4\n5\n6\n7\n")
    revision1 = commit(hg, date=datetime(1991, 4, 16, tzinfo=timezone.utc))

    first_push_date = datetime.utcnow()
    hg.push(dest=bytes(remote, "ascii"))

    add_file(hg, local, "file2", "1\n2\n3\n4\n5\n6\n7\n")
    revision2 = commit(hg, "Bug 123 - Prova. r=moz,rev2")

    hg.copy(
        bytes(os.path.join(local, "file2"), "ascii"),
        bytes(os.path.join(local, "file2copy"), "ascii"),
    )
    revision3 = commit(hg)

    hg.move(
        bytes(os.path.join(local, "file2copy"), "ascii"),
        bytes(os.path.join(local, "file2copymove"), "ascii"),
    )
    revision4 = commit(hg)

    hg.backout(
        rev=revision4,
        message=f"Backout {revision4[:12]}",
        user="sheriff",
        date=datetime(2019, 4, 16, tzinfo=timezone.utc),
    )
    revision5 = hg.log(limit=1)[0][1].decode("ascii")

    # Wait one second, to have a different pushdate.
    time.sleep(1)
    second_push_date = datetime.utcnow()
    hg.push(dest=bytes(remote, "ascii"))

    add_file(hg, local, "file3", "1\n2\n3\n4\n5\n6\n7\n")
    revision6 = commit(hg)

    copy_pushlog_database(remote, local)

    revs = repository.get_revs(hg)

    # Wait one second, to have a different pushdate.
    time.sleep(1)
    hg_log_date = datetime.utcnow()

    commits = repository.hg_log(hg, revs)
    assert len(commits) == 6, "hg log should return six commits"

    assert commits[0].node == revision1
    assert commits[0].author == "Moz Illa <*****@*****.**>"
    assert commits[0].desc == "Commit A file1"
    assert commits[0].date == datetime(1991, 4, 16)
    assert (
        first_push_date - relativedelta(seconds=1)
        <= commits[0].pushdate
        <= first_push_date + relativedelta(seconds=1)
    )
    assert commits[0].bug_id is None
    assert commits[0].backedoutby == ""
    assert commits[0].author_email == "*****@*****.**"
    assert commits[0].reviewers == tuple()

    assert commits[1].node == revision2
    assert commits[1].author == "Moz Illa <*****@*****.**>"
    assert commits[1].desc == "Bug 123 - Prova. r=moz,rev2"
    assert commits[1].date == datetime(2019, 4, 16)
    assert (
        second_push_date - relativedelta(seconds=1)
        <= commits[1].pushdate
        <= second_push_date + relativedelta(seconds=1)
    )
    assert commits[1].bug_id == 123
    assert commits[1].backedoutby == ""
    assert commits[1].author_email == "*****@*****.**"
    assert set(commits[1].reviewers) == {"moz", "rev2"}

    assert commits[2].node == revision3
    assert commits[2].author == "Moz Illa <*****@*****.**>"
    assert commits[2].desc == "Commit A file2copy"
    assert commits[2].date == datetime(2019, 4, 16)
    assert (
        second_push_date - relativedelta(seconds=1)
        <= commits[2].pushdate
        <= second_push_date + relativedelta(seconds=1)
    )
    assert commits[2].bug_id is None
    assert commits[2].backedoutby == ""
    assert commits[2].author_email == "*****@*****.**"
    assert commits[2].reviewers == tuple()

    assert commits[3].node == revision4
    assert commits[3].author == "Moz Illa <*****@*****.**>"
    assert commits[3].desc == "Commit A file2copymove R file2copy"
    assert commits[3].date == datetime(2019, 4, 16)
    assert (
        second_push_date - relativedelta(seconds=1)
        <= commits[3].pushdate
        <= second_push_date + relativedelta(seconds=1)
    )
    assert commits[3].bug_id is None
    assert commits[3].backedoutby == revision5
    assert commits[3].author_email == "*****@*****.**"
    assert commits[3].reviewers == tuple()

    assert commits[4].node == revision5
    assert commits[4].author == "sheriff"
    assert commits[4].desc == f"Backout {revision4[:12]}"
    assert commits[4].date == datetime(2019, 4, 16)
    assert (
        second_push_date - relativedelta(seconds=1)
        <= commits[4].pushdate
        <= second_push_date + relativedelta(seconds=1)
    )
    assert commits[4].bug_id is None
    assert commits[4].backedoutby == ""
    assert commits[4].author_email == "sheriff"
    assert commits[4].reviewers == tuple()

    assert commits[5].node == revision6
    assert commits[5].author == "Moz Illa <*****@*****.**>"
    assert commits[5].desc == "Commit A file3"
    assert commits[5].date == datetime(2019, 4, 16)
    assert (
        hg_log_date - relativedelta(seconds=1)
        <= commits[5].pushdate
        <= hg_log_date + relativedelta(seconds=1)
    )
    assert commits[5].bug_id is None
    assert commits[5].backedoutby == ""
    assert commits[5].author_email == "*****@*****.**"
    assert commits[5].reviewers == tuple()

    commits = repository.hg_log(hg, [revs[1], revs[3]])
    assert len(commits) == 3, "hg log should return three commits"
    assert commits[0].node == revision2
    assert commits[1].node == revision3
    assert commits[2].node == revision4
def boot_worker() -> None:
    # Clone autoland
    def clone_autoland() -> None:
        logger.info(f"Cloning autoland in {REPO_DIR}...")
        repository.clone(REPO_DIR, "https://hg.mozilla.org/integration/autoland")

    def extract_past_failures_label() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.PAST_FAILURES_LABEL_DB)
            )
            logger.info("Label-level past failures DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Label-level past failures DB not extracted, but missing models are allowed."
            )

    def extract_failing_together_label() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.FAILING_TOGETHER_LABEL_DB)
            )
            logger.info("Failing together label DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Failing together label DB not extracted, but missing models are allowed."
            )

    def extract_failing_together_config_group() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.FAILING_TOGETHER_CONFIG_GROUP_DB)
            )
            logger.info("Failing together config/group DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Failing together config/group DB not extracted, but missing models are allowed."
            )

    def extract_past_failures_group() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.PAST_FAILURES_GROUP_DB)
            )
            logger.info("Group-level past failures DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Group-level past failures DB not extracted, but missing models are allowed."
            )

    def extract_touched_together() -> None:
        try:
            utils.extract_file(
                os.path.join("data", test_scheduling.TOUCHED_TOGETHER_DB)
            )
            logger.info("Touched together DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Touched together DB not extracted, but missing models are allowed."
            )

    def extract_commits() -> bool:
        try:
            utils.extract_file(f"{repository.COMMITS_DB}.zst")
            logger.info("Commits DB extracted.")
            return True
        except FileNotFoundError:
            logger.info("Commits DB not extracted, but missing models are allowed.")
            assert ALLOW_MISSING_MODELS
            return False

    def extract_commit_experiences() -> None:
        try:
            utils.extract_file(os.path.join("data", repository.COMMIT_EXPERIENCES_DB))
            logger.info("Commit experiences DB extracted.")
        except FileNotFoundError:
            assert ALLOW_MISSING_MODELS
            logger.info(
                "Commit experiences DB not extracted, but missing models are allowed."
            )

    @tenacity.retry(
        stop=tenacity.stop_after_attempt(7),
        wait=tenacity.wait_exponential(multiplier=1, min=1, max=8),
    )
    def retrieve_schedulable_tasks() -> None:
        r = requests.get(
            "https://hg.mozilla.org/integration/autoland/json-pushes?version=2&tipsonly=1"
        )
        r.raise_for_status()
        revs = [
            push_obj["changesets"][0]
            for push_id, push_obj in r.json()["pushes"].items()
        ]

        logger.info(f"Retrieving known tasks from {revs}")

        # Store in a file the list of tasks in the latest autoland pushes.
        # We use more than one to protect ourselves from broken decision tasks.
        known_tasks = set()
        for rev in revs:
            r = requests.get(
                f"https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/gecko.v2.autoland.revision.{rev}.taskgraph.decision/artifacts/public/target-tasks.json"
            )
            if r.ok:
                known_tasks.update(r.json())

        logger.info(f"Retrieved {len(known_tasks)} tasks")

        assert len(known_tasks) > 0

        with open("known_tasks", "w") as f:
            f.write("\n".join(known_tasks))

    with concurrent.futures.ThreadPoolExecutor() as executor:
        clone_autoland_future = executor.submit(clone_autoland)
        retrieve_schedulable_tasks_future = executor.submit(retrieve_schedulable_tasks)

        commits_db_extracted = extract_commits()
        extract_commit_experiences()
        extract_touched_together()
        extract_past_failures_label()
        extract_past_failures_group()
        extract_failing_together_label()
        extract_failing_together_config_group()

        if commits_db_extracted:
            # Update the commits DB.
            logger.info("Browsing all commits...")
            nodes = collections.deque(
                (commit["node"] for commit in repository.get_commits()), maxlen=4096
            )
            nodes.reverse()
            logger.info("All commits browsed.")

            # Wait for the repository to be cloned, as it's required to call
            # repository.download_commits.
            logger.info("Waiting autoland to be cloned...")
            clone_autoland_future.result()

            with hglib.open(REPO_DIR) as hg:
                # Try using nodes backwards, in case we have some node that was on
                # central at the time we mined commits, but is not yet on autoland.
                for node in nodes:
                    try:
                        revs = repository.get_revs(hg, rev_start=f"children({node})")
                        break
                    except hglib.error.CommandError as e:
                        if b"abort: unknown revision" not in e.err:
                            raise

            logger.info("Updating commits DB...")
            commits = repository.download_commits(
                REPO_DIR, revs=revs, use_single_process=True
            )
            logger.info("Commits DB updated.")

            logger.info("Updating touched together DB...")
            if len(commits) > 0:
                # Update the touched together DB.
                update_touched_together_gen = test_scheduling.update_touched_together()
                next(update_touched_together_gen)

                update_touched_together_gen.send(commits[-1]["node"])

                try:
                    update_touched_together_gen.send(None)
                except StopIteration:
                    pass
            logger.info("Touched together DB updated.")

        # Wait for the list of schedulable tasks to be downloaded and written to disk.
        retrieve_schedulable_tasks_future.result()

    logger.info("Worker boot done")
def apply_phab(self, hg, phabricator_deployment, diff_id):
    if phabricator_deployment == PHAB_PROD:
        api_key = get_secret("PHABRICATOR_TOKEN")
        url = get_secret("PHABRICATOR_URL")
    else:
        api_key = get_secret("PHABRICATOR_DEV_TOKEN")
        url = get_secret("PHABRICATOR_DEV_URL")

    phabricator_api = PhabricatorAPI(api_key=api_key, url=url)

    # Get the stack of patches
    stack = phabricator_api.load_patches_stack(diff_id)
    assert len(stack) > 0, "No patches to apply"

    # Find the first unknown base revision
    needed_stack = []
    revisions = {}
    for patch in reversed(stack):
        needed_stack.insert(0, patch)

        # Stop as soon as a base revision is available
        if self.has_revision(hg, patch.base_revision):
            logger.info(
                f"Stopping at diff {patch.id} and revision {patch.base_revision}"
            )
            break

    if not needed_stack:
        logger.info("All the patches are already applied")
        return

    # Load all the diff revisions
    diffs = phabricator_api.search_diffs(diff_phid=[p.phid for p in stack])
    revisions = {
        diff["phid"]: phabricator_api.load_revision(
            rev_phid=diff["revisionPHID"], attachments={"reviewers": True}
        )
        for diff in diffs
    }

    # Update repo to base revision
    hg_base = needed_stack[0].base_revision
    if not self.has_revision(hg, hg_base):
        logger.warning("Missing base revision {} from Phabricator".format(hg_base))
        hg_base = "tip"

    if hg_base:
        hg.update(rev=hg_base, clean=True)
        logger.info(f"Updated repo to {hg_base}")

        if self.git_repo_dir and hg_base != "tip":
            try:
                self.git_base = tuple(
                    vcs_map.mercurial_to_git(self.git_repo_dir, [hg_base])
                )[0]
                subprocess.run(
                    ["git", "checkout", "-b", "analysis_branch", self.git_base],
                    check=True,
                    cwd=self.git_repo_dir,
                )
                logger.info(f"Updated git repo to {self.git_base}")
            except Exception as e:
                logger.info(f"Updating git repo to Mercurial {hg_base} failed: {e}")

    def load_user(phid):
        if phid.startswith("PHID-USER"):
            return phabricator_api.load_user(user_phid=phid)
        elif phid.startswith("PHID-PROJ"):
            # TODO: Support group reviewers somehow.
            logger.info(f"Skipping group reviewer {phid}")
        else:
            raise Exception(f"Unsupported reviewer {phid}")

    for patch in needed_stack:
        revision = revisions[patch.phid]

        message = "{}\n\n{}".format(
            revision["fields"]["title"], revision["fields"]["summary"]
        )

        author_name = None
        author_email = None

        if patch.commits:
            author_name = patch.commits[0]["author"]["name"]
            author_email = patch.commits[0]["author"]["email"]

        if author_name is None:
            author = load_user(revision["fields"]["authorPHID"])
            author_name = author["fields"]["realName"]
            # XXX: Figure out a way to know the email address of the author.
            author_email = author["fields"]["username"]

        reviewers = list(
            filter(
                None,
                (
                    load_user(reviewer["reviewerPHID"])
                    for reviewer in revision["attachments"]["reviewers"]["reviewers"]
                ),
            )
        )
        reviewers = set(reviewer["fields"]["username"] for reviewer in reviewers)

        if len(reviewers):
            message = replace_reviewers(message, reviewers)

        logger.info(
            f"Applying {patch.phid} from revision {revision['id']}: {message}"
        )

        hg.import_(
            patches=io.BytesIO(patch.patch.encode("utf-8")),
            message=message.encode("utf-8"),
            user=f"{author_name} <{author_email}>".encode("utf-8"),
        )

        if self.git_repo_dir:
            # Apply the same patch to the git copy of the repository, feeding it
            # to "patch -p1" on stdin, then commit it with matching author info.
            proc = subprocess.Popen(
                ["patch", "-p1", "--no-backup-if-mismatch", "--force"],
                stdin=subprocess.PIPE,
                cwd=self.git_repo_dir,
            )
            proc.communicate(patch.patch.encode("utf-8"))
            assert proc.returncode == 0, "Failed to apply patch"

            subprocess.run(
                [
                    "git",
                    "-c",
                    f"user.name={author_name}",
                    "-c",
                    f"user.email={author_email}",
                    "commit",
                    "-am",
                    message,
                ],
                check=True,
                cwd=self.git_repo_dir,
            )

    latest_rev = repository.get_revs(hg, f"-{len(stack)}")
def get_commits_to_ignore(self):
    logger.info("Download previous commits to ignore...")
    db.download(IGNORED_COMMITS_DB)

    logger.info("Get previously classified commits...")
    prev_commits_to_ignore = list(db.read(IGNORED_COMMITS_DB))
    logger.info(f"Already found {len(prev_commits_to_ignore)} commits to ignore...")

    # When we already have some analyzed commits, re-analyze the last 3500 to make sure
    # we didn't miss back-outs that happened since the last analysis.
    if len(prev_commits_to_ignore) > 0:
        first_commit_to_reanalyze = (
            -3500 if len(prev_commits_to_ignore) >= 3500 else 0
        )
        rev_start = "children({})".format(
            prev_commits_to_ignore[first_commit_to_reanalyze]["rev"]
        )
    else:
        rev_start = 0

    with hglib.open(self.mercurial_repo_dir) as hg:
        revs = repository.get_revs(hg, rev_start)

    # Drop commits which are not yet present in the mercurial <-> git mapping.
    while len(revs) > 0:
        try:
            vcs_map.mercurial_to_git(revs[-1].decode("ascii"))
            break
        except Exception as e:
            if not str(e).startswith("Missing mercurial commit in the VCS map"):
                raise

            revs.pop()

    commits = repository.hg_log_multi(self.mercurial_repo_dir, revs)

    repository.set_commits_to_ignore(self.mercurial_repo_dir, commits)

    chosen_commits = set()
    commits_to_ignore = []
    for commit in commits:
        if commit.ignored or commit.backedoutby:
            commits_to_ignore.append(
                {
                    "rev": commit.node,
                    "type": "backedout" if commit.backedoutby else "",
                }
            )
            chosen_commits.add(commit.node)

    logger.info(f"{len(commits_to_ignore)} new commits to ignore...")

    for prev_commit in prev_commits_to_ignore[::-1]:
        if prev_commit["rev"] not in chosen_commits:
            commits_to_ignore.append(prev_commit)
            chosen_commits.add(prev_commit["rev"])

    logger.info(f"{len(commits_to_ignore)} commits to ignore...")
    logger.info(
        "...of which {} are backed-out".format(
            sum(1 for commit in commits_to_ignore if commit["type"] == "backedout")
        )
    )

    db.write(IGNORED_COMMITS_DB, commits_to_ignore)
    zstd_compress(IGNORED_COMMITS_DB)
    db.upload(IGNORED_COMMITS_DB)