def get_bugbug_labels(self, kind='bug'):
    """Build a ``{bug_id: label}`` mapping for bug or regression classification.

    Manual labels are read first from the 'bug_nobug' label set and then from
    'regression_bug_nobug' (later entries overwrite earlier ones). Bugs that
    still have no label are then labelled from their Bugzilla keywords, the
    'cf_has_regression_range' field and, for regressions, their keyword
    history.

    Args:
        kind: 'bug' to label bug (1) vs. non-bug (0), or 'regression' to label
            regression (1) vs. non-regression (0).

    Returns:
        dict mapping integer bug IDs to 0 or 1, restricted to the bugs
        yielded by ``bugzilla.get_bugs()``.

    Raises:
        AssertionError: if ``kind`` or a stored label category is unexpected.
    """
    assert kind in ['bug', 'regression']

    classes = {}

    for bug_id, category in labels.get_labels('bug_nobug'):
        assert category in ['True', 'False'], f'unexpected category {category}'
        if kind == 'bug':
            classes[int(bug_id)] = 1 if category == 'True' else 0
        elif kind == 'regression':
            # Only the negative label is usable here: a 'True' entry says
            # nothing about whether the bug is a regression.
            if category == 'False':
                classes[int(bug_id)] = 0

    for bug_id, category in labels.get_labels('regression_bug_nobug'):
        assert category in [
            'nobug',
            'bug_unknown_regression',
            'bug_no_regression',
            'regression',
        ], f'unexpected category {category}'
        if kind == 'bug':
            classes[int(bug_id)] = 1 if category != 'nobug' else 0
        elif kind == 'regression':
            if category == 'bug_unknown_regression':
                continue
            classes[int(bug_id)] = 1 if category == 'regression' else 0

    # Augment labels by using bugs marked as 'regression' or 'feature', as they are basically labelled.
    bug_ids = set()
    for bug in bugzilla.get_bugs():
        bug_id = int(bug['id'])

        bug_ids.add(bug_id)

        # Manual labels take precedence over anything inferred below.
        if bug_id in classes:
            continue

        if any(
            keyword in bug['keywords']
            for keyword in ['regression', 'talos-regression']
        ) or (
            'cf_has_regression_range' in bug
            and bug['cf_has_regression_range'] == 'yes'
        ):
            classes[bug_id] = 1
        elif 'feature' in bug['keywords']:
            classes[bug_id] = 0
        elif kind == 'regression':
            for history in bug['history']:
                for change in history['changes']:
                    if change['field_name'] != 'keywords':
                        continue
                    # A single history change can remove several keywords at
                    # once, reported as one comma-separated string (possibly
                    # with whitespace after the commas), so test membership
                    # instead of comparing the whole string.
                    removed = [k.strip() for k in change['removed'].split(',')]
                    if 'regression' in removed:
                        classes[bug_id] = 0

    # Remove labels which belong to bugs for which we have no data.
    return {
        bug_id: label for bug_id, label in classes.items() if bug_id in bug_ids
    }
def get_labels(self):
    """Return a ``{bug_id: 0 or 1}`` mapping of tracking labels.

    Labels from the "tracking" label set are loaded first. They are then
    overwritten by the tracking-flag changes found in each bug's history: a
    flag set to "blocking" or "+" gives 1, a flag set to "-" gives 0, and the
    last such change wins. Bugs with no label at that point default to 0,
    unless they were resolved as INVALID or DUPLICATE.
    """
    tracked = {}

    for raw_id, verdict in labels.get_labels("tracking"):
        assert verdict in ["True", "False"], f"unexpected category {verdict}"
        tracked[int(raw_id)] = int(verdict == "True")

    for bug in bugzilla.get_bugs():
        bug_id = int(bug["id"])

        # Walk every change made to any of the per-release tracking flags.
        flag_changes = (
            change
            for entry in bug["history"]
            for change in entry["changes"]
            if change["field_name"].startswith("cf_tracking_firefox")
        )
        for change in flag_changes:
            new_value = change["added"]
            if new_value in ["blocking", "+"]:
                tracked[bug_id] = 1
            elif new_value == "-":
                tracked[bug_id] = 0

        # INVALID/DUPLICATE bugs never receive the default negative label.
        if bug["resolution"] not in ["INVALID", "DUPLICATE"]:
            tracked.setdefault(bug_id, 0)

    return tracked
def get_labels(self):
    """Label commits as regressors (1) or non-regressors (0).

    Commits listed in the "regressor" label set are labelled 1. Other commits
    are labelled 0 only if they were pushed inside the window for which
    labels are considered reliable. Commits that were ever backed out are
    skipped entirely.

    Returns:
        A ``(classes, [0, 1])`` tuple, where ``classes`` maps commit nodes to
        their label and ``[0, 1]`` is the list of possible labels.
    """
    classes = {}

    regressors = {r[0] for r in labels.get_labels("regressor")}

    # The labels we have are only from 2016-11-01.
    # TODO: Automate collection of labels and somehow remove this check.
    labels_start = datetime(2016, 11, 1)

    # We remove the last 6 months, as there could be regressions which haven't been filed yet.
    # Computed once so that every commit is compared against the same cutoff.
    recent_cutoff = datetime.utcnow() - relativedelta(months=6)

    for commit_data in repository.get_commits():
        if commit_data["ever_backedout"]:
            continue

        node = commit_data["node"]
        if node in regressors:
            classes[node] = 1
            continue

        # The push date only matters for commits assumed not to be regressors.
        push_date = dateutil.parser.parse(commit_data["pushdate"])
        if push_date < labels_start or push_date > recent_cutoff:
            continue

        classes[node] = 0

    print(
        "{} commits caused regressions".format(
            sum(1 for label in classes.values() if label == 1)
        )
    )

    print(
        "{} commits did not cause regressions".format(
            sum(1 for label in classes.values() if label == 0)
        )
    )

    return classes, [0, 1]
def get_labels(self):
    """Compute tracking labels (1 = tracked, 0 = not tracked) per bug ID.

    The 'tracking' label set seeds the result. Each bug's history then
    overrides it: a tracking flag changed to 'blocking' or '+' sets 1, a flag
    changed to '-' sets 0. Bugs left without a label default to 0, except
    those resolved as INVALID or DUPLICATE, which are left out.
    """
    result = {}

    for bug_id, category in labels.get_labels('tracking'):
        assert category in ['True', 'False'], f'unexpected category {category}'
        if category == 'True':
            result[int(bug_id)] = 1
        else:
            result[int(bug_id)] = 0

    for bug_data in bugzilla.get_bugs():
        bug_id = int(bug_data['id'])

        for entry in bug_data['history']:
            for change in entry['changes']:
                if not change['field_name'].startswith('cf_tracking_firefox'):
                    continue

                added = change['added']
                accepted = added in ['blocking', '+']
                # Any other value (neither accepted nor '-') leaves the label untouched.
                if accepted or added == '-':
                    result[bug_id] = 1 if accepted else 0

        if bug_data['resolution'] in ['INVALID', 'DUPLICATE']:
            continue

        if bug_id not in result:
            result[bug_id] = 0

    return result
def get_labels(self):
    """Label commits as ignorable (1) or not ignorable (0).

    Commits flagged as ``ignored`` are labelled 1, and commits attached to a
    bug involved in a regression relationship are labelled 0. Manual labels
    from the "annotateignore" label set are applied last and override both.
    Backed-out commits are skipped by the automatic labelling.

    Returns:
        A ``(classes, [0, 1])`` tuple, where ``classes`` maps commit nodes to
        their label and ``[0, 1]`` is the list of possible labels.
    """
    classes = {}

    # Commits in regressor or regression bugs usually are not formatting changes.
    # The set is filled incrementally: concatenating every list with
    # sum(..., []) copies the accumulated list once per bug.
    regression_related_bugs = set()
    for bug in bugzilla.get_bugs():
        regression_related_bugs.update(bug["regressed_by"])
        regression_related_bugs.update(bug["regressions"])

    for commit_data in repository.get_commits(include_ignored=True):
        if commit_data["backedoutby"]:
            continue

        node = commit_data["node"]

        if commit_data["ignored"]:
            classes[node] = 1
        elif commit_data["bug_id"] in regression_related_bugs:
            classes[node] = 0

    # Manual annotations win over the automatically derived labels.
    for node, label in labels.get_labels("annotateignore"):
        classes[node] = int(label)

    print(
        "{} commits that can be ignored".format(
            sum(1 for label in classes.values() if label == 1)
        )
    )

    print(
        "{} commits that cannot be ignored".format(
            sum(1 for label in classes.values() if label == 0)
        )
    )

    return classes, [0, 1]
def get_labels(self):
    """Return tracking labels together with the list of possible classes.

    Labels start from the "tracking" label set and are then overridden by
    each bug's tracking-flag history ("blocking" or "+" gives 1, "-" gives
    0). Bugs still unlabelled default to 0, unless their resolution is
    INVALID or DUPLICATE.

    Returns:
        A ``(labels_by_bug_id, [0, 1])`` tuple.
    """
    labelled = {}

    for bug_id, category in labels.get_labels("tracking"):
        assert category in ["True", "False"], f"unexpected category {category}"
        labelled[int(bug_id)] = 1 if category == "True" else 0

    for bug_data in bugzilla.get_bugs():
        bug_id = int(bug_data["id"])

        for entry in bug_data["history"]:
            for change in entry["changes"]:
                if not change["field_name"].startswith("cf_tracking_firefox"):
                    continue

                added = change["added"]
                if added == "-":
                    labelled[bug_id] = 0
                elif added in ["blocking", "+"]:
                    labelled[bug_id] = 1

        # Bugs resolved as INVALID/DUPLICATE are not given the default label.
        unusable = bug_data["resolution"] in ["INVALID", "DUPLICATE"]
        if not unusable and bug_id not in labelled:
            labelled[bug_id] = 0

    return labelled, [0, 1]
def get_commits_to_ignore(self) -> None:
    """Compute the list of commits to ignore, then store and upload it.

    A commit is ignored when it is flagged as ignored, was backed out, has no
    bug ID, is itself a backout, is a WPT sync commit, or is manually
    annotated with label "1" in the "annotateignore" label set. Every commit
    backed out by a backout commit is ignored as well. The resulting list is
    written to ``IGNORED_COMMITS_DB``, compressed and uploaded.

    Raises:
        AssertionError: if the commits database cannot be downloaded.
    """
    # The download must not live inside an `assert` statement: asserts are
    # stripped when Python runs with -O, which would silently skip the
    # download itself. AssertionError is kept so the failure mode is unchanged.
    if not db.download(repository.COMMITS_DB):
        raise AssertionError(f"Failed to download {repository.COMMITS_DB}")

    # Short (12-character) hashes of the commits already marked as ignored.
    ignored = set()
    commits_to_ignore = []
    # Short hashes of every commit actually present in the repository.
    all_commits = set()

    annotate_ignore_nodes = {
        node
        for node, label in labels.get_labels("annotateignore")
        if label == "1"
    }

    for commit in repository.get_commits(
        include_no_bug=True, include_backouts=True, include_ignored=True
    ):
        all_commits.add(commit["node"][:12])

        if (
            commit["ignored"]
            or commit["backedoutby"]
            or not commit["bug_id"]
            or commit["backsout"]
            or repository.is_wptsync(commit)
            or commit["node"] in annotate_ignore_nodes
        ):
            commits_to_ignore.append(
                {
                    "rev": commit["node"],
                    "type": "backedout" if commit["backedoutby"] else "",
                }
            )
            ignored.add(commit["node"][:12])

        # Also ignore whatever this commit backs out, unless already recorded.
        for backedout in commit["backsout"]:
            if backedout[:12] in ignored:
                continue
            ignored.add(backedout[:12])

            commits_to_ignore.append({"rev": backedout, "type": "backedout"})

    logger.info(f"{len(commits_to_ignore)} commits to ignore...")

    # Skip backed-out commits which aren't in the repository (commits which landed *before* the Mercurial history
    # started, and backouts which mentioned a bad hash in their message).
    commits_to_ignore = [
        c for c in commits_to_ignore if c["rev"][:12] in all_commits
    ]

    logger.info(f"{len(commits_to_ignore)} commits to ignore...")

    logger.info(
        "...of which {} are backed-out".format(
            sum(1 for commit in commits_to_ignore if commit["type"] == "backedout")
        )
    )

    db.write(IGNORED_COMMITS_DB, commits_to_ignore)
    zstd_compress(IGNORED_COMMITS_DB)
    db.upload(IGNORED_COMMITS_DB)
def get_labels(self):
    """Return a ``{bug_id: 'd' | 'f' | 't'}`` mapping of bug type labels.

    Bugs labelled 1 by ``get_bugbug_labels('bug')`` are taken as defects
    ('d'). Entries from the 'defect_feature_task' label set are applied
    afterwards and override them. A summary count per label is printed.
    """
    labelled = {}
    for bug_id, is_bug in self.get_bugbug_labels('bug').items():
        if is_bug == 1:
            labelled[bug_id] = 'd'

    for raw_id, letter in labels.get_labels('defect_feature_task'):
        assert letter in ['d', 'f', 't']
        labelled[int(raw_id)] = letter

    assigned = list(labelled.values())
    print('{} defects'.format(assigned.count('d')))
    print('{} features'.format(assigned.count('f')))
    print('{} tasks'.format(assigned.count('t')))

    return labelled
def get_bugbug_labels(self, kind="bug"):
    """Build a ``{bug_id: label}`` mapping for the requested classification.

    Labels come from several sources, applied in this order (later manual
    sources overwrite earlier ones):

    1. the "bug_nobug" label set;
    2. the "regression_bug_nobug" label set;
    3. the "defect_enhancement_task" label set, plus the entries of
       "defect_enhancement_task_p" that are not contradicted by the "_e",
       "_s" or "_h" label sets;
    4. for bugs with no manual label: Bugzilla keywords, the
       "cf_has_regression_range" field, keyword history (regressions only)
       and the bug type field.

    Args:
        kind: "bug" (labels 1/0), "regression" (labels 1/0) or
            "defect_enhancement_task" (labels "defect"/"enhancement"/"task").

    Returns:
        dict mapping integer bug IDs to labels, restricted to the bugs
        yielded by ``bugzilla.get_bugs()``.

    Raises:
        AssertionError: if ``kind`` or a stored label category is unexpected.
    """
    assert kind in ["bug", "regression", "defect_enhancement_task"]

    classes = {}

    for bug_id, category in labels.get_labels("bug_nobug"):
        assert category in ["True", "False"], f"unexpected category {category}"
        if kind == "bug":
            classes[int(bug_id)] = 1 if category == "True" else 0
        elif kind == "regression":
            # Only the negative label carries information about regressions.
            if category == "False":
                classes[int(bug_id)] = 0
        elif kind == "defect_enhancement_task":
            if category == "True":
                classes[int(bug_id)] = "defect"

    for bug_id, category in labels.get_labels("regression_bug_nobug"):
        assert category in [
            "nobug",
            "bug_unknown_regression",
            "bug_no_regression",
            "regression",
        ], f"unexpected category {category}"
        if kind == "bug":
            classes[int(bug_id)] = 1 if category != "nobug" else 0
        elif kind == "regression":
            if category == "bug_unknown_regression":
                continue
            classes[int(bug_id)] = 1 if category == "regression" else 0
        elif kind == "defect_enhancement_task":
            if category != "nobug":
                classes[int(bug_id)] = "defect"

    defect_enhancement_task_e = dict(labels.get_labels("defect_enhancement_task_e"))
    defect_enhancement_task_p = dict(labels.get_labels("defect_enhancement_task_p"))
    defect_enhancement_task_s = dict(labels.get_labels("defect_enhancement_task_s"))
    defect_enhancement_task_h = dict(labels.get_labels("defect_enhancement_task_h"))

    # Keep the "_p" entries for which every other label set either has no
    # entry for the bug or agrees with the "_p" category.
    other_label_sets = (
        defect_enhancement_task_e,
        defect_enhancement_task_s,
        defect_enhancement_task_h,
    )
    defect_enhancement_task_common = (
        (bug_id, category)
        for bug_id, category in defect_enhancement_task_p.items()
        if all(
            other.get(bug_id, category) == category for other in other_label_sets
        )
    )

    for bug_id, category in itertools.chain(
        labels.get_labels("defect_enhancement_task"), defect_enhancement_task_common
    ):
        assert category in ["defect", "enhancement", "task"]
        if kind == "bug":
            classes[int(bug_id)] = 1 if category == "defect" else 0
        elif kind == "regression":
            if category in ["enhancement", "task"]:
                classes[int(bug_id)] = 0
        elif kind == "defect_enhancement_task":
            classes[int(bug_id)] = category

    # Augment labels by using bugs marked as 'regression' or 'feature', as they are basically labelled.
    # And also use the new bug type field.
    bug_ids = set()
    for bug in bugzilla.get_bugs():
        bug_id = int(bug["id"])

        bug_ids.add(bug_id)

        # Manual labels take precedence over anything inferred below.
        if bug_id in classes:
            continue

        if any(
            keyword in bug["keywords"]
            for keyword in ["regression", "talos-regression"]
        ) or (
            "cf_has_regression_range" in bug
            and bug["cf_has_regression_range"] == "yes"
        ):
            if kind in ["bug", "regression"]:
                classes[bug_id] = 1
            else:
                classes[bug_id] = "defect"
        elif "feature" in bug["keywords"]:
            if kind in ["bug", "regression"]:
                classes[bug_id] = 0
            else:
                classes[bug_id] = "enhancement"
        elif kind == "regression":
            for history in bug["history"]:
                for change in history["changes"]:
                    if change["field_name"] != "keywords":
                        continue
                    # Keyword changes are comma-separated strings; strip the
                    # whitespace around each keyword before comparing.
                    removed = [k.strip() for k in change["removed"].split(",")]
                    added = [k.strip() for k in change["added"].split(",")]
                    if "regression" in removed:
                        classes[bug_id] = 0
                    elif "regression" in added:
                        classes[bug_id] = 1

        # The conditions to use the 'defect' type are more restricted.
        can_use_defect_type = False

        # We can use the type as a label for all bugs after the migration (https://bugzilla.mozilla.org/show_bug.cgi?id=1524738), if they are not defects.
        # Compare the integer ID, consistently with the rest of this function.
        can_use_type = bug_id > 1_540_807

        # And we can use the type as a label for bugs whose type has been modified.
        # For 'defects', we can't use them as labels unless resulting from a change, because bugs are filed by default as 'defect' and so they could be mistakes.
        if not can_use_type or bug["type"] == "defect":
            for history in bug["history"]:
                for change in history["changes"]:
                    if change["field_name"] == "type":
                        can_use_type = can_use_defect_type = True

        if can_use_type:
            bug_type = bug["type"]
            if bug_type in ["enhancement", "task"]:
                if kind in ["bug", "regression"]:
                    classes[bug_id] = 0
                elif kind == "defect_enhancement_task":
                    classes[bug_id] = bug_type
            elif bug_type == "defect" and can_use_defect_type:
                # A defect is not necessarily a regression, so nothing is
                # recorded for kind == "regression".
                if kind == "bug":
                    classes[bug_id] = 1
                elif kind == "defect_enhancement_task":
                    classes[bug_id] = "defect"

    # Remove labels which belong to bugs for which we have no data.
    return {bug_id: label for bug_id, label in classes.items() if bug_id in bug_ids}
def get_bugbug_labels(self, kind='bug'):
    """Build a ``{bug_id: label}`` mapping for the requested classification.

    Labels come from several sources, applied in this order (later manual
    sources overwrite earlier ones):

    1. the 'bug_nobug' label set;
    2. the 'regression_bug_nobug' label set;
    3. the 'defect_enhancement_task' label set (stored as the letters 'd',
       'e' and 't'), plus the entries of 'defect_enhancement_task_p' that
       are not contradicted by the '_e' or '_s' label sets;
    4. for bugs with no manual label: Bugzilla keywords, the
       'cf_has_regression_range' field, keyword history (regressions only)
       and the bug type field.

    Args:
        kind: 'bug' (labels 1/0), 'regression' (labels 1/0) or
            'defect_enhancement_task' (labels 'defect'/'enhancement'/'task').

    Returns:
        dict mapping integer bug IDs to labels, restricted to the bugs
        yielded by ``bugzilla.get_bugs()``.

    Raises:
        AssertionError: if ``kind`` or a stored label category is unexpected.
    """
    assert kind in ['bug', 'regression', 'defect_enhancement_task']

    classes = {}

    for bug_id, category in labels.get_labels('bug_nobug'):
        assert category in ['True', 'False'], f'unexpected category {category}'
        if kind == 'bug':
            classes[int(bug_id)] = 1 if category == 'True' else 0
        elif kind == 'regression':
            # Only the negative label carries information about regressions.
            if category == 'False':
                classes[int(bug_id)] = 0
        elif kind == 'defect_enhancement_task':
            if category == 'True':
                classes[int(bug_id)] = 'defect'

    for bug_id, category in labels.get_labels('regression_bug_nobug'):
        assert category in [
            'nobug',
            'bug_unknown_regression',
            'bug_no_regression',
            'regression',
        ], f'unexpected category {category}'
        if kind == 'bug':
            classes[int(bug_id)] = 1 if category != 'nobug' else 0
        elif kind == 'regression':
            if category == 'bug_unknown_regression':
                continue
            classes[int(bug_id)] = 1 if category == 'regression' else 0
        elif kind == 'defect_enhancement_task':
            if category != 'nobug':
                classes[int(bug_id)] = 'defect'

    defect_enhancement_task_e = dict(labels.get_labels('defect_enhancement_task_e'))
    defect_enhancement_task_p = dict(labels.get_labels('defect_enhancement_task_p'))
    defect_enhancement_task_s = dict(labels.get_labels('defect_enhancement_task_s'))

    # Keep the '_p' entries for which every other label set either has no
    # entry for the bug or agrees with the '_p' category.
    other_label_sets = (defect_enhancement_task_e, defect_enhancement_task_s)
    defect_enhancement_task_common = (
        (bug_id, category)
        for bug_id, category in defect_enhancement_task_p.items()
        if all(
            other.get(bug_id, category) == category for other in other_label_sets
        )
    )

    # Stored single-letter categories and the label names they stand for.
    letter_to_type = {'d': 'defect', 'e': 'enhancement', 't': 'task'}

    for bug_id, category in itertools.chain(
        labels.get_labels('defect_enhancement_task'), defect_enhancement_task_common
    ):
        assert category in ['d', 'e', 't']
        if kind == 'bug':
            classes[int(bug_id)] = 1 if category == 'd' else 0
        elif kind == 'regression':
            if category in ['e', 't']:
                classes[int(bug_id)] = 0
        elif kind == 'defect_enhancement_task':
            classes[int(bug_id)] = letter_to_type[category]

    # Augment labels by using bugs marked as 'regression' or 'feature', as they are basically labelled.
    # And also use the new bug type field.
    bug_ids = set()
    for bug in bugzilla.get_bugs():
        bug_id = int(bug['id'])

        bug_ids.add(bug_id)

        # Manual labels take precedence over anything inferred below.
        if bug_id in classes:
            continue

        if any(
            keyword in bug['keywords']
            for keyword in ['regression', 'talos-regression']
        ) or (
            'cf_has_regression_range' in bug
            and bug['cf_has_regression_range'] == 'yes'
        ):
            if kind in ['bug', 'regression']:
                classes[bug_id] = 1
            else:
                classes[bug_id] = 'defect'
        elif 'feature' in bug['keywords']:
            if kind in ['bug', 'regression']:
                classes[bug_id] = 0
            else:
                classes[bug_id] = 'enhancement'
        elif kind == 'regression':
            for history in bug['history']:
                for change in history['changes']:
                    if change['field_name'] != 'keywords':
                        continue
                    # Keyword changes are comma-separated strings in which the
                    # commas may be followed by whitespace; without stripping,
                    # 'regression' is missed whenever it is not the first
                    # keyword of the change.
                    removed = [k.strip() for k in change['removed'].split(',')]
                    added = [k.strip() for k in change['added'].split(',')]
                    if 'regression' in removed:
                        classes[bug_id] = 0
                    elif 'regression' in added:
                        classes[bug_id] = 1

        # The conditions to use the 'defect' type are more restricted.
        can_use_defect_type = False

        # We can use the type as a label for all bugs after the migration (https://bugzilla.mozilla.org/show_bug.cgi?id=1524738), if they are not defects.
        # Compare the integer ID, consistently with the rest of this function.
        can_use_type = bug_id > 1540807

        # And we can use the type as a label for bugs whose type has been modified.
        # For 'defects', we can't use them as labels unless resulting from a change, because bugs are filed by default as 'defect' and so they could be mistakes.
        if not can_use_type or bug['type'] == 'defect':
            for history in bug['history']:
                for change in history['changes']:
                    if change['field_name'] == 'type':
                        can_use_type = can_use_defect_type = True

        if can_use_type:
            bug_type = bug['type']
            if bug_type in ['enhancement', 'task']:
                if kind in ['bug', 'regression']:
                    classes[bug_id] = 0
                elif kind == 'defect_enhancement_task':
                    classes[bug_id] = bug_type
            elif bug_type == 'defect' and can_use_defect_type:
                # A defect is not necessarily a regression, so nothing is
                # recorded for kind == 'regression'.
                if kind == 'bug':
                    classes[bug_id] = 1
                elif kind == 'defect_enhancement_task':
                    classes[bug_id] = 'defect'

    # Remove labels which belong to bugs for which we have no data.
    return {
        bug_id: label for bug_id, label in classes.items() if bug_id in bug_ids
    }
def get_bugbug_labels(self, kind='bug'):
    """Build a ``{bug_id: label}`` mapping for the requested classification.

    Labels come from several sources, applied in this order (later manual
    sources overwrite earlier ones):

    1. the 'bug_nobug' label set;
    2. the 'regression_bug_nobug' label set;
    3. the 'defect_feature_task' label set, plus the entries of
       'defect_feature_task_p' that are not contradicted by the '_e' or '_s'
       label sets;
    4. for bugs with no manual label: Bugzilla keywords, the
       'cf_has_regression_range' field and keyword history (regressions
       only).

    Args:
        kind: 'bug' (labels 1/0), 'regression' (labels 1/0) or
            'defect_feature_task' (labels 'd'/'e'/'t').

    Returns:
        dict mapping integer bug IDs to labels, restricted to the bugs
        yielded by ``bugzilla.get_bugs()``.

    Raises:
        AssertionError: if ``kind`` or a stored label category is unexpected.
    """
    assert kind in ['bug', 'regression', 'defect_feature_task']

    classes = {}

    for bug_id, category in labels.get_labels('bug_nobug'):
        assert category in ['True', 'False'], f'unexpected category {category}'
        if kind == 'bug':
            classes[int(bug_id)] = 1 if category == 'True' else 0
        elif kind == 'regression':
            # Only the negative label carries information about regressions.
            if category == 'False':
                classes[int(bug_id)] = 0
        elif kind == 'defect_feature_task':
            if category == 'True':
                classes[int(bug_id)] = 'd'

    for bug_id, category in labels.get_labels('regression_bug_nobug'):
        assert category in [
            'nobug',
            'bug_unknown_regression',
            'bug_no_regression',
            'regression',
        ], f'unexpected category {category}'
        if kind == 'bug':
            classes[int(bug_id)] = 1 if category != 'nobug' else 0
        elif kind == 'regression':
            if category == 'bug_unknown_regression':
                continue
            classes[int(bug_id)] = 1 if category == 'regression' else 0
        elif kind == 'defect_feature_task':
            if category != 'nobug':
                classes[int(bug_id)] = 'd'

    defect_feature_task_e = dict(labels.get_labels('defect_feature_task_e'))
    defect_feature_task_p = dict(labels.get_labels('defect_feature_task_p'))
    defect_feature_task_s = dict(labels.get_labels('defect_feature_task_s'))

    # Keep the '_p' entries for which every other label set either has no
    # entry for the bug or agrees with the '_p' category.
    other_label_sets = (defect_feature_task_e, defect_feature_task_s)
    defect_feature_task_common = (
        (bug_id, category)
        for bug_id, category in defect_feature_task_p.items()
        if all(
            other.get(bug_id, category) == category for other in other_label_sets
        )
    )

    for bug_id, category in itertools.chain(
        labels.get_labels('defect_feature_task'), defect_feature_task_common
    ):
        assert category in ['d', 'e', 't']
        if kind == 'bug':
            classes[int(bug_id)] = 1 if category == 'd' else 0
        elif kind == 'regression':
            if category in ['e', 't']:
                classes[int(bug_id)] = 0
        elif kind == 'defect_feature_task':
            classes[int(bug_id)] = category

    # Augment labels by using bugs marked as 'regression' or 'feature', as they are basically labelled.
    bug_ids = set()
    for bug in bugzilla.get_bugs():
        bug_id = int(bug['id'])

        bug_ids.add(bug_id)

        # Manual labels take precedence over anything inferred below.
        if bug_id in classes:
            continue

        if any(
            keyword in bug['keywords']
            for keyword in ['regression', 'talos-regression']
        ) or (
            'cf_has_regression_range' in bug
            and bug['cf_has_regression_range'] == 'yes'
        ):
            if kind in ['bug', 'regression']:
                classes[bug_id] = 1
            else:
                classes[bug_id] = 'd'
        elif 'feature' in bug['keywords']:
            if kind in ['bug', 'regression']:
                classes[bug_id] = 0
            else:
                classes[bug_id] = 'e'
        elif kind == 'regression':
            for history in bug['history']:
                for change in history['changes']:
                    if change['field_name'] != 'keywords':
                        continue
                    # Keyword changes are comma-separated strings in which the
                    # commas may be followed by whitespace; without stripping,
                    # 'regression' is missed whenever it is not the first
                    # keyword of the change.
                    removed = [k.strip() for k in change['removed'].split(',')]
                    added = [k.strip() for k in change['added'].split(',')]
                    if 'regression' in removed:
                        classes[bug_id] = 0
                    elif 'regression' in added:
                        classes[bug_id] = 1

    # Remove labels which belong to bugs for which we have no data.
    return {
        bug_id: label for bug_id, label in classes.items() if bug_id in bug_ids
    }
def get_bugbug_labels(self, kind="bug"):
    """Build a ``{bug_id: label}`` mapping for the requested classification.

    Labels come from several sources, applied in this order (later manual
    sources overwrite earlier ones):

    1. the "bug_nobug" label set;
    2. the "regression_bug_nobug" label set;
    3. the "defect_enhancement_task" label set, plus the entries of
       "defect_enhancement_task_p" that are not contradicted by the "_e",
       "_s" or "_h" label sets;
    4. for bugs with no manual label: Bugzilla keywords, the
       "cf_has_regression_range" field, keyword history (regressions only)
       and the bug type field.

    Args:
        kind: "bug" (labels 1/0), "regression" (labels 1/0) or
            "defect_enhancement_task" (labels "defect"/"enhancement"/"task").

    Returns:
        dict mapping integer bug IDs to labels, restricted to the bugs
        yielded by ``bugzilla.get_bugs()``.

    Raises:
        AssertionError: if ``kind`` or a stored label category is unexpected.
    """
    assert kind in ["bug", "regression", "defect_enhancement_task"]

    classes = {}

    for bug_id, category in labels.get_labels("bug_nobug"):
        assert category in ["True", "False"], f"unexpected category {category}"
        if kind == "bug":
            classes[int(bug_id)] = 1 if category == "True" else 0
        elif kind == "regression":
            # Only the negative label carries information about regressions.
            if category == "False":
                classes[int(bug_id)] = 0
        elif kind == "defect_enhancement_task":
            if category == "True":
                classes[int(bug_id)] = "defect"

    for bug_id, category in labels.get_labels("regression_bug_nobug"):
        assert category in [
            "nobug",
            "bug_unknown_regression",
            "bug_no_regression",
            "regression",
        ], f"unexpected category {category}"
        if kind == "bug":
            classes[int(bug_id)] = 1 if category != "nobug" else 0
        elif kind == "regression":
            if category == "bug_unknown_regression":
                continue
            classes[int(bug_id)] = 1 if category == "regression" else 0
        elif kind == "defect_enhancement_task":
            if category != "nobug":
                classes[int(bug_id)] = "defect"

    defect_enhancement_task_e = dict(labels.get_labels("defect_enhancement_task_e"))
    defect_enhancement_task_p = dict(labels.get_labels("defect_enhancement_task_p"))
    defect_enhancement_task_s = dict(labels.get_labels("defect_enhancement_task_s"))
    defect_enhancement_task_h = dict(labels.get_labels("defect_enhancement_task_h"))

    # Keep the "_p" entries for which every other label set either has no
    # entry for the bug or agrees with the "_p" category.
    other_label_sets = (
        defect_enhancement_task_e,
        defect_enhancement_task_s,
        defect_enhancement_task_h,
    )
    defect_enhancement_task_common = (
        (bug_id, category)
        for bug_id, category in defect_enhancement_task_p.items()
        if all(
            other.get(bug_id, category) == category for other in other_label_sets
        )
    )

    for bug_id, category in itertools.chain(
        labels.get_labels("defect_enhancement_task"), defect_enhancement_task_common
    ):
        assert category in ["defect", "enhancement", "task"]
        if kind == "bug":
            classes[int(bug_id)] = 1 if category == "defect" else 0
        elif kind == "regression":
            if category in ["enhancement", "task"]:
                classes[int(bug_id)] = 0
        elif kind == "defect_enhancement_task":
            classes[int(bug_id)] = category

    # Augment labels by using bugs marked as 'regression' or 'feature', as they are basically labelled.
    # And also use the new bug type field.
    bug_ids = set()
    for bug in bugzilla.get_bugs():
        bug_id = int(bug["id"])

        bug_ids.add(bug_id)

        # Manual labels take precedence over anything inferred below.
        if bug_id in classes:
            continue

        if any(
            keyword in bug["keywords"]
            for keyword in ["regression", "talos-regression"]
        ) or (
            "cf_has_regression_range" in bug
            and bug["cf_has_regression_range"] == "yes"
        ):
            if kind in ["bug", "regression"]:
                classes[bug_id] = 1
            else:
                classes[bug_id] = "defect"
        elif "feature" in bug["keywords"]:
            if kind in ["bug", "regression"]:
                classes[bug_id] = 0
            else:
                classes[bug_id] = "enhancement"
        elif kind == "regression":
            for history in bug["history"]:
                for change in history["changes"]:
                    if change["field_name"] != "keywords":
                        continue
                    # Keyword changes are comma-separated strings in which the
                    # commas may be followed by whitespace; without stripping,
                    # "regression" is missed whenever it is not the first
                    # keyword of the change.
                    removed = [k.strip() for k in change["removed"].split(",")]
                    added = [k.strip() for k in change["added"].split(",")]
                    if "regression" in removed:
                        classes[bug_id] = 0
                    elif "regression" in added:
                        classes[bug_id] = 1

        # The conditions to use the 'defect' type are more restricted.
        can_use_defect_type = False

        # We can use the type as a label for all bugs after the migration (https://bugzilla.mozilla.org/show_bug.cgi?id=1524738), if they are not defects.
        # Compare the integer ID, consistently with the rest of this function.
        can_use_type = bug_id > 1_540_807

        # And we can use the type as a label for bugs whose type has been modified.
        # For 'defects', we can't use them as labels unless resulting from a change, because bugs are filed by default as 'defect' and so they could be mistakes.
        if not can_use_type or bug["type"] == "defect":
            for history in bug["history"]:
                for change in history["changes"]:
                    if change["field_name"] == "type":
                        can_use_type = can_use_defect_type = True

        if can_use_type:
            bug_type = bug["type"]
            if bug_type in ["enhancement", "task"]:
                if kind in ["bug", "regression"]:
                    classes[bug_id] = 0
                elif kind == "defect_enhancement_task":
                    classes[bug_id] = bug_type
            elif bug_type == "defect" and can_use_defect_type:
                # A defect is not necessarily a regression, so nothing is
                # recorded for kind == "regression".
                if kind == "bug":
                    classes[bug_id] = 1
                elif kind == "defect_enhancement_task":
                    classes[bug_id] = "defect"

    # Remove labels which belong to bugs for which we have no data.
    return {bug_id: label for bug_id, label in classes.items() if bug_id in bug_ids}