Пример #1
0
    def get_labels(self):
        """Label each bug by whether it needed developer documentation.

        Only changes to the ``keywords`` field in a bug's history are
        considered, and the last matching change wins:

        * ``dev-doc-needed`` removed without ``dev-doc-complete`` being added
          labels the bug 0 (this suggests a false positive among
          human-analyzed bugs);
        * otherwise, ``dev-doc-needed`` or ``dev-doc-complete`` being added
          labels the bug 1.

        Bugs with no matching change are labelled 0.

        Returns:
            A ``(classes, [0, 1])`` tuple, where ``classes`` maps integer bug
            ids to their label.
        """
        doc_keywords = ("dev-doc-needed", "dev-doc-complete")
        classes = {}

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data["id"])

            # Flatten the history into the stream of keyword changes, in order.
            keyword_changes = (
                change
                for entry in bug_data["history"]
                for change in entry["changes"]
                if change["field_name"] == "keywords"
            )

            for change in keyword_changes:
                needed_dropped = (
                    "dev-doc-needed" in change["removed"]
                    and "dev-doc-complete" not in change["added"]
                )
                if needed_dropped:
                    classes[bug_id] = 0
                elif any(keyword in change["added"] for keyword in doc_keywords):
                    classes[bug_id] = 1

            # No relevant keyword change at all: treat as "no docs needed".
            classes.setdefault(bug_id, 0)

        return classes, [0, 1]
Пример #2
0
    def get_labels(self):
        """Label bugs with their assignee, keeping only frequent assignees.

        Bugs assigned to an address listed in ``ADDRESSES_TO_EXCLUDE`` are
        skipped. The number of bugs per assignee is printed, and only bugs
        whose assignee has more than ``MINIMUM_ASSIGNMENTS`` bugs are kept.

        Returns:
            A ``(classes, assignees)`` tuple: ``classes`` maps integer bug ids
            to assignee emails and ``assignees`` is the set of emails that
            appear in ``classes``.
        """
        assignee_by_bug = {}
        for bug_data in bugzilla.get_bugs():
            email = bug_data["assigned_to_detail"]["email"]
            if email not in ADDRESSES_TO_EXCLUDE:
                assignee_by_bug[int(bug_data["id"])] = email

        assignee_counts = Counter(assignee_by_bug.values()).most_common()
        top_assignees = {
            assignee
            for assignee, count in assignee_counts
            if count > MINIMUM_ASSIGNMENTS
        }

        print(f"{len(top_assignees)} assignees")
        for assignee, count in assignee_counts:
            print(f"{assignee}: {count}")

        # Drop the bugs whose assignee did not make the cut.
        classes = {
            bug_id: assignee
            for bug_id, assignee in assignee_by_bug.items()
            if assignee in top_assignees
        }

        return classes, set(classes.values())
Пример #3
0
    def get_labels(self):
        """Label bugs by whether they were tracked for a Firefox release.

        Labels first come from ``labels.get_labels("tracking")`` and are then
        overridden by each bug's history: a ``cf_tracking_firefox*`` field
        set to ``blocking`` or ``+`` gives 1, set to ``-`` gives 0 (the last
        such change wins). Bugs still unlabelled get 0, unless they were
        resolved as INVALID or DUPLICATE, in which case they stay unlabelled.

        Returns:
            A ``(classes, [0, 1])`` tuple, where ``classes`` maps integer bug
            ids to their label.
        """
        classes = {}

        for bug_id, category in labels.get_labels("tracking"):
            assert category in ["True", "False"], f"unexpected category {category}"
            classes[int(bug_id)] = int(category == "True")

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data["id"])

            for entry in bug_data["history"]:
                for change in entry["changes"]:
                    if not change["field_name"].startswith("cf_tracking_firefox"):
                        continue

                    new_value = change["added"]
                    if new_value in ["blocking", "+"]:
                        classes[bug_id] = 1
                    elif new_value == "-":
                        classes[bug_id] = 0

            # INVALID/DUPLICATE bugs do not get the default negative label.
            if bug_data["resolution"] not in ["INVALID", "DUPLICATE"]:
                classes.setdefault(bug_id, 0)

        return classes, [0, 1]
Пример #4
0
def test_rollback():
    """Check the index of the first change ``QANeededModel.rollback`` accepts.

    For a handful of known bug ids, the number of history changes seen before
    ``rollback`` first returns a truthy value must match a fixed expectation.
    """
    model = QANeededModel()

    histories = {int(bug["id"]): bug["history"] for bug in bugzilla.get_bugs()}

    def rollback_point(bug_id):
        # Walk every change of the bug in order; stop at the first one the
        # model flags. If none is flagged, the total change count is returned.
        all_changes = (
            change for history in histories[bug_id] for change in history["changes"]
        )
        count = 0
        for change in all_changes:
            if model.rollback(change):
                break
            count += 1
        return count

    assert (
        rollback_point(1390433) == 35
    ), "A bug field should start with qawanted or qe-verify"
    assert (
        rollback_point(1389136) == 9
    ), "A bug field should start with qawanted or qe-verify"

    assert rollback_point(1388990) == 29
    assert rollback_point(1389223) == 8
Пример #5
0
    def get_labels(self):
        """Label bugs with the component returned by ``filter_component``.

        Side effects: sets ``self.meaningful_product_components`` from the
        (product, component) pairs accepted by ``self.is_meaningful``, and
        prints the number of bugs per resulting component.

        Sanity checks (assertions) verify that every entry of
        ``CONFLATED_COMPONENTS`` matches at least one bug, that every mapping
        target in ``CONFLATED_COMPONENTS_MAPPING`` exists as a
        ``product::component`` pair, and that every conflated component can be
        mapped.

        Returns:
            A ``(classes, components)`` tuple: ``classes`` maps bug ids to a
            component label and ``components`` is the set of labels used.
        """
        product_components = {
            bug_data["id"]: (bug_data["product"], bug_data["component"])
            for bug_data in bugzilla.get_bugs()
        }

        meaningful_pairs = (
            (product, component)
            for product, component in product_components.values()
            if self.is_meaningful(product, component)
        )
        self.meaningful_product_components = self.get_meaningful_product_components(
            meaningful_pairs
        )

        classes = {}
        for bug_id, (product, component) in product_components.items():
            label = self.filter_component(product, component)
            if label:
                classes[bug_id] = label

        component_counts = Counter(classes.values()).most_common()
        top_components = {component for component, count in component_counts}

        print(f"{len(top_components)} components")
        for component, count in component_counts:
            print(f"{component}: {count}")

        # Every "product::component" name that occurs among the bugs.
        full_component_names = {
            f"{product}::{component}"
            for product, component in product_components.values()
        }

        # Assert there is at least one bug for each conflated component.
        for conflated_component in self.CONFLATED_COMPONENTS:
            assert (
                conflated_component in top_components
            ), f"There should be at least one bug matching {conflated_component}*"

        # Assert there is at least one bug for each component the conflated components are mapped to.
        for conflated_component_mapping in self.CONFLATED_COMPONENTS_MAPPING.values():
            assert (
                conflated_component_mapping in full_component_names
            ), f"There should be at least one bug in {conflated_component_mapping}"

        # Assert all conflated components are either in conflated_components_mapping or exist as components.
        for conflated_component in self.CONFLATED_COMPONENTS:
            assert (
                conflated_component in self.CONFLATED_COMPONENTS_MAPPING
                or conflated_component in full_component_names
            ), f"It should be possible to map {conflated_component}"

        classes = {
            bug_id: component
            for bug_id, component in classes.items()
            if component in top_components
        }

        return classes, set(classes.values())
Пример #6
0
    def get_labels(self):
        """Build a multi-label target vector for every bug.

        The distinct values of ``keyword_dict`` form the label list. Each bug
        gets a vector of zeros with a 1 at the position of every label that
        one of its keywords maps to through ``keyword_dict``.

        Returns:
            A ``(classes, keyword_list)`` tuple: ``classes`` maps integer bug
            ids to their target vector and ``keyword_list`` gives the label
            for each vector position.

        Raises:
            KeyError: if a bug carries a keyword missing from ``keyword_dict``.
        """
        keyword_list = list(set(keyword_dict.values()))
        # Resolve label positions once up front; calling ``list.index`` for
        # every keyword of every bug would rescan the list each time.
        keyword_position = {label: pos for pos, label in enumerate(keyword_list)}

        classes = {}
        for bug_data in bugzilla.get_bugs():
            target = np.zeros(len(keyword_list))
            for keyword in bug_data["keywords"]:
                target[keyword_position[keyword_dict[keyword]]] = 1

            classes[int(bug_data["id"])] = target

        return classes, keyword_list
Пример #7
0
def get_inconsistencies(find_all=False):
    """Return the ids of bugs for which ``rollback`` raises an exception.

    ``rollback`` is run on every bug. Any exception it raises is treated as
    an inconsistency: the bug id and the error are printed, and the id is
    collected.

    Args:
        find_all: forwarded unchanged as the last argument of ``rollback``.

    Returns:
        The list of ids of the bugs that failed, in iteration order.
    """
    inconsistent_ids = []

    for bug in bugzilla.get_bugs():
        try:
            rollback(bug, None, False, find_all)
        except Exception as error:
            bug_id = bug["id"]
            print(bug_id)
            print(error)
            inconsistent_ids.append(bug_id)

    return inconsistent_ids
Пример #8
0
    def eval_tracking(self):
        """Write to ``tracking.json`` the ids of bugs the model classifies as 1.

        Bugs for which ``is_tracking_decision_made`` is true are skipped and
        never passed to the model.
        """
        model = TrackingModel.load('trackingmodel')

        results = [
            bug['id']
            for bug in bugzilla.get_bugs()
            if not self.is_tracking_decision_made(bug)
            and model.classify(bug)[0] == 1
        ]

        with open('tracking.json', 'w') as f:
            json.dump(results, f)
Пример #9
0
    def eval_regression(self):
        """Write a ``{bug id: is regression}`` mapping to ``regression.json``.

        Bugs matched by ``is_regression`` are marked True and bugs matched by
        ``is_feature`` are marked False; only the remaining bugs are passed
        to the model, and are marked True when it classifies them as 1.
        """
        model = RegressionModel.load('regressionmodel')

        results = {}
        for bug in bugzilla.get_bugs():
            if self.is_regression(bug):
                verdict = True
            elif self.is_feature(bug):
                verdict = False
            else:
                # Neither rule applies: fall back to the model's prediction.
                verdict = bool(model.classify(bug)[0] == 1)

            results[bug['id']] = verdict

        with open('regression.json', 'w') as f:
            json.dump(results, f)
Пример #10
0
    def get_labels(self):
        """Label bugs by whether QA was ever requested in their history.

        A bug is labelled 1 when any history change added a value starting
        with ``qawanted``, ``qe-verify`` or ``qaurgent``; otherwise 0.

        Returns:
            A ``(classes, [0, 1])`` tuple, where ``classes`` maps integer bug
            ids to their label.
        """
        qa_prefixes = ("qawanted", "qe-verify", "qaurgent")
        classes = {}

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data["id"])

            qa_requested = any(
                change["added"].startswith(qa_prefixes)
                for entry in bug_data["history"]
                for change in entry["changes"]
            )

            if qa_requested:
                classes[bug_id] = 1
            else:
                classes.setdefault(bug_id, 0)

        return classes, [0, 1]
Пример #11
0
    def __init__(
        self,
        k=10,
        vectorizer=None,
        cleanup_urls=True,
        nltk_tokenizer=False,
    ):
        """Fit a vectorizer and a nearest-neighbour index on every bug's text.

        Args:
            k: value passed to ``NearestNeighbors`` as ``n_neighbors``.
            vectorizer: vectorizer to fit on the bug texts. When omitted (or
                ``None``), a new ``TfidfVectorizer`` is created for this
                instance.
            cleanup_urls: forwarded to the parent constructor.
            nltk_tokenizer: forwarded to the parent constructor.
        """
        super().__init__(cleanup_urls=cleanup_urls, nltk_tokenizer=nltk_tokenizer)
        # Build the default here rather than in the signature: a default of
        # ``TfidfVectorizer()`` is evaluated once at definition time, so every
        # instance would share (and re-fit) the same vectorizer object.
        self.vectorizer = TfidfVectorizer() if vectorizer is None else vectorizer
        self.similarity_calculator = NearestNeighbors(n_neighbors=k)
        text = []
        self.bug_ids = []

        # ``text`` and ``self.bug_ids`` are filled in the same order, so
        # position i in one matches position i in the other.
        for bug in bugzilla.get_bugs():
            text.append(self.text_preprocess(self.get_text(bug), join=True))
            self.bug_ids.append(bug["id"])

        self.vectorizer.fit(text)
        self.similarity_calculator.fit(self.vectorizer.transform(text))
Пример #12
0
    def get_labels(self):
        """Label bugs by the outcome of their uplift approval requests.

        Only attachment flags whose name starts with ``approval-mozilla-`` and
        whose status is ``+`` or ``-`` are considered: ``+`` labels the bug 1
        and ``-`` labels it 0 (the last such flag wins). Bugs without such a
        flag are left out.

        Returns:
            A ``(classes, [0, 1])`` tuple, where ``classes`` maps integer bug
            ids to their label.
        """
        label_for_status = {"+": 1, "-": 0}
        classes = {}

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data["id"])

            for attachment in bug_data["attachments"]:
                for flag in attachment["flags"]:
                    if not flag["name"].startswith("approval-mozilla-"):
                        continue

                    label = label_for_status.get(flag["status"])
                    if label is not None:
                        classes[bug_id] = label

        return classes, [0, 1]
Пример #13
0
    def get_labels(self):
        """Label bugs with the component returned by ``filter_component``.

        Bugs for which ``filter_component`` returns a falsy value are left
        out. The number of bugs per component is printed.

        Returns:
            A dict mapping integer bug ids to their component label.
        """
        classes = {}

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data['id'])
            label = self.filter_component(bug_data)

            if label:
                classes[bug_id] = label

        component_counts = Counter(classes.values()).most_common()
        top_components = {component for component, _ in component_counts}

        print(f'{len(top_components)} components')
        for component, count in component_counts:
            print(f'{component}: {count}')

        kept = {}
        for bug_id, component in classes.items():
            if component in top_components:
                kept[bug_id] = component
        return kept
Пример #14
0
    def get_labels(self):
        """Label bugs by the outcome of their uplift approval requests.

        An attachment flag named ``approval-mozilla-*`` with status ``+``
        labels the bug 1; with status ``-`` it labels the bug 0. The last
        such flag wins, and bugs without one are left out.

        Returns:
            A dict mapping integer bug ids to 0 or 1.
        """
        classes = {}

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data["id"])

            for attachment in bug_data["attachments"]:
                for flag in attachment["flags"]:
                    is_decided_approval = flag["name"].startswith(
                        "approval-mozilla-"
                    ) and flag["status"] in ["+", "-"]

                    if is_decided_approval:
                        classes[bug_id] = 1 if flag["status"] == "+" else 0

        return classes
Пример #15
0
    def get_labels(self):
        """Label bugs with the component returned by ``filter_component``.

        Side effects: sets ``self.meaningful_product_components`` to the
        (product, component) pairs that belong to ``self.PRODUCTS``, are not
        'General' or 'Untriaged', and have more than 1% as many bugs as the
        most common such pair; prints the number of bugs per component.

        Sanity checks (assertions) verify that the conflated components and
        their mappings are all represented among the bugs.

        Returns:
            A dict mapping bug ids to their component label.
        """
        product_components = {}
        for bug_data in bugzilla.get_bugs():
            product_components[bug_data['id']] = (bug_data['product'], bug_data['component'])

        meaningful_pairs = (
            (product, component)
            for product, component in product_components.values()
            if product in self.PRODUCTS and component not in ['General', 'Untriaged']
        )
        product_component_counts = Counter(meaningful_pairs).most_common()

        # ``most_common`` sorts by decreasing count, so the first entry holds
        # the maximum; the cut-off is 1% of it.
        max_count = product_component_counts[0][1]
        threshold = max_count / 100

        self.meaningful_product_components = {
            product_component
            for product_component, count in product_component_counts
            if count > threshold
        }

        classes = {}
        for bug_id, (product, component) in product_components.items():
            label = self.filter_component(product, component)

            if label:
                classes[bug_id] = label

        component_counts = Counter(classes.values()).most_common()
        top_components = {component for component, count in component_counts}

        print(f'{len(top_components)} components')
        for component, count in component_counts:
            print(f'{component}: {count}')

        # Assert there is at least one bug for each conflated component.
        for conflated_component in self.CONFLATED_COMPONENTS:
            has_bug = any(conflated_component == component for component, count in component_counts)
            assert has_bug, f'There should be at least one bug matching {conflated_component}*'

        # Assert there is at least one bug for each component the conflated components are mapped to.
        for conflated_component_mapping in self.CONFLATED_COMPONENTS_MAPPING.values():
            has_bug = any(
                conflated_component_mapping == f'{product}::{component}'
                for product, component in product_components.values()
            )
            assert has_bug, f'There should be at least one bug in {conflated_component_mapping}'

        # Assert all conflated components are either in conflated_components_mapping or exist as components.
        for conflated_component in self.CONFLATED_COMPONENTS:
            mappable = conflated_component in self.CONFLATED_COMPONENTS_MAPPING or any(
                conflated_component == f'{product}::{component}'
                for product, component in product_components.values()
            )
            assert mappable, f'It should be possible to map {conflated_component}'

        return {bug_id: component for bug_id, component in classes.items() if component in top_components}
Пример #16
0
    def get_labels(self):
        """Label bugs by whether QA was requested in their history.

        For each history change: if the added value starts with ``qawanted``
        the bug is labelled 1; otherwise, if the history entry has ``flags``,
        any flag whose name starts with ``qe-verify`` labels the bug 1.
        Bugs never labelled this way get 0.

        Returns:
            A dict mapping integer bug ids to 0 or 1.
        """
        classes = {}

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data['id'])

            for entry in bug_data['history']:
                for change in entry['changes']:
                    if change['added'].startswith('qawanted'):
                        classes[bug_id] = 1
                        continue

                    if 'flags' not in entry:
                        continue

                    for flag in entry['flags']:
                        if flag['name'].startswith('qe-verify'):
                            classes[bug_id] = 1

            classes.setdefault(bug_id, 0)

        return classes
Пример #17
0
    def get_labels(self):
        """Label bugs by the outcome of their uplift approval requests.

        Attachment flags named ``approval-mozilla-*`` decide the label:
        status ``+`` gives 1 and status ``-`` gives 0, with the last such
        flag winning. Other statuses are ignored and bugs without a decided
        flag are left out.

        Returns:
            A dict mapping integer bug ids to 0 or 1.
        """
        classes = {}

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data['id'])

            approval_flags = (
                flag
                for attachment in bug_data['attachments']
                for flag in attachment['flags']
                if flag['name'].startswith('approval-mozilla-')
            )

            for flag in approval_flags:
                status = flag['status']
                if status == '+':
                    classes[bug_id] = 1
                elif status == '-':
                    classes[bug_id] = 0

        return classes
Пример #18
0
    def get_labels(self):
        """Label bugs by whether they were tracked for a Firefox release.

        Labels first come from ``labels.get_labels("tracking")``. For each
        bug, the current ``cf_tracking_firefox*`` fields are then checked
        (``blocking`` or ``+`` gives 1, ``-`` gives 0); only when none of them
        is decisive is the history scanned for the same values. Bugs still
        unlabelled get 0, unless resolved as INVALID or DUPLICATE.

        Returns:
            A ``(classes, [0, 1])`` tuple, where ``classes`` maps integer bug
            ids to their label.
        """
        classes = {}

        for bug_id, category in labels.get_labels("tracking"):
            assert category in ["True",
                                "False"], f"unexpected category {category}"
            classes[int(bug_id)] = int(category == "True")

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data["id"])

            # First look at the bug's current tracking flags.
            flag_found = False
            for field, value in bug_data.items():
                if not field.startswith("cf_tracking_firefox"):
                    continue

                if value in ["blocking", "+"]:
                    classes[bug_id] = 1
                    flag_found = True
                elif value == "-":
                    classes[bug_id] = 0
                    flag_found = True

            # Fall back to past values of the flags recorded in the history.
            if not flag_found:
                tracking_changes = (
                    change
                    for entry in bug_data["history"]
                    for change in entry["changes"]
                    if change["field_name"].startswith("cf_tracking_firefox")
                )
                for change in tracking_changes:
                    if change["added"] in ["blocking", "+"]:
                        classes[bug_id] = 1
                    elif change["added"] == "-":
                        classes[bug_id] = 0

            if bug_data["resolution"] not in ["INVALID", "DUPLICATE"]:
                classes.setdefault(bug_id, 0)

        return classes, [0, 1]
Пример #19
0
def classify_bugs(model_name, classifier):
    """Interactively classify every bug with a previously trained model.

    For each bug, its URL and summary are printed, followed by the feature
    importances (when the model has ``calculate_importance`` set) and the
    verdict; the function then waits on ``input()`` before moving on.

    Args:
        model_name: base name of the model; with a non-default classifier it
            must be listed in ``MODELS_WITH_TYPE``.
        classifier: ``"default"`` or a classifier type, which is appended to
            both the model file name and the model name.
    """
    if classifier == "default":
        model_file_name = f"{model_name}model"
    else:
        assert (
            model_name in MODELS_WITH_TYPE
        ), f"{classifier} is not a valid classifier type for {model_name}"

        model_file_name = f"{model_name}{classifier}model"
        model_name = f"{model_name}_{classifier}"

    assert os.path.exists(
        model_file_name
    ), f"{model_file_name} does not exist. Train the model with trainer.py first."

    model = get_model_class(model_name).load(model_file_name)

    for bug in bugzilla.get_bugs():
        print(
            f'https://bugzilla.mozilla.org/show_bug.cgi?id={bug["id"]} - {bug["summary"]} '
        )

        if not model.calculate_importance:
            probas = model.classify(bug, probabilities=True, importances=False)
        else:
            probas, importance = model.classify(
                bug, probabilities=True, importances=True
            )

            feature_names = model.get_human_readable_feature_names()

            model.print_feature_importances(
                importance["importances"], feature_names, class_probabilities=probas
            )

        is_positive = np.argmax(probas) == 1
        if is_positive:
            print(f"Positive! {probas}")
        else:
            print(f"Negative! {probas}")

        # Wait for the user before showing the next bug.
        input()
Пример #20
0
    def __init__(
        self, cleanup_urls=True, nltk_tokenizer=False, confidence_threshold=0.8
    ):
        """Build a BM25 model over the preprocessed text of every bug.

        The corpus and the bug ids are shuffled with the same random
        permutation, so position i in ``self.corpus`` still corresponds to
        position i in ``self.bug_ids``.

        Args:
            cleanup_urls: forwarded to the parent constructor.
            nltk_tokenizer: forwarded to the parent constructor.
            confidence_threshold: forwarded to the parent constructor.
        """
        super().__init__(
            cleanup_urls=cleanup_urls,
            nltk_tokenizer=nltk_tokenizer,
            confidence_threshold=confidence_threshold,
        )
        self.corpus = []
        self.bug_ids = []

        for bug in bugzilla.get_bugs():
            self.corpus.append(self.text_preprocess(self.get_text(bug)))
            self.bug_ids.append(bug["id"])

        # One shared permutation keeps texts and ids aligned after shuffling.
        permutation = list(range(len(self.corpus)))
        random.shuffle(permutation)

        shuffled_corpus = []
        shuffled_ids = []
        for position in permutation:
            shuffled_corpus.append(self.corpus[position])
            shuffled_ids.append(self.bug_ids[position])
        self.corpus = shuffled_corpus
        self.bug_ids = shuffled_ids

        self.model = BM25(self.corpus)
Пример #21
0
    def get_labels(self):
        """Label bugs as spam (1) or legitimate (0).

        Bugs resolved as FIXED are legitimate; other bugs filed under
        product "Invalid Bugs", component "General" are spam. All remaining
        bugs are left out. The size of each class is printed.

        Returns:
            A ``(classes, [0, 1])`` tuple, where ``classes`` maps bug ids to
            their label.
        """
        classes = {}

        for bug_data in bugzilla.get_bugs(include_invalid=True):
            bug_id = bug_data["id"]

            if bug_data["resolution"] == "FIXED":
                # Legitimate bugs
                classes[bug_id] = 0
                continue

            is_spam = (
                bug_data["product"] == "Invalid Bugs"
                and bug_data["component"] == "General"
            )
            if is_spam:
                # Spam bugs
                classes[bug_id] = 1

        assigned = list(classes.values())
        print("{} bugs are classified as non-spam".format(assigned.count(0)))
        print("{} bugs are classified as spam".format(assigned.count(1)))

        return classes, [0, 1]
Пример #22
0
    def get_labels(self):
        """Label bugs by the quantile their time-to-fix falls into.

        Bugs for which ``bug_features.get_time_to_fix`` returns ``None`` are
        skipped. The cut points come from ``statistics.quantiles`` with
        ``n=2``; a bug gets the index of the first cut point its fix time
        does not exceed, or one past the last index when it exceeds them all.
        Quartiles and deciles are computed for logging only.

        Returns:
            A ``(classes, labels)`` tuple: ``classes`` maps bug ids to a
            quantile index and ``labels`` lists every possible index.
        """
        bug_fix_times = []
        for bug in bugzilla.get_bugs():
            fix_time = bug_features.get_time_to_fix(bug)
            if fix_time is not None:
                bug_fix_times.append((bug["id"], fix_time))

        fix_times = [fix_time for _, fix_time in bug_fix_times]

        def _quantiles(n):
            return statistics.quantiles(fix_times, n=n)

        quantiles = _quantiles(2)

        logger.info(f"Max fix time: {max(fix_times)}")
        logger.info(f"Fix time quantiles: {quantiles}")
        logger.info(f"Fix time quartiles: {_quantiles(4)}")
        logger.info(f"Fix time deciles: {_quantiles(10)}")

        num_labels = len(quantiles) + 1

        classes = {}
        for bug_id, fix_time in bug_fix_times:
            # Index of the first cut point that is >= the fix time, if any.
            bucket = next(
                (
                    index
                    for index, quantile in enumerate(quantiles)
                    if fix_time <= quantile
                ),
                None,
            )

            if bucket is not None:
                classes[bug_id] = bucket
            elif bug_id not in classes:
                # Slower than every cut point: last bucket.
                classes[bug_id] = len(quantiles)

        for i in range(num_labels):
            logger.info(
                f"{sum(1 for label in classes.values() if label == i)} bugs are in the {i}th quantile"
            )

        return classes, list(range(num_labels))
    def get_landed_and_filed_since(self, days: int) -> List[int]:
        """Return ids of bugs with recent commits or that were recently filed.

        A bug id is included when a commit linked to it was pushed within the
        last ``days`` days, or when the bug itself was created within that
        period and its resolution is not one of INVALID, WONTFIX, INACTIVE,
        DUPLICATE, INCOMPLETE or MOVED.

        Args:
            days: size of the look-back window, in days.

        Returns:
            The matching bug ids, without duplicates.
        """
        since = datetime.utcnow() - timedelta(days=days)

        # Bugs with at least one commit pushed inside the window.
        bug_ids = {
            commit["bug_id"]
            for commit in repository.get_commits()
            if dateutil.parser.parse(commit["pushdate"]) >= since
            and commit["bug_id"]
        }

        ignored_resolutions = [
            "INVALID",
            "WONTFIX",
            "INACTIVE",
            "DUPLICATE",
            "INCOMPLETE",
            "MOVED",
        ]

        # Bugs filed inside the window, unless closed with an ignored resolution.
        for bug in bugzilla.get_bugs():
            # The timezone is dropped so the value can be compared with the
            # naive ``since`` timestamp.
            filed_at = dateutil.parser.parse(bug["creation_time"]).replace(
                tzinfo=None)
            if filed_at >= since and bug["resolution"] not in ignored_resolutions:
                bug_ids.add(bug["id"])

        return list(bug_ids)
Пример #24
0
    def get_similar_bugs(self, bug):
        """Return the ids of bugs similar to ``bug``.

        Candidates come from ``self.search_similar_bugs``. When
        ``self.duplicatemodel`` is set, the candidates are additionally
        scored as (bug, candidate) couples, sorted by decreasing probability
        of class 1, and only those above ``self.confidence_threshold`` are
        kept.

        Args:
            bug: the bug to find similar bugs for.

        Returns:
            A list of bug ids.
        """
        similar_bug_ids = self.search_similar_bugs(bug)
        if not self.duplicatemodel:
            return similar_bug_ids

        bug_couples = [
            (bug, candidate)
            for candidate in bugzilla.get_bugs()
            if candidate["id"] in similar_bug_ids
        ]

        probabilities = self.duplicatemodel.classify(bug_couples, probabilities=True)
        ranked_couples = sorted(
            zip(probabilities, bug_couples),
            key=lambda v: -v[0][1],
        )

        return [
            similar_bug["id"]
            for prob, (_, similar_bug) in ranked_couples
            if prob[1] > self.confidence_threshold
        ]
Пример #25
0
    def get_labels(self):
        """Label each bug by whether it needed developer documentation.

        A bug currently carrying ``dev-doc-needed`` or ``dev-doc-complete``
        is labelled 1 straight away. Otherwise its history of ``keywords``
        changes decides, with the last matching change winning:

        * ``dev-doc-needed`` removed without ``dev-doc-complete`` being added
          gives 0 (this suggests a false positive among human-analyzed bugs);
        * otherwise, either keyword being added gives 1.

        Bugs with no match at all are labelled 0.

        Returns:
            A ``(classes, [0, 1])`` tuple, where ``classes`` maps integer bug
            ids to their label.
        """
        doc_keywords = ["dev-doc-needed", "dev-doc-complete"]
        classes = {}

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data["id"])

            if any(keyword in bug_data["keywords"] for keyword in doc_keywords):
                classes[bug_id] = 1
            else:
                for entry in bug_data["history"]:
                    for change in entry["changes"]:
                        if change["field_name"] != "keywords":
                            continue

                        added = change["added"]
                        if (
                            "dev-doc-needed" in change["removed"]
                            and "dev-doc-complete" not in added
                        ):
                            classes[bug_id] = 0
                        elif any(keyword in added for keyword in doc_keywords):
                            classes[bug_id] = 1

            classes.setdefault(bug_id, 0)

        return classes, [0, 1]
Пример #26
0
    def __init__(self,
                 cleanup_urls=True,
                 nltk_tokenizer=False,
                 confidence_threshold=0.8):
        """Build an LSI similarity index over the text of every bug.

        Pipeline: preprocessed text -> dictionary -> bag of words -> TF-IDF
        -> 300-topic LSI -> similarity index (stored under the
        "simdata.shdat" output prefix).

        Args:
            cleanup_urls: forwarded to the parent constructor.
            nltk_tokenizer: forwarded to the parent constructor.
            confidence_threshold: forwarded to the parent constructor.
        """
        super().__init__(
            cleanup_urls=cleanup_urls,
            nltk_tokenizer=nltk_tokenizer,
            confidence_threshold=confidence_threshold,
        )

        # Each corpus item is a [bug id, preprocessed text] pair.
        self.corpus = []
        for bug in bugzilla.get_bugs():
            tokens = self.text_preprocess(self.get_text(bug))
            self.corpus.append([bug["id"], tokens])

        documents = [text for _, text in self.corpus]

        # Assign a unique integer id to every word.
        self.dictionary = Dictionary(documents)

        # Convert every document to its bag-of-words representation.
        bow_corpus = [self.dictionary.doc2bow(text) for text in documents]

        # Fit the TF-IDF model and apply it to the same corpus.
        tfidf = models.TfidfModel(bow_corpus)
        corpus_tfidf = tfidf[bow_corpus]

        # Project the TF-IDF corpus onto 300 topics via Latent Semantic Indexing.
        self.lsi = models.LsiModel(
            corpus_tfidf,
            id2word=self.dictionary,
            num_topics=300,
        )
        corpus_lsi = self.lsi[corpus_tfidf]

        # Index the projected corpus for similarity queries.
        self.index = similarities.Similarity(
            output_prefix="simdata.shdat",
            corpus=corpus_lsi,
            num_features=300,
        )
    def go(self, days: int) -> None:
        """Collect the bugs of interest and generate the reports for them.

        The bugs of interest are those returned by
        ``get_landed_and_filed_since`` plus the meta bugs returned by
        ``get_blocking_of`` and the bugs listed for them. These, together
        with the bugs referenced by the test info, are downloaded before the
        three ``generate_*`` steps are run.

        Args:
            days: size of the look-back window, in days.
        """
        bugs = self.get_landed_and_filed_since(days)

        meta_bugs = self.get_blocking_of(self.get_meta_bugs(days))
        bugs.extend(meta_bugs.keys())
        for related_bugs in meta_bugs.values():
            bugs.extend(related_bugs)

        # Remove duplicates.
        bugs = list(set(bugs))

        test_infos = self.retrieve_test_info(days)
        test_info_bugs: List[int] = []
        for test_info in test_infos.values():
            for bug in test_info["bugs"]:
                test_info_bugs.append(bug["id"])

        logger.info("Download bugs of interest...")
        bugzilla.download_bugs(bugs + test_info_bugs)

        logger.info(f"{len(bugs)} bugs to analyze.")

        bugs_set = set(bugs).union(test_info_bugs)

        bug_map = {}
        regressor_bug_ids = set()
        for bug in bugzilla.get_bugs():
            bug_id = bug["id"]

            # Only add to the map bugs we are interested in, and bugs that block other bugs (needed for the bug_to_types call).
            if bug_id in bugs_set or len(bug["blocks"]) > 0:
                bug_map[bug_id] = bug

            if len(bug["regressions"]) > 0:
                regressor_bug_ids.add(bug_id)

        self.generate_landings_by_date(bug_map, regressor_bug_ids, bugs,
                                       meta_bugs)

        self.generate_component_connections(bug_map, bugs)

        self.generate_component_test_stats(bug_map, test_infos)
Пример #28
0
    def get_labels(self):
        """Label bugs by whether they have steps to reproduce (1) or not (0).

        Decision order for each bug:

        1. If the bug has a ``cf_has_str`` field, "no" gives 0 and "yes"
           gives 1; any other value leaves the bug unlabelled.
        2. Otherwise, a current ``stepswanted`` keyword gives 0.
        3. Otherwise, a history change that removed a value starting with
           ``stepswanted`` gives 1.

        The size of each class is printed.

        Returns:
            A ``(classes, [0, 1])`` tuple, where ``classes`` maps integer bug
            ids to their label.
        """
        classes = {}

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data["id"])

            if "cf_has_str" in bug_data:
                has_str = bug_data["cf_has_str"]
                if has_str == "no":
                    classes[bug_id] = 0
                elif has_str == "yes":
                    classes[bug_id] = 1
                continue

            if "stepswanted" in bug_data["keywords"]:
                classes[bug_id] = 0
                continue

            for entry in bug_data["history"]:
                for change in entry["changes"]:
                    if change["removed"].startswith("stepswanted"):
                        classes[bug_id] = 1

        assigned = list(classes.values())
        print("{} bugs have no steps to reproduce".format(assigned.count(0)))
        print("{} bugs have steps to reproduce".format(assigned.count(1)))

        return classes, [0, 1]
Пример #29
0
def test_rollback():
    """Check the index of the first change ``QANeededModel.rollback`` accepts.

    For a handful of known bug ids, the number of history changes preceding
    the first one for which ``rollback`` returns a truthy value must match a
    fixed expectation.
    """
    model = QANeededModel()

    histories = {}
    for bug in bugzilla.get_bugs():
        histories[int(bug["id"])] = bug["history"]

    def rollback_point(bug_id):
        # All changes of the bug, flattened in chronological (history) order.
        changes = [
            change
            for history in histories[bug_id]
            for change in history["changes"]
        ]
        for position, change in enumerate(changes):
            if model.rollback(change):
                return position
        # No change was flagged: report the total number of changes.
        return len(changes)

    assert (rollback_point(1390433) == 35
            ), "A bug field should start with qawanted or qe-verify"
    assert (rollback_point(1389136) == 9
            ), "A bug field should start with qawanted or qe-verify"

    assert rollback_point(1388990) == 29
    assert rollback_point(1389223) == 8
Пример #30
0
    def get_labels(self):
        """Label bugs with their assignee, keeping only frequent assignees.

        Bugs assigned to an address listed in ``ADDRESSES_TO_EXCLUDE`` are
        skipped. The number of bugs per assignee is printed, and only bugs
        whose assignee has more than ``MINIMUM_ASSIGNMENTS`` bugs are kept.

        Returns:
            A dict mapping integer bug ids to assignee emails.
        """
        assignee_by_bug = {}
        for bug_data in bugzilla.get_bugs():
            email = bug_data['assigned_to_detail']['email']
            if email in ADDRESSES_TO_EXCLUDE:
                continue
            assignee_by_bug[int(bug_data['id'])] = email

        assignee_counts = Counter(assignee_by_bug.values()).most_common()
        top_assignees = {
            assignee
            for assignee, count in assignee_counts
            if count > MINIMUM_ASSIGNMENTS
        }

        print(f'{len(top_assignees)} assignees')
        for assignee, count in assignee_counts:
            print(f'{assignee}: {count}')

        kept = {}
        for bug_id, assignee in assignee_by_bug.items():
            if assignee in top_assignees:
                kept[bug_id] = assignee
        return kept
Пример #31
0
    def evaluation(self):
        """Print recall and precision of ``get_similar_bugs``.

        Only bugs with a non-empty entry in ``duplicates`` are evaluated.
        Recall is the share of known duplicates that were returned as
        similar; precision is the share of returned bugs that are known
        duplicates. When there is nothing to divide by (no bug with known
        duplicates, or no similar bug returned at all), the metric is
        reported as "n/a" instead of raising ``ZeroDivisionError``.
        """
        total_r = 0
        hits_r = 0
        total_p = 0
        hits_p = 0

        for bug in bugzilla.get_bugs():
            known_duplicates = duplicates[bug["id"]]
            if not known_duplicates:
                continue

            similar_bugs = self.get_similar_bugs(bug)

            # Recall
            for item in known_duplicates:
                total_r += 1
                if item in similar_bugs:
                    hits_r += 1

            # Precision
            for element in similar_bugs:
                total_p += 1
                if element in known_duplicates:
                    hits_p += 1

        # Guard both ratios: either denominator can legitimately be zero.
        if total_r:
            print(f"Recall: {hits_r/total_r * 100}%")
        else:
            print("Recall: n/a (no bugs with known duplicates)")

        if total_p:
            print(f"Precision: {hits_p/total_p * 100}%")
        else:
            print("Precision: n/a (no similar bugs returned)")
Пример #32
0
    def get_labels(self):
        """Label bugs by whether QA was requested for them.

        A bug is labelled 1 when one of its current keywords starts with
        ``qawanted``, ``qe-verify`` or ``qaurgent``. Otherwise its history is
        scanned, and a change that added a value starting with one of those
        labels also gives 1. All other bugs are labelled 0.

        Returns:
            A ``(classes, [0, 1])`` tuple, where ``classes`` maps integer bug
            ids to their label.
        """
        qa_labels = ("qawanted", "qe-verify", "qaurgent")
        classes = {}

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data["id"])

            has_qa_keyword = any(
                keyword.startswith(qa_labels) for keyword in bug_data["keywords"]
            )

            if has_qa_keyword:
                classes[bug_id] = 1
            else:
                for entry in bug_data["history"]:
                    for change in entry["changes"]:
                        if change["added"].startswith(qa_labels):
                            classes[bug_id] = 1

            classes.setdefault(bug_id, 0)

        return classes, [0, 1]
Пример #33
0
    def get_labels(self):
        """Label bugs as spam (1) or legitimate (0).

        Bugs whose creator contains "@mozilla" are skipped. Of the rest,
        bugs resolved as FIXED are legitimate and other bugs under the
        "Invalid Bugs" product are spam; everything else is left out. The
        size of each class is printed.

        Returns:
            A ``(classes, [0, 1])`` tuple, where ``classes`` maps bug ids to
            their label.
        """
        classes = {}

        for bug_data in bugzilla.get_bugs(include_invalid=True):
            # Skip bugs filed by Mozillians, since we are sure they are not spam.
            if "@mozilla" in bug_data["creator"]:
                continue

            bug_id = bug_data["id"]

            if bug_data["resolution"] == "FIXED":
                # Legitimate bugs
                classes[bug_id] = 0
            elif bug_data["product"] == "Invalid Bugs":
                # Spam bugs
                classes[bug_id] = 1

        assigned = list(classes.values())
        print("{} bugs are classified as non-spam".format(assigned.count(0)))
        print("{} bugs are classified as spam".format(assigned.count(1)))

        return classes, [0, 1]
Пример #34
0
def main(args):
    """Print the summary of a bug followed by the summaries of its similar bugs.

    ``args.algorithm`` selects the model class through
    ``similarity.model_name_to_class`` and ``args.bug_id`` is the bug to query.
    The "elasticsearch" model is instantiated directly; any other model is
    loaded from a ``<classname>.similaritymodel`` file, which is downloaded and
    decompressed first when it does not exist locally.

    Raises:
        SystemExit: with status 1 when the model download fails with
            ``requests.HTTPError``.
    """
    model_class = similarity.model_name_to_class[args.algorithm]

    if args.algorithm == "elasticsearch":
        model = model_class()
    else:
        model_file_name = f"{model_class.__name__.lower()}.similaritymodel"

        if not os.path.exists(model_file_name):
            logger.info(f"{model_file_name} does not exist. Downloading the model....")
            try:
                download_check_etag(URL.format(model_file_name))
            except requests.HTTPError:
                logger.error(
                    "A pre-trained model is not available, you will need to train it yourself using the trainer script"
                )
                raise SystemExit(1)

            zstd_decompress(model_file_name)
            assert os.path.exists(model_file_name), "Decompressed file doesn't exist"

        model = model_class.load(model_file_name)

    queried_id = args.bug_id
    bug_ids = model.get_similar_bugs(bugzilla.get(queried_id)[queried_id])

    # Keep only the queried bug and the bugs reported as similar to it.
    bugs = {
        bug["id"]: bug
        for bug in bugzilla.get_bugs()
        if bug["id"] in bug_ids or bug["id"] == queried_id
    }

    print("{}: {}".format(queried_id, bugs[queried_id]["summary"]))
    for bug_id in bug_ids:
        print("{}: {}".format(bug_id, bugs[bug_id]["summary"]))
Пример #35
0
    def get_labels(self):
        """Label bugs by whether they have a regression range (1) or not (0).

        A bug carrying the ``regressionwindow-wanted`` keyword is labeled 0.
        Otherwise the ``cf_has_regression_range`` field decides, when present:
        ``"yes"`` gives 1 and ``"no"`` gives 0. Bugs matching neither rule get
        no label.

        Returns:
            A ``(classes, [0, 1])`` tuple, ``classes`` mapping bug IDs to labels.
        """
        classes = {}

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data["id"])

            # The keyword takes precedence over the custom field.
            if "regressionwindow-wanted" in bug_data["keywords"]:
                classes[bug_id] = 0
                continue

            if "cf_has_regression_range" not in bug_data:
                continue

            has_range = bug_data["cf_has_regression_range"]
            if has_range == "yes":
                classes[bug_id] = 1
            elif has_range == "no":
                classes[bug_id] = 0

        assigned = list(classes.values())
        print("{} bugs have regression range".format(assigned.count(1)))
        print("{} bugs don't have a regression range".format(assigned.count(0)))

        return classes, [0, 1]
Пример #36
0
 def bugs():
     """Return a generator over the bugs whose ID is a key of ``classes``."""
     every_bug = bugzilla.get_bugs()
     return (entry for entry in every_bug if entry['id'] in classes)
Пример #37
0

def get_inconsistencies(bugs):
    """Return the bugs from *bugs* for which ``rollback`` raises an exception.

    Each bug is rolled back with ``do_assert=True``. When that raises, the
    bug ID and the exception are printed and the bug is collected.
    """
    failing_bugs = []

    for candidate in bugs:
        try:
            rollback(candidate, do_assert=True)
        except Exception as error:
            # Any failure counts as an inconsistency; report it and move on.
            print(candidate["id"])
            print(error)
            failing_bugs.append(candidate)

    return failing_bugs


if __name__ == "__main__":
    # Script entry point: roll back every bug with assertions enabled, showing
    # a progress bar, and optionally printing each bug ID first.
    import argparse

    from tqdm import tqdm

    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", action="store_true", help="Verbose mode")
    args = parser.parse_args()

    for bug in tqdm(bugzilla.get_bugs()):
        if args.verbose:
            print(bug["id"])

        rollback(bug, do_assert=True)
Пример #38
0
elif args.goal == "regressionrange":
    from bugbug.models.regression import RegressionModel

    model = RegressionModel.load("regressionmodel")

# The labels for the selected goal are read from a per-goal CSV file.
file_path = os.path.join("bugbug", "labels", f"{args.goal}.csv")

with open(file_path, "r") as f:
    reader = csv.reader(f)
    # Discard the first row (presumably a header -- confirm against the CSV files).
    next(reader)
    # Each row is read as (int, int, str).
    # NOTE(review): presumably (bug ID, comment number, label) -- confirm.
    labeled_comments = [(int(r[0]), int(r[1]), r[2]) for r in reader]

# Pairs made of the first two columns of every row that is already labeled.
already_done = set((c[0], c[1]) for c in labeled_comments)

bugs = []
for bug in bugzilla.get_bugs():
    # For the str and regressionrange problems, we don't care about test
    # failures (intermittent failures, stockwell-tracked bugs, permafails),
    if (
        "intermittent-failure" in bug["keywords"]
        or "stockwell" in bug["whiteboard"]
        or "permafail" in bug["summary"].lower()
    ):
        continue

    # nor about bugs filed from Socorro (detected from the text of the first comment),
    if (
        "this bug was filed from the socorro interface"
        in bug["comments"][0]["text"].lower()
    ):
        continue
Пример #39
0
def classify_bugs(model_name, classifier, bug_id):
    """Classify bugs with a trained model, printing one prediction per bug.

    The model file is downloaded and decompressed when it does not exist
    locally. When *bug_id* is truthy only that bug is fetched and classified;
    otherwise the bugs database is downloaded and every bug in it is
    classified. After each prediction the function blocks on ``input()``.

    Args:
        model_name: name of the model to load.
        classifier: classifier type, or "default" for the untyped model.
        bug_id: ID of a single bug to classify, or a falsy value for all bugs.

    Raises:
        SystemExit: with status 1 when the model download fails with
            ``requests.HTTPError``.
    """
    if classifier == "default":
        model_file_name = f"{model_name}model"
    else:
        assert (
            model_name in MODELS_WITH_TYPE
        ), f"{classifier} is not a valid classifier type for {model_name}"

        # The file name must be derived before model_name gets its suffix.
        model_file_name = f"{model_name}{classifier}model"
        model_name = f"{model_name}_{classifier}"

    if not os.path.exists(model_file_name):
        logger.info(f"{model_file_name} does not exist. Downloading the model....")
        model_url = f"https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug.train_{model_name}.latest/artifacts/public/{model_file_name}.zst"
        try:
            download_check_etag(model_url)
        except requests.HTTPError:
            logger.error(
                "A pre-trained model is not available, you will need to train it yourself using the trainer script"
            )
            raise SystemExit(1)

        zstd_decompress(model_file_name)
        assert os.path.exists(model_file_name), "Decompressed file doesn't exist"

    model = get_model_class(model_name).load(model_file_name)

    if bug_id:
        bugs = bugzilla.get(bug_id).values()
        assert bugs, f"A bug with a bug id of {bug_id} was not found"
    else:
        assert db.download(bugzilla.BUGS_DB)
        bugs = bugzilla.get_bugs()

    for bug in bugs:
        print(
            f'https://bugzilla.mozilla.org/show_bug.cgi?id={bug["id"]} - {bug["summary"]} '
        )

        if model.calculate_importance:
            probas, importance = model.classify(
                bug, probabilities=True, importances=True
            )
            model.print_feature_importances(
                importance["importances"], class_probabilities=probas
            )
        else:
            probas = model.classify(bug, probabilities=True, importances=False)

        probability = probas[0]
        pred_index = np.argmax(probability)
        if len(probability) <= 2:
            pred_class = "Positive" if pred_index == 1 else "Negative"
        else:
            # With more than two classes, map the index back through the label encoder.
            pred_class = model.le.inverse_transform([pred_index])[0]
        print(f"{pred_class} {probability}")

        # Wait for the user before moving on to the next bug.
        input()
Пример #40
0
    def get_bugbug_labels(self, kind="bug"):
        """Build the mapping from bug ID to label for the requested *kind*.

        Labels come, in this order (later sources overwrite earlier ones for
        the same bug), from the ``bug_nobug``, ``regression_bug_nobug`` and
        ``defect_enhancement_task*`` label sets, and then from the bugs
        themselves (keywords, regression range field, keyword history and the
        ``type`` field).

        Args:
            kind: "bug" and "regression" produce 0/1 labels;
                "defect_enhancement_task" produces "defect", "enhancement"
                or "task".

        Returns:
            A dict from bug ID to label, restricted to the bugs returned by
            ``bugzilla.get_bugs()``.
        """
        assert kind in ["bug", "regression", "defect_enhancement_task"]

        classes = {}

        for bug_id, category in labels.get_labels("bug_nobug"):
            assert category in ["True", "False"], f"unexpected category {category}"
            if kind == "bug":
                classes[int(bug_id)] = 1 if category == "True" else 0
            elif kind == "regression":
                # Something that is not a bug cannot be a regression.
                if category == "False":
                    classes[int(bug_id)] = 0
            elif kind == "defect_enhancement_task":
                if category == "True":
                    classes[int(bug_id)] = "defect"

        for bug_id, category in labels.get_labels("regression_bug_nobug"):
            assert category in [
                "nobug",
                "bug_unknown_regression",
                "bug_no_regression",
                "regression",
            ], f"unexpected category {category}"
            if kind == "bug":
                classes[int(bug_id)] = 1 if category != "nobug" else 0
            elif kind == "regression":
                if category == "bug_unknown_regression":
                    continue

                classes[int(bug_id)] = 1 if category == "regression" else 0
            elif kind == "defect_enhancement_task":
                if category != "nobug":
                    classes[int(bug_id)] = "defect"

        labels_e = dict(labels.get_labels("defect_enhancement_task_e"))
        labels_p = dict(labels.get_labels("defect_enhancement_task_p"))
        labels_s = dict(labels.get_labels("defect_enhancement_task_s"))
        labels_h = dict(labels.get_labels("defect_enhancement_task_h"))

        # Keep the "p" labels that no other set ("e", "s", "h") contradicts:
        # a set agrees when it has no label for the bug or has the same one.
        defect_enhancement_task_common = (
            (bug_id, category)
            for bug_id, category in labels_p.items()
            if all(
                other.get(bug_id, category) == category
                for other in (labels_e, labels_s, labels_h)
            )
        )

        for bug_id, category in itertools.chain(
            labels.get_labels("defect_enhancement_task"), defect_enhancement_task_common
        ):
            assert category in ["defect", "enhancement", "task"]
            if kind == "bug":
                classes[int(bug_id)] = 1 if category == "defect" else 0
            elif kind == "regression":
                if category in ["enhancement", "task"]:
                    classes[int(bug_id)] = 0
            elif kind == "defect_enhancement_task":
                classes[int(bug_id)] = category

        # Augment labels by using bugs marked as 'regression' or 'feature', as they are basically labelled.
        # And also use the new bug type field.
        known_ids = set()
        for bug in bugzilla.get_bugs():
            bug_id = int(bug["id"])
            known_ids.add(bug_id)

            # Labels collected above take precedence over anything derived here.
            if bug_id in classes:
                continue

            keywords = bug["keywords"]
            binary_kind = kind in ["bug", "regression"]

            if (
                "regression" in keywords
                or "talos-regression" in keywords
                or (
                    "cf_has_regression_range" in bug
                    and bug["cf_has_regression_range"] == "yes"
                )
            ):
                classes[bug_id] = 1 if binary_kind else "defect"
            elif "feature" in keywords:
                classes[bug_id] = 0 if binary_kind else "enhancement"
            elif kind == "regression":
                # The last change touching the 'regression' keyword wins.
                for history in bug["history"]:
                    for change in history["changes"]:
                        if change["field_name"] != "keywords":
                            continue

                        if "regression" in change["removed"].split(","):
                            classes[bug_id] = 0
                        elif "regression" in change["added"].split(","):
                            classes[bug_id] = 1

            # We can use the type as a label for all bugs after the migration (https://bugzilla.mozilla.org/show_bug.cgi?id=1524738), if they are not defects.
            can_use_type = bug["id"] > 1_540_807
            # The conditions to use the 'defect' type are more restricted.
            can_use_defect_type = False

            # And we can use the type as a label for bugs whose type has been modified.
            # For 'defects', we can't use them as labels unless resulting from a change, because bugs are filed by default as 'defect' and so they could be mistakes.
            if not can_use_type or bug["type"] == "defect":
                type_was_changed = any(
                    change["field_name"] == "type"
                    for history in bug["history"]
                    for change in history["changes"]
                )
                if type_was_changed:
                    can_use_type = can_use_defect_type = True

            if not can_use_type:
                continue

            # A label derived from the type overrides one derived from keywords above.
            bug_type = bug["type"]
            if bug_type in ("enhancement", "task"):
                if kind in ("bug", "regression"):
                    classes[bug_id] = 0
                elif kind == "defect_enhancement_task":
                    classes[bug_id] = bug_type
            elif bug_type == "defect" and can_use_defect_type:
                if kind == "bug":
                    classes[bug_id] = 1
                elif kind == "defect_enhancement_task":
                    classes[bug_id] = "defect"

        # Remove labels which belong to bugs for which we have no data.
        return {
            bug_id: label for bug_id, label in classes.items() if bug_id in known_ids
        }
Пример #41
0
    def evaluation(self):
        """Print retrieval metrics of ``get_similar_bugs`` against known duplicates.

        Every bug with at least one known duplicate is used as a query; the
        first 10 results of ``self.get_similar_bugs`` are compared with its
        duplicates. Recall (overall and @1/@5/@10), precision (overall and
        @1/@5/@10) and MAP@k are printed as percentages. A metric whose
        denominator is zero is printed as 0.0 instead of raising.
        """
        # A map from bug ID to its duplicate IDs
        duplicates = defaultdict(set)
        all_ids = set(
            bug["id"]
            for bug in bugzilla.get_bugs()
            if bug["creator"] not in REPORTERS_TO_IGNORE
            and "dupeme" not in bug["keywords"]
        )

        # The relation is stored in both directions.
        for bug in bugzilla.get_bugs():
            dupes = [entry for entry in bug["duplicates"] if entry in all_ids]
            if bug["dupe_of"] in all_ids:
                dupes.append(bug["dupe_of"])

            duplicates[bug["id"]].update(dupes)
            for dupe in dupes:
                duplicates[dupe].add(bug["id"])

        total_r = 0
        hits_r = 0
        total_p = 0
        hits_p = 0

        recall_rate_1 = 0
        recall_rate_5 = 0
        recall_rate_10 = 0
        precision_rate_1 = 0
        precision_rate_5 = 0
        precision_rate_10 = 0

        queries = 0
        apk = []
        for bug in tqdm(bugzilla.get_bugs()):
            known_duplicates = duplicates[bug["id"]]
            if not known_duplicates:
                continue

            score = 0
            num_hits = 0
            queries += 1
            similar_bugs = self.get_similar_bugs(bug)[:10]

            # Position of each candidate in the ranking (first occurrence wins).
            rank_of = {}
            for rank, candidate in enumerate(similar_bugs):
                rank_of.setdefault(candidate, rank)

            # Recall: the cut-off applies to the rank of the hit in the results,
            # not to the (arbitrary) iteration order of the duplicates set.
            for item in known_duplicates:
                total_r += 1
                rank = rank_of.get(item)
                if rank is None:
                    continue

                hits_r += 1
                if rank == 0:
                    recall_rate_1 += 1
                if rank < 5:
                    recall_rate_5 += 1
                if rank < 10:
                    recall_rate_10 += 1

            # Precision
            for idx, element in enumerate(similar_bugs):
                total_p += 1
                if element not in known_duplicates:
                    continue

                hits_p += 1
                if idx == 0:
                    precision_rate_1 += 1

                if idx < 5:
                    precision_rate_5 += 1 / 5

                if idx < 10:
                    precision_rate_10 += 1 / 10

                # Average precision accumulates precision at each hit position.
                num_hits += 1
                score += num_hits / (idx + 1)

            apk.append(score / min(len(known_duplicates), 10))

        def percent(numerator, denominator):
            # Avoid ZeroDivisionError when there was nothing to evaluate.
            return numerator / denominator * 100 if denominator else 0.0

        print(f"Recall @ 1: {percent(recall_rate_1, total_r)}%")
        print(f"Recall @ 5: {percent(recall_rate_5, total_r)}%")
        print(f"Recall @ 10: {percent(recall_rate_10, total_r)}%")
        print(f"Precision @ 1: {percent(precision_rate_1, queries)}%")
        print(f"Precision @ 5: {percent(precision_rate_5, queries)}%")
        print(f"Precision @ 10: {percent(precision_rate_10, queries)}%")
        print(f"Recall: {percent(hits_r, total_r)}%")
        print(f"Precision: {percent(hits_p, total_p)}%")
        print(f"MAP@k : {np.mean(apk) * 100 if apk else 0.0}%")
Пример #42
0
    assert len(bug['comments']) >= 1

    return bug


def get_inconsistencies(find_all=False):
    """Return the IDs of the bugs for which ``rollback`` raises an exception.

    Every bug from ``bugzilla.get_bugs()`` is rolled back, forwarding
    *find_all* to ``rollback``. On failure the bug ID and the exception are
    printed and the ID is collected.
    """
    failing_ids = []

    for bug in bugzilla.get_bugs():
        try:
            rollback(bug, None, False, find_all)
        except Exception as error:
            # Any failure counts as an inconsistency; report it and move on.
            bug_id = bug['id']
            print(bug_id)
            print(error)
            failing_ids.append(bug_id)

    return failing_ids


if __name__ == '__main__':
    # Script entry point: roll back every bug, optionally printing the bug ID
    # and its position in the database first.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--verbose', action='store_true', help='Verbose mode')
    args = parser.parse_args()

    for i, bug in enumerate(bugzilla.get_bugs()):
        if args.verbose:
            print(bug['id'], i, sep='\n')
        rollback(bug, None, False)
Пример #43
0
    return bug


def get_inconsistencies(find_all=False):
    """Return the IDs of the bugs for which ``rollback`` raises an exception.

    Every bug from ``bugzilla.get_bugs()`` is rolled back, forwarding
    *find_all* to ``rollback``. On failure the bug ID and the exception are
    printed and the ID is collected.
    """
    failing_ids = []

    for bug in bugzilla.get_bugs():
        try:
            rollback(bug, None, False, find_all)
        except Exception as error:
            # Any failure counts as an inconsistency; report it and move on.
            bug_id = bug["id"]
            print(bug_id)
            print(error)
            failing_ids.append(bug_id)

    return failing_ids


if __name__ == "__main__":
    # Script entry point: roll back every bug, optionally printing the bug ID
    # and its position in the database first.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", action="store_true", help="Verbose mode")
    args = parser.parse_args()

    for i, bug in enumerate(bugzilla.get_bugs()):
        if args.verbose:
            print(bug["id"], i, sep="\n")
        rollback(bug, None, False)
    def go(self,
           bugs: List[int],
           meta_bugs: Optional[List[int]] = None) -> None:
        """Generate the landings risk report for the given bugs.

        Downloads the bugs, the Phabricator revisions linked to their commits
        and the "past bugs" data, evaluates the risk of every commit with
        ``self.regressor_model`` and writes two files in the current directory:

        * ``landings_by_date.json``: one summary per bug, grouped by the
          ``creation_date`` of the bug, plus a ``featureMetaBugs`` list when
          *meta_bugs* is given;
        * ``component_connections.json``: per-component statistics about past
          bugs and about the components of test failures.

        Args:
            bugs: IDs of the bugs to analyze. The list is not modified.
            meta_bugs: optional IDs of meta bugs; they and the bugs returned
                by ``self.get_blocking_of`` for them are analyzed too.
        """
        if meta_bugs is not None:
            # Build a new list instead of using "+=", which would extend the
            # list owned by the caller.
            bugs = bugs + meta_bugs + self.get_blocking_of(meta_bugs)

        logger.info("Download bugs of interest...")
        bugzilla.download_bugs(bugs)

        component_team_mapping = bugzilla.get_component_team_mapping()

        bugs_set = set(bugs)

        commits = [
            commit for commit in repository.get_commits()
            if commit["bug_id"] in bugs_set
        ]
        commit_map = {commit["node"]: commit for commit in commits}
        # Position of each commit in the order returned by the repository.
        hash_to_rev = {commit["node"]: i for i, commit in enumerate(commits)}

        logger.info(f"{len(commits)} commits to analyze.")

        logger.info(f"{len(bugs_set)} bugs to analyze.")

        bug_map = {}
        regressor_bug_ids = set()
        for bug in bugzilla.get_bugs():
            bug_map[bug["id"]] = bug

            if len(bug["regressions"]) > 0:
                regressor_bug_ids.add(bug["id"])

        logger.info("Retrieve Phabricator revisions linked to commits...")
        revision_ids = set(
            filter(None,
                   (repository.get_revision_id(commit) for commit in commits)))

        logger.info("Download revisions of interest...")
        phabricator.download_revisions(revision_ids)

        revision_map = {
            revision["id"]: revision
            for revision in phabricator.get_revisions()
            if revision["id"] in revision_ids
        }

        # Defined unconditionally: the bug summaries below look it up even when
        # no meta bugs were requested.
        blocker_to_meta = collections.defaultdict(set)
        if meta_bugs is not None:
            for meta_bug in meta_bugs:
                if meta_bug not in bug_map:
                    continue

                for blocker_bug_id in bugzilla.find_blocking(
                        bug_map, bug_map[meta_bug]):
                    blocker_to_meta[blocker_bug_id].add(meta_bug)

        def _download_past_bugs(url: str) -> dict:
            # Download "<name>.zst" into data/, decompress it and parse it as JSON.
            path = os.path.join("data", os.path.basename(url)[:-4])
            download_check_etag(url, path=f"{path}.zst")
            zstd_decompress(path)
            assert os.path.exists(path)
            with open(path, "r") as f:
                return json.load(f)

        past_regressions_by = {}
        past_fixed_bugs_by = {}
        past_regression_blocked_bugs_by = {}
        past_fixed_bug_blocked_bugs_by = {}

        for dimension in ["component", "directory", "file", "function"]:
            past_regressions_by[dimension] = _download_past_bugs(
                PAST_REGRESSIONS_BY_URL.format(dimension=dimension))
            past_fixed_bugs_by[dimension] = _download_past_bugs(
                PAST_FIXED_BUGS_BY_URL.format(dimension=dimension))
            past_regression_blocked_bugs_by[dimension] = _download_past_bugs(
                PAST_REGRESSION_BLOCKED_BUGS_BY_URL.format(
                    dimension=dimension))
            past_fixed_bug_blocked_bugs_by[dimension] = _download_past_bugs(
                PAST_FIXED_BUG_BLOCKED_BUGS_BY_URL.format(dimension=dimension))

        path_to_component = repository.get_component_mapping()

        def get_full_component(bug):
            return "{}::{}".format(bug["product"], bug["component"])

        def histogram(components: List[str]) -> Dict[str, float]:
            # Relative frequency of each component, most common first.
            counter = collections.Counter(components)
            return {
                component: count / len(components)
                for component, count in counter.most_common()
            }

        def component_histogram(bugs: List[dict]) -> Dict[str, float]:
            return histogram([bug["component"] for bug in bugs])

        def find_risk_band(risk: float) -> str:
            for name, start, end in self.risk_bands:
                if start <= risk <= end:
                    return name

            # Every risk value is expected to fall inside one of the bands.
            assert False

        def get_prev_bugs(past_bugs_by: dict,
                          commit: repository.CommitDict,
                          component: Optional[str] = None) -> List[dict]:
            # Collect past bugs for the files touched by the commit, using the
            # most specific level with data for each file: function, then
            # file, then directory, then component.
            paths = [
                path for path in commit["files"]
                if component is None or (path.encode(
                    "utf-8") in path_to_component and path_to_component[
                        path.encode("utf-8")] == component.encode("utf-8"))
            ]

            past_bugs = []

            for path, f_group in commit["functions"].items():
                if path not in paths:
                    continue

                if path not in past_bugs_by["function"]:
                    continue

                found = False
                for f in f_group:
                    if f[0] not in past_bugs_by["function"][path]:
                        continue

                    found = True
                    past_bugs += past_bugs_by["function"][path][f[0]]

                if found:
                    paths.remove(path)

            # Iterate over a snapshot: removing items from a list while
            # iterating over it makes the loop skip the following item.
            for path in list(paths):
                if path in past_bugs_by["file"]:
                    past_bugs += past_bugs_by["file"][path]
                    paths.remove(path)

            # Same here: the snapshot keeps each path aligned with its
            # directories while handled paths are removed.
            for path, directories in zip(list(paths),
                                         repository.get_directories(paths)):
                found = False
                for directory in directories:
                    if directory in past_bugs_by["directory"]:
                        found = True
                        past_bugs += past_bugs_by["directory"][directory]

                if found:
                    paths.remove(path)

            components = [
                path_to_component[path.encode("utf-8")].tobytes().decode(
                    "utf-8") for path in paths
                if path.encode("utf-8") in path_to_component
            ]

            for component in components:
                if component in past_bugs_by["component"]:
                    past_bugs += past_bugs_by["component"][component]

            return past_bugs

        def get_prev_bugs_stats(
            commit_group: dict,
            commit_list: List[repository.CommitDict],
            component: Optional[str] = None,
        ) -> None:
            def gather(past_bugs_by: dict) -> List[Dict[str, Any]]:
                # Concatenate the past bugs of every commit in linear time.
                return list(
                    itertools.chain.from_iterable(
                        get_prev_bugs(past_bugs_by, commit, component)
                        for commit in commit_list))

            # Find previous regressions occurred in the same files as those touched by these commits.
            # And find previous bugs that were fixed by touching the same files as these commits.
            # And find previous bugs that were blocked by regressions occurred in the same files as those touched by these commits.
            # And find previous bugs that were blocked by bugs that were fixed by touching the same files as those touched by these commits.
            prev_regressions = gather(past_regressions_by)
            prev_fixed_bugs = gather(past_fixed_bugs_by)
            prev_regression_blocked_bugs = gather(
                past_regression_blocked_bugs_by)
            prev_fixed_bug_blocked_bugs = gather(
                past_fixed_bug_blocked_bugs_by)

            prev_regressions = _deduplicate(prev_regressions)
            prev_fixed_bugs = _deduplicate(prev_fixed_bugs)
            prev_regression_blocked_bugs = _deduplicate(
                prev_regression_blocked_bugs)
            prev_fixed_bug_blocked_bugs = _deduplicate(
                prev_fixed_bug_blocked_bugs)

            regression_components = component_histogram(prev_regressions)
            fixed_bugs_components = component_histogram(prev_fixed_bugs)
            regression_blocked_bug_components = component_histogram(
                prev_regression_blocked_bugs)
            fixed_bug_blocked_bug_components = component_histogram(
                prev_fixed_bug_blocked_bugs)

            commit_group[
                "most_common_regression_components"] = regression_components
            # These are only used for component connections for the time being.
            if component:
                commit_group["prev_regressions"] = prev_regressions[-3:]
                commit_group["prev_fixed_bugs"] = prev_fixed_bugs[-3:]
                commit_group[
                    "prev_regression_blocked_bugs"] = prev_regression_blocked_bugs[
                        -3:]
                commit_group[
                    "prev_fixed_bug_blocked_bugs"] = prev_fixed_bug_blocked_bugs[
                        -3:]
                commit_group[
                    "most_common_fixed_bugs_components"] = fixed_bugs_components
                commit_group[
                    "most_common_regression_blocked_bug_components"] = regression_blocked_bug_components
                commit_group[
                    "most_common_fixed_bug_blocked_bug_components"] = fixed_bug_blocked_bug_components

        def get_commit_data(
                commit_list: List[repository.CommitDict]) -> List[dict]:
            if len(commit_list) == 0:
                return []

            # Evaluate risk of commits associated to this bug.
            probs = self.regressor_model.classify(commit_list,
                                                  probabilities=True)

            commits_data = []
            for i, commit in enumerate(commit_list):
                revision_id = repository.get_revision_id(commit)
                if revision_id in revision_map:
                    testing = phabricator.get_testing_project(
                        revision_map[revision_id])

                    # The revision is known but carries no testing project.
                    if testing is None:
                        testing = "missing"
                else:
                    testing = None

                commits_data.append({
                    "id": commit["node"],
                    "testing": testing,
                    "risk": float(probs[i][1]),
                    "backedout": bool(commit["backedoutby"]),
                    "author": commit["author_email"],
                    "reviewers": commit["reviewers"],
                    "coverage": [
                        commit["cov_added"],
                        commit["cov_covered"],
                        commit["cov_unknown"],
                    ],
                })

            return commits_data

        # Sort commits by bug ID, so we can use itertools.groupby to group them by bug ID.
        commits.sort(key=lambda x: x["bug_id"])

        bug_to_commits = {}
        for bug_id, commit_iter in itertools.groupby(commits,
                                                     lambda x: x["bug_id"]):
            # TODO: Figure out what to do with bugs we couldn't download (security bugs).
            if bug_id not in bug_map:
                continue

            # Restore the original repository order within each bug.
            bug_to_commits[bug_id] = sorted(
                commit_iter, key=lambda x: hash_to_rev[x["node"]])

        bug_summaries = []
        for bug_id in bugs:
            if bug_id not in bug_map:
                continue

            commit_list = bug_to_commits.get(bug_id, [])
            commit_data = get_commit_data(commit_list)

            bug = bug_map[bug_id]

            bug_summary = {
                "id":
                bug_id,
                "regressor":
                bug_id in regressor_bug_ids,
                "regression":
                len(bug["regressed_by"]) > 0
                or any(keyword in bug["keywords"]
                       for keyword in ["regression", "talos-regression"])
                or ("cf_has_regression_range" in bug
                    and bug["cf_has_regression_range"] == "yes"),
                "whiteboard":
                bug["whiteboard"],
                # NOTE(review): the address compared against looks like a
                # redacted placeholder -- confirm the intended value.
                "assignee":
                bug["assigned_to"]
                if bug["assigned_to"] != "*****@*****.**" else None,
                "versions":
                bugzilla.get_fixed_versions(bug),
                "component":
                get_full_component(bug),
                "team":
                bugzilla.component_to_team(component_team_mapping,
                                           bug["product"], bug["component"]),
                "summary":
                bug["summary"],
                "types":
                bug_to_types(bug),
                "severity":
                bug["severity"],
                "creation_date":
                dateutil.parser.parse(
                    bug["creation_time"]).strftime("%Y-%m-%d"),
                # Push date of the most recent commit, when there is one.
                "date":
                max(
                    dateutil.parser.parse(commit["pushdate"])
                    for commit in commit_list).strftime("%Y-%m-%d")
                if len(commit_list) > 0 else None,
                "commits":
                commit_data,
                "meta_ids":
                list(blocker_to_meta[bug_id]),
                # The band is chosen from the riskiest commit of the bug.
                "risk_band":
                find_risk_band(max(commit["risk"] for commit in commit_data))
                if len(commit_data) > 0 else None,
            }

            get_prev_bugs_stats(bug_summary, commit_list)

            bug_summaries.append(bug_summary)

        landings_by_date = collections.defaultdict(list)
        for bug_summary in bug_summaries:
            landings_by_date[bug_summary["creation_date"]].append(bug_summary)

        with open("landings_by_date.json", "w") as f:
            output: dict = {
                "summaries": landings_by_date,
            }
            if meta_bugs is not None:
                # Meta bugs that could not be downloaded are left out, as in
                # the blocker_to_meta computation above.
                output["featureMetaBugs"] = [{
                    "id": meta_bug,
                    "summary": bug_map[meta_bug]["summary"]
                } for meta_bug in meta_bugs if meta_bug in bug_map]

            json.dump(output, f)

        # Retrieve components of test failures that occurred when landing patches to fix bugs in specific components.
        component_failures = collections.defaultdict(list)

        push_data_iter, push_data_count, all_runnables = test_scheduling.get_push_data(
            "group")

        for revisions, _, _, possible_regressions, likely_regressions in tqdm(
                push_data_iter(), total=push_data_count):
            commit_list = [
                commit_map[revision] for revision in revisions
                if revision in commit_map
            ]
            if len(commit_list) == 0:
                continue

            commit_bugs = [
                bug_map[commit["bug_id"]] for commit in commit_list
                if commit["bug_id"] in bug_map
            ]

            components = list(
                set(get_full_component(bug) for bug in commit_bugs))

            # Only the failing groups that map to a component are of interest.
            groups = [
                group for group in list(
                    set(possible_regressions + likely_regressions))
                if group.encode("utf-8") in path_to_component
            ]

            for group in groups:
                for component in components:
                    component_failures[component].append(path_to_component[
                        group.encode("utf-8")].tobytes().decode("utf-8"))

        # Filter out commits for which we have no bugs.
        commits = [commit for commit in commits if commit["bug_id"] in bug_map]

        # Sort commits by bug component, so we can use itertools.groupby to group them by bug component.
        commits.sort(key=lambda x: get_full_component(bug_map[x["bug_id"]]))

        commit_groups = []
        for component, commit_iter in itertools.groupby(
                commits, lambda x: get_full_component(bug_map[x["bug_id"]])):
            commit_group = {
                "component": component,
                "most_common_test_failure_components":
                histogram(component_failures[component])
                if component in component_failures else {},
            }
            get_prev_bugs_stats(commit_group, list(commit_iter), component)
            commit_groups.append(commit_group)

        with open("component_connections.json", "w") as f:
            json.dump(commit_groups, f)

        repository.close_component_mapping()
Пример #45
0
def test_bug_snapshot():
    """Roll back every bug in the database, printing its ID and position first."""
    for position, bug in enumerate(bugzilla.get_bugs()):
        print(bug["id"], position, sep="\n")

        rollback(bug, None, False)