示例#1
0
    def transform(self, bugs):
        """Extract features and cleaned-up text from bugs into a DataFrame.

        Each bug is optionally rolled back (``self.rollback``), gets its
        commits attached when ``self.commit_map`` is set, and is then passed
        to every callable in ``self.feature_extractors``. Extractor results
        are collected in a per-bug ``data`` dict keyed by the extractor's
        class name: ``None`` results are skipped, each item of a list result
        becomes a ``"<name>-<item>": "True"`` entry, and booleans are stored
        as strings.

        The summary and comment texts are passed through every function in
        ``self.cleanup_functions``. Cleanup works on copies of the strings,
        so the bug dicts handed in by the caller keep their original text.

        Args:
            bugs: Iterable of bug dicts providing the "id", "creator",
                "summary" and "comments" keys; each comment is a dict with
                a "text" key.

        Returns:
            A ``pd.DataFrame`` built from one dict per bug with the keys
            "data", "title", "first_comment" and "comments".
            "first_comment" is the empty string for a bug without comments.
        """
        results = []

        # Counts how many bugs from each creator were already processed in
        # this call; the count is handed to the feature extractors.
        reporter_experience_map = defaultdict(int)
        author_ids = get_author_ids() if self.commit_map else None

        for bug in bugs:
            bug_id = bug["id"]

            if self.rollback:
                bug = bug_snapshot.rollback(bug, self.rollback_when)

            data = {}

            if self.commit_map is not None:
                if bug_id in self.commit_map:
                    bug["commits"] = self.commit_map[bug_id]
                else:
                    bug["commits"] = []

            for f in self.feature_extractors:
                res = f(
                    bug,
                    reporter_experience=reporter_experience_map[bug["creator"]],
                    author_ids=author_ids,
                )

                if res is None:
                    continue

                extractor_name = f.__class__.__name__

                if isinstance(res, list):
                    for item in res:
                        data[extractor_name + "-" + item] = "True"
                    continue

                if isinstance(res, bool):
                    res = str(res)

                data[extractor_name] = res

            reporter_experience_map[bug["creator"]] += 1

            # TODO: Try simply using all possible fields instead of extracting features manually.

            # Clean copies of the text rather than the bug itself, so that
            # transforming the same bug twice does not clean it twice.
            summary = bug["summary"]
            comments = [c["text"] for c in bug["comments"]]
            for cleanup_function in self.cleanup_functions:
                summary = cleanup_function(summary)
                comments = [cleanup_function(text) for text in comments]

            results.append(
                {
                    "data": data,
                    "title": summary,
                    # A bug may have no comments; avoid an IndexError.
                    "first_comment": comments[0] if comments else "",
                    "comments": " ".join(comments),
                }
            )

        return pd.DataFrame(results)
示例#2
0
    def transform(self, bugs):
        """Build a DataFrame of extracted features and cleaned text per bug.

        For every bug: optionally roll it back (``self.rollback``), attach
        its commits when ``self.commit_map`` is set, and call each entry of
        ``self.feature_extractors``. Results land in a ``data`` dict keyed by
        the extractor's class name; ``None`` is skipped, a list result yields
        one ``"<name>-<item>": "True"`` entry per item, and a boolean is
        converted to its string form.

        Summary and comment texts are run through
        ``self.cleanup_functions`` on local copies, leaving the input bug
        dicts' text untouched.

        Args:
            bugs: Iterable of bug dicts with the "id", "creator", "summary"
                and "comments" keys; each comment is a dict with a "text"
                key.

        Returns:
            A ``pd.DataFrame`` built from one dict per bug with the keys
            "data", "title", "first_comment" and "comments". A bug without
            comments gets an empty "first_comment".
        """
        results = []

        # Per-creator count of bugs already handled during this call.
        reporter_experience_map = defaultdict(int)
        author_ids = get_author_ids() if self.commit_map else None

        for bug in bugs:
            bug_id = bug["id"]

            if self.rollback:
                bug = bug_snapshot.rollback(bug, self.rollback_when)

            data = {}

            if self.commit_map is not None:
                if bug_id in self.commit_map:
                    bug["commits"] = self.commit_map[bug_id]
                else:
                    bug["commits"] = []

            for f in self.feature_extractors:
                res = f(
                    bug,
                    reporter_experience=reporter_experience_map[bug["creator"]],
                    author_ids=author_ids,
                )

                if res is None:
                    continue

                extractor_name = f.__class__.__name__

                if isinstance(res, list):
                    for item in res:
                        data[extractor_name + "-" + item] = "True"
                    continue

                if isinstance(res, bool):
                    res = str(res)

                data[extractor_name] = res

            reporter_experience_map[bug["creator"]] += 1

            # TODO: Try simply using all possible fields instead of extracting features manually.

            # Work on copies: writing the cleaned text back into the bug
            # would make a second transform() clean already-cleaned text.
            title = bug["summary"]
            comment_texts = [c["text"] for c in bug["comments"]]
            for cleanup_function in self.cleanup_functions:
                title = cleanup_function(title)
                comment_texts = [cleanup_function(t) for t in comment_texts]

            results.append(
                {
                    "data": data,
                    "title": title,
                    # Guard against bugs that have no comments at all.
                    "first_comment": comment_texts[0] if comment_texts else "",
                    "comments": " ".join(comment_texts),
                }
            )

        return pd.DataFrame(results)
示例#3
0
    def transform(self, bugs):
        """Convert bugs into a DataFrame of extracted features and text.

        Each bug is optionally rolled back (``self.rollback``), receives a
        'commits' entry when ``self.commit_map`` is set, and is fed to every
        callable in ``self.feature_extractors``. The per-bug ``data`` dict is
        keyed by the extractor's class name: ``None`` results are ignored,
        list results produce one ``'<name>-<item>': 'True'`` entry per item,
        and booleans are stringified.

        The functions in ``self.cleanup_functions`` are applied to copies of
        the summary and comment texts; the bug dicts passed in keep their
        original text.

        Args:
            bugs: Iterable of bug dicts with the 'id', 'creator', 'summary'
                and 'comments' keys; each comment is a dict with a 'text'
                key.

        Returns:
            A ``pd.DataFrame`` built from one dict per bug with the keys
            'data', 'title', 'first_comment' and 'comments'. For a bug with
            no comments, 'first_comment' is the empty string.
        """
        results = []

        # How many bugs of each creator have been processed so far.
        reporter_experience_map = defaultdict(int)
        author_ids = get_author_ids() if self.commit_map else None

        for bug in bugs:
            bug_id = bug['id']

            if self.rollback:
                bug = bug_snapshot.rollback(bug, self.rollback_when)

            data = {}

            if self.commit_map is not None:
                if bug_id in self.commit_map:
                    bug['commits'] = self.commit_map[bug_id]
                else:
                    bug['commits'] = []

            for f in self.feature_extractors:
                res = f(bug,
                        reporter_experience=reporter_experience_map[
                            bug['creator']],
                        author_ids=author_ids)

                if res is None:
                    continue

                extractor_name = f.__class__.__name__

                if isinstance(res, list):
                    for item in res:
                        data[extractor_name + '-' + item] = 'True'
                    continue

                if isinstance(res, bool):
                    res = str(res)

                data[extractor_name] = res

            reporter_experience_map[bug['creator']] += 1

            # TODO: Try simply using all possible fields instead of extracting features manually.

            # Clean local copies so the caller's bug is not modified and a
            # repeated transform() does not re-clean cleaned text.
            summary = bug['summary']
            comments = [c['text'] for c in bug['comments']]
            for cleanup_function in self.cleanup_functions:
                summary = cleanup_function(summary)
                comments = [cleanup_function(text) for text in comments]

            results.append({
                'data': data,
                'title': summary,
                # Bugs without comments must not raise an IndexError.
                'first_comment': comments[0] if comments else '',
                'comments': ' '.join(comments),
            })

        return pd.DataFrame(results)
示例#4
0
        def apply_transform(bug):
            """Turn one bug dict into a row of extracted features and text.

            This is a closure: it reads ``self``, ``already_rollbacked``,
            ``reporter_experience_map`` and ``author_ids`` from the enclosing
            scope, and updates ``already_rollbacked`` and
            ``reporter_experience_map`` as a side effect.

            Args:
                bug: Bug dict with the "id", "creator", "summary" and
                    "comments" keys; each comment is a dict with a "text"
                    key.

            Returns:
                A dict with the keys "data" (extractor results keyed by
                extractor class name), "title", "first_comment" (empty
                string when the bug has no comments) and "comments".
            """
            bug_id = bug["id"]

            # Roll each bug back at most once, tracked by bug id.
            if self.rollback and bug_id not in already_rollbacked:
                bug = bug_snapshot.rollback(bug, self.rollback_when)
                already_rollbacked.add(bug_id)

            data = {}

            if self.commit_map is not None:
                if bug_id in self.commit_map:
                    bug["commits"] = self.commit_map[bug_id]
                else:
                    bug["commits"] = []

            for feature_extractor in self.feature_extractors:
                res = feature_extractor(
                    bug,
                    reporter_experience=reporter_experience_map[
                        bug["creator"]],
                    author_ids=author_ids,
                )

                if res is None:
                    continue

                feature_extractor_name = feature_extractor.__class__.__name__

                if isinstance(res, list):
                    for item in res:
                        data[f"{feature_extractor_name}-{item}"] = "True"
                    continue

                if isinstance(res, bool):
                    res = str(res)

                data[feature_extractor_name] = res

            reporter_experience_map[bug["creator"]] += 1

            # TODO: Try simply using all possible fields instead of extracting features manually.

            # Clean copies of the text instead of writing back into the bug,
            # so applying the transform to the same bug again does not clean
            # already-cleaned text.
            summary = bug["summary"]
            comments = [c["text"] for c in bug["comments"]]
            for cleanup_function in self.cleanup_functions:
                summary = cleanup_function(summary)
                comments = [cleanup_function(text) for text in comments]

            return {
                "data": data,
                "title": summary,
                # A bug may have no comments; avoid an IndexError.
                "first_comment": comments[0] if comments else "",
                "comments": " ".join(comments),
            }
示例#5
0
    def transform(self, bugs):
        """Convert bugs into a DataFrame of extracted features and text.

        Each bug is optionally rolled back (``self.rollback``) and passed to
        every callable in ``self.feature_extractors``. Results are stored in
        a per-bug ``data`` dict keyed by the extractor's class name: ``None``
        is skipped, each item of a list result becomes a
        ``'<name>-<item>': 'True'`` entry, and booleans are stringified.

        The functions in ``self.cleanup_functions`` are applied to copies of
        the summary and comment texts, so the input bug dicts keep their
        original text.

        Args:
            bugs: Iterable of bug dicts with the 'id', 'summary' and
                'comments' keys; each comment is a dict with a 'text' key.

        Returns:
            A ``pd.DataFrame`` built from one dict per bug with the keys
            'data', 'title', 'first_comment' and 'comments', plus 'commits'
            when ``self.commit_messages_map`` is not ``None`` (empty string
            for bugs missing from the map). 'first_comment' is the empty
            string for a bug without comments.
        """
        results = []

        for bug in bugs:
            bug_id = bug['id']

            if self.rollback:
                bug = bug_snapshot.rollback(bug, self.rollback_when)

            data = {}

            for f in self.feature_extractors:
                res = f(bug)

                if res is None:
                    continue

                extractor_name = f.__class__.__name__

                if isinstance(res, list):
                    for item in res:
                        data[extractor_name + '-' + item] = 'True'
                    continue

                if isinstance(res, bool):
                    res = str(res)

                data[extractor_name] = res

            # TODO: Try simply using all possible fields instead of extracting features manually.

            # Clean local copies so the caller's bug is left unmodified and
            # a repeated transform() does not re-clean cleaned text.
            summary = bug['summary']
            comments = [c['text'] for c in bug['comments']]
            for cleanup_function in self.cleanup_functions:
                summary = cleanup_function(summary)
                comments = [cleanup_function(text) for text in comments]

            result = {
                'data': data,
                'title': summary,
                # Bugs without comments must not raise an IndexError.
                'first_comment': comments[0] if comments else '',
                'comments': ' '.join(comments),
            }

            if self.commit_messages_map is not None:
                result['commits'] = (self.commit_messages_map[bug_id]
                                     if bug_id in self.commit_messages_map
                                     else '')

            results.append(result)

        return pd.DataFrame(results)
示例#6
0
        def apply_transform(bug):
            """Turn a single bug, or a couple of bugs, into a feature row.

            This is a closure: it reads ``self``, ``already_rollbacked``,
            ``reporter_experience_map`` and ``author_ids`` from the enclosing
            scope, and updates ``already_rollbacked`` and
            ``reporter_experience_map`` as a side effect.

            Extractors that are ``single_bug_feature`` instances only run on
            single bugs; ``couple_bug_feature`` instances only run on
            couples. Results are keyed by the extractor's ``name`` attribute
            when it has one, otherwise by its class name.

            Args:
                bug: Either one bug dict (with the "id", "creator",
                    "summary" and "comments" keys) or a tuple whose first
                    two items are bug dicts (with "id" and "creator").

            Returns:
                For a couple, ``{"data": data}``. For a single bug, a dict
                with the keys "data", "title", "first_comment" (empty string
                when the bug has no comments) and "comments".
            """
            is_couple = isinstance(bug, tuple)

            if not is_couple:
                bug_id = bug["id"]

                # Roll each bug back at most once, tracked by bug id.
                if self.rollback and bug_id not in already_rollbacked:
                    bug = bug_snapshot.rollback(bug, self.rollback_when)
                    already_rollbacked.add(bug_id)

            else:
                bug1, bug2 = bug[0], bug[1]
                bug1_id = bug1["id"]
                bug2_id = bug2["id"]

                if self.rollback:
                    if bug1_id not in already_rollbacked:
                        bug1 = bug_snapshot.rollback(bug1, self.rollback_when)
                        already_rollbacked.add(bug1_id)
                    if bug2_id not in already_rollbacked:
                        bug2 = bug_snapshot.rollback(bug2, self.rollback_when)
                        already_rollbacked.add(bug2_id)

                    # Tuples do not support item assignment, so build a new
                    # tuple holding the rolled-back bugs instead of writing
                    # into the original one.
                    bug = (bug1, bug2) + tuple(bug[2:])

            data = {}

            for feature_extractor in self.feature_extractors:
                res = None
                if isinstance(feature_extractor,
                              single_bug_feature) and not is_couple:
                    res = feature_extractor(
                        bug,
                        reporter_experience=reporter_experience_map[
                            bug["creator"]],
                        author_ids=author_ids,
                    )

                elif isinstance(feature_extractor,
                                couple_bug_feature) and is_couple:
                    res = feature_extractor(bug)

                if res is None:
                    continue

                if hasattr(feature_extractor, "name"):
                    feature_extractor_name = feature_extractor.name
                else:
                    feature_extractor_name = feature_extractor.__class__.__name__

                if isinstance(res, list):
                    for item in res:
                        data[f"{item} in {feature_extractor_name}"] = "True"
                    continue

                if isinstance(res, bool):
                    res = str(res)

                data[feature_extractor_name] = res

            if is_couple:
                reporter_experience_map[bug[0]["creator"]] += 1
                reporter_experience_map[bug[1]["creator"]] += 1

                return {"data": data}

            reporter_experience_map[bug["creator"]] += 1

            # TODO: Try simply using all possible fields instead of extracting features manually.

            # Clean copies of the text instead of writing back into the bug,
            # so applying the transform to the same bug again does not clean
            # already-cleaned text.
            summary = bug["summary"]
            comments = [c["text"] for c in bug["comments"]]
            for cleanup_function in self.cleanup_functions:
                summary = cleanup_function(summary)
                comments = [cleanup_function(text) for text in comments]

            return {
                "data": data,
                "title": summary,
                # A bug may have no comments; avoid an IndexError.
                "first_comment": comments[0] if comments else "",
                "comments": " ".join(comments),
            }
示例#7
0
def test_bug_snapshot():
    """Run ``rollback`` over every bug yielded by ``bugzilla.get_bugs()``.

    Before each rollback, the bug's id and its zero-based position in the
    iteration are printed on separate lines, which shows how far the run
    got if a rollback raises.
    """
    for position, current_bug in enumerate(bugzilla.get_bugs()):
        print(current_bug["id"], position, sep="\n")

        rollback(current_bug, None, False)
示例#8
0
        def apply_transform(bug):
            """Turn a single bug, or a couple of bugs, into a feature row.

            This is a closure: it reads ``self``, ``already_rollbacked``,
            ``reporter_experience_map`` and ``author_ids`` from the enclosing
            scope, and updates ``already_rollbacked`` and
            ``reporter_experience_map`` as a side effect.

            Extractors that are ``single_bug_feature`` instances only run on
            single bugs; ``couple_bug_feature`` instances only run on
            couples. Results are keyed by the extractor's ``name`` attribute
            when it has one, otherwise by its class name. List and set
            results become one ``"<item> in <name>": True`` entry per item.

            Args:
                bug: Either one bug dict (with the "id", "creator",
                    "summary" and "comments" keys) or a tuple whose first
                    two items are bug dicts (with "id" and "creator").

            Returns:
                For a couple, ``{"data": data}``. For a single bug, a dict
                with the keys "data", "title", "first_comment" (empty string
                when the bug has no comments) and "comments".
            """
            is_couple = isinstance(bug, tuple)

            if not is_couple:
                bug_id = bug["id"]

                # Roll each bug back at most once, tracked by bug id.
                if self.rollback and bug_id not in already_rollbacked:
                    bug = bug_snapshot.rollback(bug, self.rollback_when)
                    already_rollbacked.add(bug_id)

            else:
                bug1, bug2 = bug[0], bug[1]
                bug1_id = bug1["id"]
                bug2_id = bug2["id"]

                if self.rollback:
                    if bug1_id not in already_rollbacked:
                        bug1 = bug_snapshot.rollback(bug1, self.rollback_when)
                        already_rollbacked.add(bug1_id)
                    if bug2_id not in already_rollbacked:
                        bug2 = bug_snapshot.rollback(bug2, self.rollback_when)
                        already_rollbacked.add(bug2_id)

                    # Tuples do not support item assignment, so build a new
                    # tuple holding the rolled-back bugs instead of writing
                    # into the original one.
                    bug = (bug1, bug2) + tuple(bug[2:])

            data = {}

            for feature_extractor in self.feature_extractors:
                res = None
                if isinstance(feature_extractor,
                              single_bug_feature) and not is_couple:
                    res = feature_extractor(
                        bug,
                        reporter_experience=reporter_experience_map[
                            bug["creator"]],
                        author_ids=author_ids,
                    )

                elif isinstance(feature_extractor,
                                couple_bug_feature) and is_couple:
                    res = feature_extractor(bug)

                if res is None:
                    continue

                if hasattr(feature_extractor, "name"):
                    feature_extractor_name = feature_extractor.name
                else:
                    feature_extractor_name = feature_extractor.__class__.__name__

                if isinstance(res, (list, set)):
                    for item in res:
                        # The same keys repeat across many bugs; interning
                        # lets equal key strings share a single object.
                        data[sys.intern(
                            f"{item} in {feature_extractor_name}")] = True
                    continue

                data[feature_extractor_name] = res

            if is_couple:
                reporter_experience_map[bug[0]["creator"]] += 1
                reporter_experience_map[bug[1]["creator"]] += 1

                return {"data": data}

            reporter_experience_map[bug["creator"]] += 1

            # Clean copies of the text so the bug itself is left unmodified.
            summary = bug["summary"]
            comments = [c["text"] for c in bug["comments"]]
            for cleanup_function in self.cleanup_functions:
                summary = cleanup_function(summary)
                comments = [
                    cleanup_function(comment) for comment in comments
                ]

            return {
                "data": data,
                "title": summary,
                # A bug may have no comments.
                "first_comment": comments[0] if comments else "",
                "comments": " ".join(comments),
            }
示例#9
0
def test_bug_snapshot():
    """Call ``rollback`` on each bug returned by ``bugzilla.get_bugs()``.

    The id of the bug and its zero-based index in the iteration are printed
    (one per line) before the rollback, so a failure can be traced to the
    bug that triggered it.
    """
    for index, bug_entry in enumerate(bugzilla.get_bugs()):
        print(bug_entry["id"], index, sep="\n")

        rollback(bug_entry, None, False)
示例#10
0
def test_bug_snapshot():
    """Call ``rollback`` with ``do_assert=True`` on every known bug.

    Iterates over ``bugzilla.get_bugs()`` and, before each rollback, prints
    the bug's id and its zero-based index on separate lines so a failing
    bug can be identified from the output.
    """
    for index, bug_entry in enumerate(bugzilla.get_bugs()):
        print(bug_entry["id"], index, sep="\n")

        rollback(bug_entry, do_assert=True)