Example #1
0
    def evaluate(self):
        """Run fixcache, then calculate TP/TN/FP/FN.

        Returns:
            list of tuples ``(counter, true_positive, false_positive,
            true_negative, false_negative, file_count, hexsha)`` — one
            entry per fix commit found in the horizon commit list.
        """
        self.run_fixcache()
        print(self.hit_count)
        print(self.miss_count)

        cache_set = self.cache.file_set

        output = []
        counter = 1

        for commit in self.horizon_commit_list:
            # Only linear (single-parent) commits are evaluated; merges
            # and root commits are skipped entirely.
            if len(commit.parents) != 1:
                continue

            files = self.file_set.get_existing_multiple(commit.stats.files)

            if parsing.is_fix_commit(commit.message):
                # Add files to horizon_faulty.  set.update replaces the
                # original map(lambda …) side-effect idiom, which would
                # silently do nothing under Python 3's lazy map().
                self.horizon_faulty_file_set.update(files)

                # Normal = files seen in the horizon that were never faulty.
                normal_set = self.horizon_normal_file_set \
                    - self.horizon_faulty_file_set
                faulty_set = self.horizon_faulty_file_set

                true_positive = len(cache_set & faulty_set)
                false_positive = len(cache_set & normal_set)
                true_negative = len(normal_set - cache_set)
                false_negative = len(faulty_set - cache_set)
                file_count = len(normal_set | faulty_set)

                output.append((counter, true_positive, false_positive,
                               true_negative, false_negative,
                               file_count, commit.hexsha))
            else:
                # Add files to horizon normal.
                self.horizon_normal_file_set.update(files)

            counter += 1

        return output
Example #2
0
    def run_fixcache(self):
        """Run fixcache for RandomRepository.

        Walks the commit list in order; for each single-parent commit it
        updates the tracked file set, and for fix commits compares the
        changed/created files against a random cache sample of
        ``self.cache_size`` files, incrementing hit/miss counters.
        """
        commit_num = float(len(self.commit_order))
        for commit in self.commit_list:
            percentage = 100 * self.commit_order[commit.hexsha] / commit_num
            # '%%' emits a literal percent sign; the original '%]' was an
            # invalid conversion specifier and raised ValueError at runtime.
            logger.debug('[%s%%]Currently at %s' % (percentage, commit))
            parents = commit.parents
            if len(parents) == 1:
                # List of (status, file) tuples for this commit's stats.
                f_info = self.file_set.get_and_update_multiple(
                    git_stat=commit.stats.files,
                    commit_num=self.commit_order[commit.hexsha])
                files = [
                    x[1] for x in f_info
                    if x[0] == 'changed' or x[0] == 'created'
                ]

                deleted_files = [x[1] for x in f_info if x[0] == 'deleted']

                self.file_set.remove_files(deleted_files)

                if parsing.is_fix_commit(commit.message):
                    # Baseline comparison: would a random cache of the same
                    # size have held the files touched by this fix?
                    random_file_set = self.file_set.get_random(self.cache_size)
                    for file_ in files:
                        if file_.path in random_file_set:
                            self.hit_count += 1
                        else:
                            self.miss_count += 1

            elif len(parents) == 0:
                # Initial commit: register every file in the tree.
                files = self._get_commit_tree_files(commit)
                files_to_add = []
                for path in files:
                    line_count = self._get_line_count(path, commit)
                    created, file_ = self.file_set.get_or_create_file(
                        file_path=path, line_count=line_count)
                    files_to_add.append(file_)
            else:
                # Merge commit: ignored by this strategy.
                pass
Example #3
0
    def run_fixcache(self):
        """Run fixcache with the given variables.

        Iterates the commit list in order.  For single-parent commits it
        classifies the touched files (changed/created/deleted), updates
        the distance set, and on fix commits records cache hits/misses,
        pulling in the missed file plus its closest neighbours; it then
        pre-fetches per-revision entities.  The initial (parentless)
        commit seeds the cache with the whole tree; merge commits with
        two or more parents are skipped.
        """
        commit_num = float(len(self.commit_order))
        for commit in self.commit_list:
            # Progress as a percentage of processed commit ordinals.
            percentage = 100 * self.commit_order[commit.hexsha] / commit_num
            logger.debug('[%s]Currently at %s' % (int(percentage), commit))
            parents = commit.parents

            if len(parents) == 1:
                # List of (status, file) tuples for this commit's stats.
                f_info = self.file_set.get_and_update_multiple(
                    git_stat=commit.stats.files,
                    commit_num=self.commit_order[commit.hexsha])
                changed_files = [
                    x[1] for x in filter(lambda x: x[0] == 'changed', f_info)
                ]

                deleted_files = [
                    x[1] for x in filter(lambda x: x[0] == 'deleted', f_info)
                ]

                created_files = [
                    x[1] for x in filter(lambda x: x[0] == 'created', f_info)
                ]

                self._cleanup_files(deleted_files)

                self._update_distance_set(
                    created_files + changed_files, commit)

                if parsing.is_fix_commit(commit.message):
                    for file_ in changed_files:
                        # Mark the file as faulty at this commit ordinal.
                        file_.fault(self.commit_order[commit.hexsha])
                        if self.cache.file_in(file_):
                            self.hit_count += 1
                        else:
                            # Cache miss: diff against the parent to find
                            # which deleted lines this fix touched.
                            deleted_line_dict = self._get_diff_deleted_lines(
                                commit, parents[0])
                            # NOTE(review): assumes file_.path is always a
                            # key of deleted_line_dict — a renamed file may
                            # raise KeyError here; confirm upstream.
                            del_lines = deleted_line_dict[file_.path]
                            self.miss_count += 1
                            self.cache.add(file_)

                            # Blame: commits that introduced those lines.
                            line_intr_c = self._get_line_introducing_commits(
                                del_lines, file_.path, commit.parents[0])

                            closest_file_set = []
                            for c in line_intr_c:
                                # get closest files is nlogk, so optimal
                                cf = self.file_distances.get_closest_files(
                                    file_,
                                    self.distance_to_fetch,
                                    self.commit_order[c.hexsha])
                                closest_file_set += cf

                            # Deduplicate before caching (order not needed).
                            closest_file_set = list(set(closest_file_set))
                            # there is no need for pre sorting, as already
                            # fetching closest files
                            self.cache.add_multiple(
                                closest_file_set)

                # Per-revision pre-fetch runs for EVERY single-parent
                # commit, fix or not.
                new_entity_pre_fetch = self._get_per_rev_pre_fetch(
                    created_files, commit)

                changed_entity_pre_fetch = self._get_per_rev_pre_fetch(
                    changed_files, commit)

                self.cache.add_multiple(new_entity_pre_fetch)
                self.cache.add_multiple(changed_entity_pre_fetch)
            elif len(parents) == 0:
                # Initial commit: seed cache with every file in the tree.
                files = self._get_commit_tree_files(commit)
                files_to_add = []
                for path in files:
                    line_count = self._get_line_count(path, commit)
                    created, file_ = self.file_set.get_or_create_file(
                        file_path=path, line_count=line_count)
                    if not created:
                        # File object already existed; refresh its size.
                        file_.line_count = line_count
                    files_to_add.append(file_)
                self.cache.add_multiple(files_to_add)