def test_failure_prone_file_eq_false(self):
        """FailureProneFile instances differing in commit or filepath are unequal."""
        first = FailureProneFile(filepath='file1.yml',
                                 commit='123',
                                 fixing_commit='456')
        second = FailureProneFile(filepath='file1.yml',
                                  commit='456',
                                  fixing_commit='456')
        third = FailureProneFile(filepath='file2.yml',
                                 commit='123',
                                 fixing_commit='456')

        # Pairwise inequality (same checks the chained `!=` performed).
        assert first != second
        assert second != third
        # Membership uses __eq__, so neither list element may match `first`.
        assert first not in [second, third]
    def test_failure_prone_file_eq_false_instance(self):
        """A FailureProneFile never compares equal to a FixedFile, even with overlapping data."""
        failure_prone = FailureProneFile(filepath='file1.yml',
                                         commit='123',
                                         fixing_commit='456')
        fixed = FixedFile(filepath='file2.yml', fic='123', bic='456')

        assert failure_prone != fixed
# Example #3
# 0
    def label(self) -> Generator[FailureProneFile, None, None]:
        """
        For each FixedFile object, yield a FailureProneFile object for each commit between the FixedFile's
        bug-introducing-commit and its fixing-commit.

        `Note:` make sure to run the method ``get_fixed_files`` before.

        Yields
        ------
        FailureProneFile
            A FailureProneFile object.

        """

        # Both collections are required: fixing_commits delimits the commit
        # range walked below (``self.fixing_commits[-1]``) and fixed_files
        # provides the entries to label. The previous ``or`` guard only
        # returned when BOTH were empty, so an empty fixing_commits list with
        # non-empty fixed_files crashed on the list indexing.
        if not (self.fixing_commits and self.fixed_files):
            return

        # Group fixed files by path so entries can be dropped once their
        # bug-introducing commit has been reached.
        labeling = dict()
        for file in self.fixed_files:
            labeling.setdefault(file.filepath, list()).append(file)

        # Walk history backwards from the most recent fixing commit.
        for commit in RepositoryMining(self.path_to_repo,
                                       from_commit=self.fixing_commits[-1],
                                       to_commit=self.commit_hashes[0],
                                       order='reverse').traverse_commits():

            for files in labeling.values():
                # Iterate a copy: entries may be removed from `files` below,
                # and mutating a list while iterating it skips elements.
                for file in list(files):

                    idx_fic = self.commit_hashes.index(file.fic)
                    idx_bic = self.commit_hashes.index(file.bic)
                    idx_commit = self.commit_hashes.index(commit.hash)

                    # Failure-prone in every commit from the bug-introducing
                    # commit (inclusive) up to, but excluding, the fix.
                    if idx_fic > idx_commit >= idx_bic:
                        yield FailureProneFile(filepath=file.filepath,
                                               commit=commit.hash,
                                               fixing_commit=file.fic)

                    # At the bug-introducing commit the entry is exhausted.
                    if idx_commit == idx_bic and file.filepath in labeling:
                        if file in labeling[file.filepath]:
                            labeling[file.filepath].remove(file)

            # Handle file renaming
            for modified_file in commit.modifications:
                filepath = modified_file.new_path

                # list(...) copy: the ADD branch removes from the same list.
                for file in list(labeling.get(filepath, list())):
                    if self.commit_hashes.index(
                            file.fic) > self.commit_hashes.index(
                                commit.hash) >= self.commit_hashes.index(
                                    file.bic):

                        if modified_file.change_type == ModificationType.ADD:
                            # File created here: it cannot exist earlier, so
                            # stop tracking it.
                            if filepath in labeling and file in labeling[
                                    filepath]:
                                labeling[filepath].remove(file)
                        elif modified_file.change_type == ModificationType.RENAME:
                            # Going backwards in history, track the file under
                            # its previous name from here on.
                            file.filepath = modified_file.old_path
                        break
 def test_fixed_file_encoder_none(self):
     """FixedFileEncoder.default returns None for objects that are not FixedFile
     instances (here, a FailureProneFile).
     """
     not_a_fixed_file = FailureProneFile(filepath='file1.yml',
                                         commit='123',
                                         fixing_commit='456')

     assert FixedFileEncoder().default(not_a_fixed_file) is None
    def test_encoder(self):
        """FailureProneFileEncoder serializes a FailureProneFile to a plain dict."""
        source = FailureProneFile(filepath='file1.yml',
                                  commit='123',
                                  fixing_commit='456')

        result = FailureProneFileEncoder().default(source)

        expected = {
            "filepath": source.filepath,
            "commit": source.commit,
            "fixing_commit": source.fixing_commit
        }
        assert type(result) == dict
        assert result == expected
# Example #6
# 0
    def test_extract_at_release(self):
        """Extract metrics at release granularity and check the dataset shape
        for every (product, process, delta) combination, then verify labeling
        counts with a second fixing commit.
        """
        me = BaseMetricsExtractor(
            path_to_repo=
            'https://github.com/stefanodallapalma/radon-repository-miner-testing',
            clone_repo_to=self.path_to_tmp_dir,
            at='release')

        labeled_files = [
            FailureProneFile(
                filepath='test_is_comment_changed.py',
                commit='c029d7520456e5468d66b56fe176146680520b20',
                fixing_commit='d39fdb44e98869835fe59a86d20d05a9e82d5282')
        ]

        # (product, process, delta) -> expected dataset shape, run in order.
        cases = [
            (True, False, False, (9, 4)),
            (False, True, False, (9, 20)),
            (False, False, True, (9, 4)),
            (True, False, True, (9, 4)),
            (False, True, True, (9, 36)),
        ]
        for product, process, delta, shape in cases:
            me.extract(labeled_files, product=product, process=process,
                       delta=delta)
            self.assertEqual(me.dataset.shape, shape)

        labeled_files = [
            FailureProneFile(
                filepath='test_is_comment_changed.py',
                commit='d39fdb44e98869835fe59a86d20d05a9e82d5282',
                fixing_commit='75da5889425815009cc0eb4bdff68f59024d351f')
        ]

        me.extract(labeled_files, product=True, process=False, delta=False)
        self.assertEqual(me.dataset.shape, (9, 4))
        labels = me.dataset.failure_prone.to_list()
        self.assertEqual(labels.count(0), 8)
        self.assertEqual(labels.count(1), 1)
# Example #7
# 0
    def label(self) -> Generator[FailureProneFile, None, None]:
        """
        For each FixedFile object, yield a FailureProneFile object for each commit between the FixedFile's
        bug-introducing-commit and its fixing-commit.

        `Note:` make sure to run the method ``get_fixed_files`` before.

        Yields
        ------
        FailureProneFile
            A FailureProneFile object.

        """

        # Both collections are required: fixing_commits delimits the walk
        # below and fixed_files provides the entries to label.
        if not (self.fixing_commits and self.fixed_files):
            return

        # Group fixed files by path.
        # NOTE(review): `labeling` is built here but never read in this
        # version of the method — possibly leftover from an older revision;
        # confirm whether it can be removed.
        labeling = dict()
        for file in self.fixed_files:
            labeling.setdefault(file.filepath, list()).append(file)

        self.sort_commits(self.fixing_commits)

        # Maps new_path -> old_path for renames seen while walking backwards,
        # so older commits are reported under the file's older name.
        renamed_files = {}

        # Walk history backwards from the most recent fixing commit.
        for commit in Repository(self.path_to_repo,
                                 from_commit=self.fixing_commits[-1],
                                 to_commit=self.commit_hashes[0],
                                 order='reverse',
                                 num_workers=1).traverse_commits():

            for file in self.fixed_files:

                idx_fic = self.commit_hashes.index(file.fic)
                idx_bic = self.commit_hashes.index(file.bic)
                idx_commit = self.commit_hashes.index(commit.hash)

                # Failure-prone in every commit from the bug-introducing
                # commit (inclusive) up to, but excluding, the fixing commit.
                if idx_fic > idx_commit >= idx_bic:
                    yield FailureProneFile(filepath=renamed_files.get(
                        file.filepath, file.filepath),
                                           commit=commit.hash,
                                           fixing_commit=file.fic)

            # Handle file renaming
            for modified_file in commit.modified_files:
                if modified_file.change_type == ModificationType.RENAME:
                    renamed_files[
                        modified_file.new_path] = modified_file.old_path
# Example #8
# 0
    def extract(self,
                labeled_files: List[FailureProneFile],
                product: bool = True,
                process: bool = True,
                delta: bool = False):
        """ Extract metrics from labeled files.

        Parameters
        ----------
        labeled_files : List[FailureProneFile]
            The list of FailureProneFile objects that are used to label a script as failure-prone (1) or clean (0).
        product: bool
            Whether to extract product metrics.
        process: bool
            Whether to extract process metrics.
        delta: bool
            Whether to extract delta metrics between two successive releases (or commits).

        """
        git_repo = GitRepository(self.path_to_repo)

        metrics_previous_release = dict(
        )  # Values for iac metrics in the last release

        for commit in RepositoryMining(self.path_to_repo,
                                       order='date-order').traverse_commits():

            # To handle renaming in metrics_previous_release
            for modified_file in commit.modifications:

                old_path = modified_file.old_path
                new_path = modified_file.new_path

                if old_path != new_path and old_path in metrics_previous_release:
                    # Rename key old_path with new_path
                    metrics_previous_release[
                        new_path] = metrics_previous_release.pop(old_path)

            # Only extract metrics at the configured release commits.
            if commit.hash not in self.releases:
                continue

            # Check out the release so get_files()/get_content see its tree.
            git_repo.checkout(commit.hash)

            # BUGFIX: previously `process_metrics` was only bound inside the
            # `if process:` branch but read unconditionally below, raising a
            # NameError whenever process=False. Default to an empty dict so
            # the later `if process_metrics:` is simply skipped.
            process_metrics = dict()

            if process:
                # Extract process metrics over the commit range
                # (previous release, current release].
                i = self.releases.index(commit.hash)
                from_previous_commit = commit.hash if i == 0 else self.releases[
                    i - 1]
                to_current_commit = commit.hash  # = self.releases[i]
                process_metrics = self.get_process_metrics(
                    from_previous_commit, to_current_commit)

            for filepath in self.get_files():

                file_content = get_content(
                    os.path.join(self.path_to_repo, filepath))

                # Skip unreadable/empty files and files the extractor ignores.
                if not file_content or self.ignore_file(
                        filepath, file_content):
                    continue

                # Label via equality with the provided FailureProneFile list
                # (fixing_commit is not part of the comparison key here).
                tmp = FailureProneFile(filepath=filepath,
                                       commit=commit.hash,
                                       fixing_commit='')
                label = 0 if tmp not in labeled_files else 1  # 0=clean, 1=failure-prone

                metrics = dict(filepath=filepath,
                               commit=commit.hash,
                               committed_at=str(commit.committer_date),
                               failure_prone=label)

                if process_metrics:
                    # Repository-wide metrics (no per-file lookup).
                    metrics['change_set_max'] = process_metrics[
                        'dict_change_set_max']
                    metrics['change_set_avg'] = process_metrics[
                        'dict_change_set_avg']
                    # Per-file metrics default to 0 when the file did not
                    # change in the release interval.
                    metrics['code_churn_count'] = process_metrics[
                        'dict_code_churn_count'].get(filepath, 0)
                    metrics['code_churn_max'] = process_metrics[
                        'dict_code_churn_max'].get(filepath, 0)
                    metrics['code_churn_avg'] = process_metrics[
                        'dict_code_churn_avg'].get(filepath, 0)
                    metrics['commits_count'] = process_metrics[
                        'dict_commits_count'].get(filepath, 0)
                    metrics['contributors_count'] = process_metrics[
                        'dict_contributors_count'].get(filepath, 0)
                    metrics['minor_contributors_count'] = process_metrics[
                        'dict_minor_contributors_count'].get(filepath, 0)
                    metrics[
                        'highest_contributor_experience'] = process_metrics[
                            'dict_highest_contributor_experience'].get(
                                filepath, 0)
                    metrics['hunks_median'] = process_metrics[
                        'dict_hunks_median'].get(filepath, 0)
                    metrics['additions'] = process_metrics[
                        'dict_additions'].get(filepath, 0)
                    metrics['additions_max'] = process_metrics[
                        'dict_additions_max'].get(filepath, 0)
                    metrics['additions_avg'] = process_metrics[
                        'dict_additions_avg'].get(filepath, 0)
                    metrics['deletions'] = process_metrics[
                        'dict_deletions'].get(filepath, 0)
                    metrics['deletions_max'] = process_metrics[
                        'dict_deletions_max'].get(filepath, 0)
                    metrics['deletions_avg'] = process_metrics[
                        'dict_deletions_avg'].get(filepath, 0)

                if product:
                    metrics.update(self.get_product_metrics(file_content))

                if delta:
                    # Delta = current value minus the value recorded at the
                    # previous release for the same file.
                    delta_metrics = dict()

                    previous = metrics_previous_release.get(filepath, dict())
                    for metric, value in previous.items():

                        # Identification columns carry no delta semantics.
                        if metric in ('filepath', 'commit', 'committed_at',
                                      'failure_prone'):
                            continue

                        difference = metrics.get(metric, 0) - value
                        delta_metrics[f'delta_{metric}'] = round(difference, 3)

                    # Snapshot BEFORE merging deltas, so the next release
                    # diffs against raw values, not delta_* columns.
                    metrics_previous_release[filepath] = metrics.copy()
                    metrics.update(delta_metrics)

                # NOTE(review): DataFrame.append is deprecated in recent
                # pandas; kept for behavior compatibility with callers that
                # assert on the resulting dataset shape.
                self.dataset = self.dataset.append(metrics, ignore_index=True)

            # Restore the repository to its original state.
            git_repo.reset()
    def test_label(self):
        """label() must yield the expected FailureProneFile sequence, following
        renames (task2-renamed.yml -> task2.yml) backwards through history.
        """
        self.miner.fixing_commits = [
            '755efda3359954588c8486272b17979b3a6512a2',
            'e7df3e45e2e27a0dc16806a834b50d0856d350fe',
            '70257245257cd899b6f26870e8db11f5b66a4676',
            '73377dbdd160cc69898caa0e97975f12172bba41',
            '07d2c6720718e498598e64f24a14b992b29bdf61',
            '4428cdf62d124df67fa87c29ace3db6906504ea4',
            'fa1523351a14b6f0543cd49a131ed8aaed594fdb',
            '68195f290a09d119d2e334ed6a8add79ecf2ce5b'
        ]

        self.miner.fixed_files = [
            FixedFile(filepath='tasks/task2-renamed.yml',
                      fic='68195f290a09d119d2e334ed6a8add79ecf2ce5b',
                      bic='92b9975e1b4449b9ea8f1be5e401fdd99a37b576'),
            FixedFile(filepath='tasks/task2.yml',
                      fic='07d2c6720718e498598e64f24a14b992b29bdf61',
                      bic='a3d029beb2ce2e4f01dfe49e09f17bae9c92025f'),
            FixedFile(filepath='tasks/task1.yml',
                      fic='70257245257cd899b6f26870e8db11f5b66a4676',
                      bic='9cae22d8c88d04bd19e51623ed41e8805651aaed')
        ]

        labeled = list(self.miner.label())

        # (filepath, commit, fixing_commit) triples, in the exact order the
        # miner is expected to yield them.
        expected_triples = [
            ('tasks/task2-renamed.yml',
             '83595c66d71c54b7c20f85522055386eb4b42b6e',
             '68195f290a09d119d2e334ed6a8add79ecf2ce5b'),
            ('tasks/task2.yml',
             'fa1523351a14b6f0543cd49a131ed8aaed594fdb',
             '68195f290a09d119d2e334ed6a8add79ecf2ce5b'),
            ('tasks/task2.yml',
             '64f813de2a78fd17d898072a0d118234c1235fad',
             '68195f290a09d119d2e334ed6a8add79ecf2ce5b'),
            ('tasks/task2.yml',
             'ba54ae7f42cfd11e0e1b61bb1de175052d53742b',
             '68195f290a09d119d2e334ed6a8add79ecf2ce5b'),
            ('tasks/task2.yml',
             '4428cdf62d124df67fa87c29ace3db6906504ea4',
             '68195f290a09d119d2e334ed6a8add79ecf2ce5b'),
            ('tasks/task2.yml',
             '92b9975e1b4449b9ea8f1be5e401fdd99a37b576',
             '68195f290a09d119d2e334ed6a8add79ecf2ce5b'),
            ('tasks/task2.yml',
             '73377dbdd160cc69898caa0e97975f12172bba41',
             '07d2c6720718e498598e64f24a14b992b29bdf61'),
            ('tasks/task2.yml',
             '104f7fd66686e41a8cdd1161e975356530fcd58a',
             '07d2c6720718e498598e64f24a14b992b29bdf61'),
            ('tasks/task2.yml',
             'e5b2e85fb4e9c761cfe0c92b7f09ae95526a0e08',
             '07d2c6720718e498598e64f24a14b992b29bdf61'),
            ('tasks/task2.yml',
             'a3d029beb2ce2e4f01dfe49e09f17bae9c92025f',
             '07d2c6720718e498598e64f24a14b992b29bdf61'),
            ('tasks/task1.yml',
             'e7df3e45e2e27a0dc16806a834b50d0856d350fe',
             '70257245257cd899b6f26870e8db11f5b66a4676'),
            ('tasks/task1.yml',
             'd07ed2f58c7cbabee89dbc60a62036f22c23394a',
             '70257245257cd899b6f26870e8db11f5b66a4676'),
            ('tasks/task1.yml',
             '755efda3359954588c8486272b17979b3a6512a2',
             '70257245257cd899b6f26870e8db11f5b66a4676'),
            ('tasks/task1.yml',
             'e14240d8ca0ffd3ca8f093f39111d048819ab909',
             '70257245257cd899b6f26870e8db11f5b66a4676'),
            ('tasks/task1.yml',
             '9cae22d8c88d04bd19e51623ed41e8805651aaed',
             '70257245257cd899b6f26870e8db11f5b66a4676'),
        ]
        expected = [
            FailureProneFile(filepath=path, commit=sha, fixing_commit=fix)
            for path, sha, fix in expected_triples
        ]

        self.assertEqual(labeled, expected)