def test_failure_prone_file_eq_false(self):
    """FailureProneFile instances differing in commit or filepath must compare unequal."""
    first = FailureProneFile(filepath='file1.yml', commit='123', fixing_commit='456')
    second = FailureProneFile(filepath='file1.yml', commit='456', fixing_commit='456')
    third = FailureProneFile(filepath='file2.yml', commit='123', fixing_commit='456')

    # Pairwise inequality (same as the chained `first != second != third`).
    assert first != second
    assert second != third

    # Membership relies on __eq__, so neither neighbor matches `first`.
    assert first not in [second, third]
def test_failure_prone_file_eq_false_instance(self):
    """A FailureProneFile must never compare equal to a FixedFile instance."""
    failure_prone = FailureProneFile(filepath='file1.yml', commit='123', fixing_commit='456')
    fixed = FixedFile(filepath='file2.yml', fic='123', bic='456')
    assert failure_prone != fixed
def label(self) -> Generator[FailureProneFile, None, None]:
    """
    For each FixedFile object, yield a FailureProneFile object for each commit
    between the FixedFile's bug-introducing-commit (bic) and its
    fixing-commit (fic).

    `Note:` make sure to run the method ``get_fixed_files`` before.

    Yields
    ------
    FailureProneFile
        A FailureProneFile object.
    """
    # Both collections are required: the traversal below reads
    # self.fixing_commits[-1], so an empty fixing_commits with a non-empty
    # fixed_files would raise IndexError. Hence 'and', not 'or' (this also
    # matches the guard used by the newer Repository-based implementation).
    if not (self.fixing_commits and self.fixed_files):
        return

    # Group fixed files by path so multiple (fic, bic) pairs for the same
    # file can be tracked and retired independently.
    labeling = dict()
    for file in self.fixed_files:
        labeling.setdefault(file.filepath, list()).append(file)

    for commit in RepositoryMining(self.path_to_repo,
                                   from_commit=self.fixing_commits[-1],
                                   to_commit=self.commit_hashes[0],
                                   order='reverse').traverse_commits():

        for files in labeling.values():
            # Iterate over a copy: entries are removed from this very list
            # once their bug-introducing commit is reached (the original
            # code mutated `files` while iterating it, which skips the
            # element following each removal).
            for file in list(files):
                idx_fic = self.commit_hashes.index(file.fic)
                idx_bic = self.commit_hashes.index(file.bic)
                idx_commit = self.commit_hashes.index(commit.hash)

                # Failure-prone in every commit from the bic (inclusive)
                # up to, but excluding, the fic.
                if idx_fic > idx_commit >= idx_bic:
                    yield FailureProneFile(filepath=file.filepath,
                                           commit=commit.hash,
                                           fixing_commit=file.fic)

                # Reached the bug-introducing commit: stop tracking this entry.
                if idx_commit == idx_bic and file.filepath in labeling:
                    if file in labeling[file.filepath]:
                        labeling[file.filepath].remove(file)

        # Handle file renaming
        for modified_file in commit.modifications:
            filepath = modified_file.new_path

            # Copy here too: the ADD branch removes from the same list.
            for file in list(labeling.get(filepath, list())):
                if self.commit_hashes.index(file.fic) > \
                        self.commit_hashes.index(commit.hash) >= \
                        self.commit_hashes.index(file.bic):

                    if modified_file.change_type == ModificationType.ADD:
                        # File created at this commit: nothing earlier to label.
                        if filepath in labeling and file in labeling[filepath]:
                            labeling[filepath].remove(file)
                    elif modified_file.change_type == ModificationType.RENAME:
                        # Follow the file backwards under its previous name.
                        file.filepath = modified_file.old_path
                        break
def test_fixed_file_encoder_none(self):
    """FixedFileEncoder.default must return None when handed an object
    that is not a FixedFile (here, a FailureProneFile)."""
    not_a_fixed_file = FailureProneFile(filepath='file1.yml',
                                        commit='123',
                                        fixing_commit='456')
    assert FixedFileEncoder().default(not_a_fixed_file) is None
def test_encoder(self):
    """FailureProneFileEncoder.default must serialize a FailureProneFile
    into a plain dict carrying its three fields."""
    lf1 = FailureProneFile(filepath='file1.yml', commit='123', fixing_commit='456')
    encoded = FailureProneFileEncoder().default(lf1)

    # isinstance is the idiomatic type check (type(x) == dict needlessly
    # rejects dict subclasses and is flagged by linters).
    assert isinstance(encoded, dict)
    assert encoded == {
        "filepath": lf1.filepath,
        "commit": lf1.commit,
        "fixing_commit": lf1.fixing_commit
    }
def test_extract_at_release(self):
    """Extract metrics at release granularity and check the resulting
    dataset shapes for each combination of metric families, then verify
    the failure-prone labeling counts for a second fixing commit."""
    extractor = BaseMetricsExtractor(
        path_to_repo=
        'https://github.com/stefanodallapalma/radon-repository-miner-testing',
        clone_repo_to=self.path_to_tmp_dir,
        at='release')

    labeled = [
        FailureProneFile(
            filepath='test_is_comment_changed.py',
            commit='c029d7520456e5468d66b56fe176146680520b20',
            fixing_commit='d39fdb44e98869835fe59a86d20d05a9e82d5282')
    ]

    # (product, process, delta) -> expected dataset shape
    scenarios = [
        ((True, False, False), (9, 4)),
        ((False, True, False), (9, 20)),
        ((False, False, True), (9, 4)),
        ((True, False, True), (9, 4)),
        ((False, True, True), (9, 36)),
    ]
    for (product, process, delta), expected_shape in scenarios:
        extractor.extract(labeled, product=product, process=process, delta=delta)
        self.assertEqual(extractor.dataset.shape, expected_shape)

    labeled = [
        FailureProneFile(
            filepath='test_is_comment_changed.py',
            commit='d39fdb44e98869835fe59a86d20d05a9e82d5282',
            fixing_commit='75da5889425815009cc0eb4bdff68f59024d351f')
    ]
    extractor.extract(labeled, product=True, process=False, delta=False)
    self.assertEqual(extractor.dataset.shape, (9, 4))

    # Exactly one of the nine rows is labeled failure-prone.
    labels = extractor.dataset.failure_prone.to_list()
    self.assertEqual(labels.count(0), 8)
    self.assertEqual(labels.count(1), 1)
def label(self) -> Generator[FailureProneFile, None, None]:
    """
    For each FixedFile object, yield a FailureProneFile object for each commit
    between the FixedFile's bug-introducing-commit (bic) and its
    fixing-commit (fic), walking the history newest-first and re-mapping
    file paths across renames.

    `Note:` make sure to run the method ``get_fixed_files`` before.

    Yields
    ------
    FailureProneFile
        A FailureProneFile object.
    """
    # Guard both collections: the traversal below reads
    # self.fixing_commits[-1], and without fixed files there is nothing to label.
    if not (self.fixing_commits and self.fixed_files):
        return

    # NOTE(review): `labeling` is built here but never read afterwards in
    # this version of the method — looks like leftover state from the older
    # implementation; confirm before removing.
    labeling = dict()
    for file in self.fixed_files:
        labeling.setdefault(file.filepath, list()).append(file)

    # Ensure fixing_commits is in repository order so [-1] is the latest one.
    self.sort_commits(self.fixing_commits)

    # Maps current path -> older path, populated as renames are encountered
    # while walking backwards through history.
    renamed_files = {}

    # Walk from the most recent fixing commit back to the first commit,
    # newest first.
    for commit in Repository(self.path_to_repo,
                             from_commit=self.fixing_commits[-1],
                             to_commit=self.commit_hashes[0],
                             order='reverse',
                             num_workers=1).traverse_commits():

        for file in self.fixed_files:
            idx_fic = self.commit_hashes.index(file.fic)
            idx_bic = self.commit_hashes.index(file.bic)
            idx_commit = self.commit_hashes.index(commit.hash)

            # Failure-prone in every commit from the bic (inclusive) up to,
            # but excluding, the fic. Yield under the name the file had at
            # this point in history, if a rename was seen.
            if idx_fic > idx_commit >= idx_bic:
                yield FailureProneFile(filepath=renamed_files.get(
                    file.filepath, file.filepath),
                                       commit=commit.hash,
                                       fixing_commit=file.fic)

        # Handle file renaming: remember that, before this commit, the file
        # lived under its old path.
        for modified_file in commit.modified_files:
            if modified_file.change_type == ModificationType.RENAME:
                renamed_files[
                    modified_file.new_path] = modified_file.old_path
def extract(self, labeled_files: List[FailureProneFile], product: bool = True, process: bool = True, delta: bool = False):
    """
    Extract metrics from labeled files.

    Traverses every commit of the repository in date order; at each commit
    listed in ``self.releases`` it checks out that revision and builds one
    metrics row per file returned by ``self.get_files()``, labeling the row
    failure-prone (1) when an equivalent FailureProneFile is present in
    ``labeled_files`` and clean (0) otherwise. Rows accumulate in
    ``self.dataset``.

    Parameters
    ----------
    labeled_files : List[FailureProneFile]
        The list of FailureProneFile objects that are used to label a script
        as failure-prone (1) or clean (0).
    product: bool
        Whether to extract product metrics.
    process: bool
        Whether to extract process metrics.
    delta: bool
        Whether to extract delta metrics between two successive releases
        (or commits).
    """
    git_repo = GitRepository(self.path_to_repo)

    metrics_previous_release = dict()  # Values for iac metrics in the last release

    for commit in RepositoryMining(self.path_to_repo,
                                   order='date-order').traverse_commits():

        # To handle renaming in metrics_previous_release: keep the stored
        # previous-release metrics reachable under the file's new path.
        for modified_file in commit.modifications:

            old_path = modified_file.old_path
            new_path = modified_file.new_path

            if old_path != new_path and old_path in metrics_previous_release:
                # Rename key old_path with new_path
                metrics_previous_release[
                    new_path] = metrics_previous_release.pop(old_path)

        # Only release commits produce dataset rows.
        if commit.hash not in self.releases:
            continue

        # Else: this commit is a release — inspect its working tree.
        git_repo.checkout(commit.hash)

        if process:
            # Extract process metrics over the window from the previous
            # release to this one (the first release uses itself as both ends).
            i = self.releases.index(commit.hash)
            from_previous_commit = commit.hash if i == 0 else self.releases[
                i - 1]
            to_current_commit = commit.hash  # = self.releases[i]
            process_metrics = self.get_process_metrics(
                from_previous_commit, to_current_commit)

        # NOTE(review): when process=False, `process_metrics` is never
        # assigned here before the `if process_metrics:` check below —
        # looks like a possible NameError on the first release unless it is
        # initialized elsewhere; confirm.

        for filepath in self.get_files():

            file_content = get_content(
                os.path.join(self.path_to_repo, filepath))

            if not file_content or self.ignore_file(filepath, file_content):
                continue

            # Equality of FailureProneFile is used for the label lookup;
            # fixing_commit is irrelevant for the comparison here.
            tmp = FailureProneFile(filepath=filepath,
                                   commit=commit.hash,
                                   fixing_commit='')
            if tmp not in labeled_files:
                label = 0  # clean
            else:
                label = 1  # failure-prone

            metrics = dict(filepath=filepath,
                           commit=commit.hash,
                           committed_at=str(commit.committer_date),
                           failure_prone=label)

            if process_metrics:
                # change_set_* are release-level scalars; the remaining
                # process metrics are per-file dicts defaulting to 0.
                metrics['change_set_max'] = process_metrics[
                    'dict_change_set_max']
                metrics['change_set_avg'] = process_metrics[
                    'dict_change_set_avg']
                metrics['code_churn_count'] = process_metrics[
                    'dict_code_churn_count'].get(filepath, 0)
                metrics['code_churn_max'] = process_metrics[
                    'dict_code_churn_max'].get(filepath, 0)
                metrics['code_churn_avg'] = process_metrics[
                    'dict_code_churn_avg'].get(filepath, 0)
                metrics['commits_count'] = process_metrics[
                    'dict_commits_count'].get(filepath, 0)
                metrics['contributors_count'] = process_metrics[
                    'dict_contributors_count'].get(filepath, 0)
                metrics['minor_contributors_count'] = process_metrics[
                    'dict_minor_contributors_count'].get(filepath, 0)
                metrics[
                    'highest_contributor_experience'] = process_metrics[
                        'dict_highest_contributor_experience'].get(
                            filepath, 0)
                metrics['hunks_median'] = process_metrics[
                    'dict_hunks_median'].get(filepath, 0)
                metrics['additions'] = process_metrics[
                    'dict_additions'].get(filepath, 0)
                metrics['additions_max'] = process_metrics[
                    'dict_additions_max'].get(filepath, 0)
                metrics['additions_avg'] = process_metrics[
                    'dict_additions_avg'].get(filepath, 0)
                metrics['deletions'] = process_metrics[
                    'dict_deletions'].get(filepath, 0)
                metrics['deletions_max'] = process_metrics[
                    'dict_deletions_max'].get(filepath, 0)
                metrics['deletions_avg'] = process_metrics[
                    'dict_deletions_avg'].get(filepath, 0)

            if product:
                metrics.update(self.get_product_metrics(file_content))

            if delta:
                delta_metrics = dict()

                previous = metrics_previous_release.get(filepath, dict())
                for metric, value in previous.items():
                    # Identity/label columns are not numeric metrics.
                    if metric in ('filepath', 'commit', 'committed_at',
                                  'failure_prone'):
                        continue

                    difference = metrics.get(metric, 0) - value
                    delta_metrics[f'delta_{metric}'] = round(difference, 3)

                # Snapshot the raw values BEFORE merging deltas, so the
                # next release diffs against raw metrics, not delta columns.
                metrics_previous_release[filepath] = metrics.copy()
                metrics.update(delta_metrics)

            self.dataset = self.dataset.append(metrics, ignore_index=True)

    # Restore the repository working tree after the release checkouts.
    git_repo.reset()
def test_label(self):
    """label() must yield one FailureProneFile per commit between each fixed
    file's bug-introducing commit (inclusive) and its fixing commit
    (exclusive), newest first, following renames backwards."""
    self.miner.fixing_commits = [
        '755efda3359954588c8486272b17979b3a6512a2',
        'e7df3e45e2e27a0dc16806a834b50d0856d350fe',
        '70257245257cd899b6f26870e8db11f5b66a4676',
        '73377dbdd160cc69898caa0e97975f12172bba41',
        '07d2c6720718e498598e64f24a14b992b29bdf61',
        '4428cdf62d124df67fa87c29ace3db6906504ea4',
        'fa1523351a14b6f0543cd49a131ed8aaed594fdb',
        '68195f290a09d119d2e334ed6a8add79ecf2ce5b',
    ]
    self.miner.fixed_files = [
        FixedFile(filepath='tasks/task2-renamed.yml',
                  fic='68195f290a09d119d2e334ed6a8add79ecf2ce5b',
                  bic='92b9975e1b4449b9ea8f1be5e401fdd99a37b576'),
        FixedFile(filepath='tasks/task2.yml',
                  fic='07d2c6720718e498598e64f24a14b992b29bdf61',
                  bic='a3d029beb2ce2e4f01dfe49e09f17bae9c92025f'),
        FixedFile(filepath='tasks/task1.yml',
                  fic='70257245257cd899b6f26870e8db11f5b66a4676',
                  bic='9cae22d8c88d04bd19e51623ed41e8805651aaed'),
    ]

    actual = list(self.miner.label())

    # (filepath, commit, fixing_commit) triples in expected yield order.
    expected_triples = [
        ('tasks/task2-renamed.yml',
         '83595c66d71c54b7c20f85522055386eb4b42b6e',
         '68195f290a09d119d2e334ed6a8add79ecf2ce5b'),
        ('tasks/task2.yml',
         'fa1523351a14b6f0543cd49a131ed8aaed594fdb',
         '68195f290a09d119d2e334ed6a8add79ecf2ce5b'),
        ('tasks/task2.yml',
         '64f813de2a78fd17d898072a0d118234c1235fad',
         '68195f290a09d119d2e334ed6a8add79ecf2ce5b'),
        ('tasks/task2.yml',
         'ba54ae7f42cfd11e0e1b61bb1de175052d53742b',
         '68195f290a09d119d2e334ed6a8add79ecf2ce5b'),
        ('tasks/task2.yml',
         '4428cdf62d124df67fa87c29ace3db6906504ea4',
         '68195f290a09d119d2e334ed6a8add79ecf2ce5b'),
        ('tasks/task2.yml',
         '92b9975e1b4449b9ea8f1be5e401fdd99a37b576',
         '68195f290a09d119d2e334ed6a8add79ecf2ce5b'),
        ('tasks/task2.yml',
         '73377dbdd160cc69898caa0e97975f12172bba41',
         '07d2c6720718e498598e64f24a14b992b29bdf61'),
        ('tasks/task2.yml',
         '104f7fd66686e41a8cdd1161e975356530fcd58a',
         '07d2c6720718e498598e64f24a14b992b29bdf61'),
        ('tasks/task2.yml',
         'e5b2e85fb4e9c761cfe0c92b7f09ae95526a0e08',
         '07d2c6720718e498598e64f24a14b992b29bdf61'),
        ('tasks/task2.yml',
         'a3d029beb2ce2e4f01dfe49e09f17bae9c92025f',
         '07d2c6720718e498598e64f24a14b992b29bdf61'),
        ('tasks/task1.yml',
         'e7df3e45e2e27a0dc16806a834b50d0856d350fe',
         '70257245257cd899b6f26870e8db11f5b66a4676'),
        ('tasks/task1.yml',
         'd07ed2f58c7cbabee89dbc60a62036f22c23394a',
         '70257245257cd899b6f26870e8db11f5b66a4676'),
        ('tasks/task1.yml',
         '755efda3359954588c8486272b17979b3a6512a2',
         '70257245257cd899b6f26870e8db11f5b66a4676'),
        ('tasks/task1.yml',
         'e14240d8ca0ffd3ca8f093f39111d048819ab909',
         '70257245257cd899b6f26870e8db11f5b66a4676'),
        ('tasks/task1.yml',
         '9cae22d8c88d04bd19e51623ed41e8805651aaed',
         '70257245257cd899b6f26870e8db11f5b66a4676'),
    ]
    expected = [
        FailureProneFile(filepath=path, commit=sha, fixing_commit=fix)
        for path, sha, fix in expected_triples
    ]
    self.assertEqual(actual, expected)