def __init__(self, path_to_repo: str, clone_repo_to: str = None, at: str = 'release'):
    """ The class constructor.

    Parameters
    ----------
    path_to_repo : str
        The path to a local or remote repository.

    clone_repo_to : str
        Path to clone the repository to.
        If path_to_repo links to a local repository, this parameter is not used. Otherwise it is mandatory.

    at : str
        When to extract metrics: at each release (``'release'``) or each commit (``'commit'``).

    Attributes
    ----------
    path_to_repo : str
        The local path of the repository. For a remote repository this is ``clone_repo_to/<repository name>``.

    commits_at : list of str
        Hashes of the commits returned by the repository traversal (releases only when ``at == 'release'``).

    dataset: pandas.DataFrame
        The metrics dataset, populated after ``extract()``.

    Raises
    ------
    ValueError
        If `at` is not release or commit, if `clone_repo_to` is missing for a remote repository, or if the path
        to the remote repository does not link to a github or gitlab repository of the form ``<owner>/<name>``.

    """

    if at not in ('release', 'commit'):
        raise ValueError(f'{at} is not valid! Use \'release\' or \'commit\'.')

    self.path_to_repo = path_to_repo

    if is_remote(path_to_repo):

        if not clone_repo_to:
            raise ValueError('clone_repo_to is mandatory when linking to a remote repository.')

        # Drop only a trailing ".git" (and trailing slashes). Removing every ".git" occurrence would
        # corrupt hosts such as "www.github.com" and repository names such as "user.github.io".
        remote_url = path_to_repo.rstrip('/')
        if remote_url.endswith('.git'):
            remote_url = remote_url[:-len('.git')]

        match = re.search(r'git(hub|lab)\.com/([\w\W]+)$', remote_url)

        if not match:
            raise ValueError('The remote repository must be hosted on github or gitlab.')

        # The captured full name is expected to look like "<owner>/<name>[/...]".
        name_parts = match.group(2).split('/')
        if len(name_parts) < 2 or not name_parts[1]:
            raise ValueError('The path to the remote repository must include both the owner and the '
                             'repository name.')

        repo_name = name_parts[1]
        self.path_to_repo = os.path.join(clone_repo_to, repo_name)

        # When the target directory already exists, clone_repo_to is not forwarded to Repository.
        # NOTE(review): presumably to avoid cloning over an existing checkout - confirm.
        if os.path.isdir(self.path_to_repo):
            clone_repo_to = None

    # Repository is always given the original path_to_repo (the URL for remote repositories).
    repo_miner = Repository(path_to_repo=path_to_repo,
                            clone_repo_to=clone_repo_to,
                            only_releases=(at == 'release'),
                            order='date-order', num_workers=1)

    self.commits_at = [commit.hash for commit in repo_miner.traverse_commits()]
    self.dataset = pd.DataFrame()
示例#2
0
def test_only_authors():
    """Check the number of commits returned when filtering with ``only_authors``."""
    repo_path = 'test-repos/multiple_authors'

    by_aniche = Repository(repo_path, only_authors=["Maurício Aniche"])
    assert len(list(by_aniche.traverse_commits())) == 4

    by_ishepard = Repository(repo_path, only_authors=["ishepard"])
    assert len(list(by_ishepard.traverse_commits())) == 1
示例#3
0
def test_single_commit_head():
    """Check that ``single="HEAD"`` returns the same commit as the explicit hash below."""
    repo_path = 'test-repos/complex_repo'

    miner_by_hash = Repository(
        repo_path, single="e7d13b0511f8a176284ce4f92ed8c6e8d09c77f2")
    by_hash = list(miner_by_hash.traverse_commits())
    assert len(by_hash) == 1

    miner_by_head = Repository(repo_path, single="HEAD")
    by_head = list(miner_by_head.traverse_commits())
    assert len(by_head) == 1

    assert by_hash[0].hash == by_head[0].hash
示例#4
0
def test_single_commit():
    """Check that ``single`` returns exactly one commit, carrying the requested hash."""
    requested_hashes = (
        "866e997a9e44cb4ddd9e00efe49361420aff2559",
        "ffccf1e7497eb8136fd66ed5e42bef29677c4b71",
    )

    for requested in requested_hashes:
        miner = Repository('test-repos/complex_repo', single=requested)
        found = list(miner.traverse_commits())
        assert len(found) == 1
        assert found[0].hash == requested
示例#5
0
def test_only_in_branches():
    """Check commit counts with no branch filter and with ``only_in_branch`` set to b2 and b1."""
    repo_path = 'test-repos/branches_not_merged'

    # by default, only analyze master
    default_commits = list(Repository(repo_path).traverse_commits())
    assert len(default_commits) == 3

    # then restrict the analysis to b2 and to b1, in that order
    for branch, expected_count in (('b2', 4), ('b1', 5)):
        miner = Repository(repo_path, only_in_branch=branch)
        assert len(list(miner.traverse_commits())) == expected_count
示例#6
0
    def discard_undesired_fixing_commits(self, commits: List[str]):
        """
        Given a list of commits, discard commits that do not modify at least one Tosca file.

        A commit is kept only if at least one of its modified files has change type ``MODIFY`` and is
        recognized as a Tosca file by ``filters.is_tosca_file``. Files that are added, deleted or renamed
        do not count.

        Note, the update occurs in-place. That is, the original list is updated (and sorted via
        ``self.sort_commits``). An empty list is left untouched.

        Parameters
        ----------
        commits : List[str]
            List of commit hash

        """
        # Nothing to filter; this also avoids an IndexError on commits[0] below.
        if not commits:
            return

        # get a sorted list of commits in ascending order of date
        self.sort_commits(commits)

        for commit in Repository(
                self.path_to_repo,
                from_commit=commits[0],  # first commit in commits
                to_commit=commits[-1],  # last commit in commits
                only_in_branch=self.branch).traverse_commits():

            # The traversal covers every commit in the range; skip those that are not candidates
            # before inspecting their modified files.
            if commit.hash not in commits:
                continue

            modifies_tosca_file = any(
                modified_file.change_type == ModificationType.MODIFY
                and filters.is_tosca_file(modified_file.new_path,
                                          modified_file.source_code)
                for modified_file in commit.modified_files)

            # if none of the modified files is a TOSCA file, then discard the commit
            if not modifies_tosca_file:
                commits.remove(commit.hash)
示例#7
0
    def discard_undesired_fixing_commits(self, commits: List[str]):
        """
        Given a list of commits, discard commits that do not modify at least one Ansible file.

        A commit is kept only if at least one of its modified files has change type ``MODIFY`` and is
        recognized as an Ansible file by ``filters.is_ansible_file``. Files that are added, deleted or
        renamed do not count.

        Note, the update occurs in-place. That is, the original list is updated (and sorted via
        ``self.sort_commits``). An empty list is left untouched.

        Parameters
        ----------
        commits : List[str]
            List of commit hashes

        """
        # Nothing to filter; this also avoids an IndexError on commits[0] below.
        if not commits:
            return

        self.sort_commits(commits)

        for commit in Repository(
                self.path_to_repo,
                from_commit=commits[0],  # first commit in commits
                to_commit=commits[-1],  # last commit in commits
                only_in_branch=self.branch).traverse_commits():

            # The traversal covers every commit in the range; skip those that are not candidates
            # before inspecting their modified files.
            if commit.hash not in commits:
                continue

            modifies_ansible_file = any(
                modified_file.change_type == ModificationType.MODIFY
                and filters.is_ansible_file(modified_file.new_path)
                for modified_file in commit.modified_files)

            # if none of the modified files is an Ansible file then discard the commit
            if not modifies_ansible_file:
                commits.remove(commit.hash)
示例#8
0
def test_only_in_main_branch():
    """Check the commits (count, hashes and order) returned when no branch filter is given."""
    expected_hashes = (
        '04b0af7b53c2a0095e98951571aa41c2e0e0dbec',
        'e51421e0beae6a3c20bdcdfc21066e05db675e03',
        'b197ef4f0b4bc5b7d55c8949ecb1c861731f0b9d',
    )

    miner = Repository('test-repos/branches_not_merged')
    visited = list(miner.traverse_commits())

    assert len(visited) == 3
    for position, expected in enumerate(expected_hashes):
        assert visited[position].hash == expected
示例#9
0
def test_filepath_with_to():
    """Check the commit count when combining the ``filepath`` filter with an upper date bound (``to``)."""
    upper_bound = datetime(2018, 6, 6)
    miner = Repository(path_to_repo='test-repos/szz',
                       filepath='myfolder/H.java',
                       to=upper_bound)
    matching = list(miner.traverse_commits())
    assert len(matching) == 5
示例#10
0
def test_no_filters():
    """Check the commits (count, hashes and order) returned when no filter is applied."""
    expected_hashes = (
        'a1b6136f978644ff1d89816bc0f2bd86f6d9d7f5',
        '375de7a8275ecdc0b28dc8de2568f47241f443e9',
        'b8c2be250786975f1c6f47e96922096f1bb25e39',
    )

    visited = list(Repository('test-repos/different_files').traverse_commits())

    assert len(visited) == 3
    for position, expected in enumerate(expected_hashes):
        assert visited[position].hash == expected
示例#11
0
def test_filepath_with_since():
    """Check the commit count when combining the ``filepath`` filter with a lower date bound (``since``)."""
    lower_bound = datetime(2018, 6, 6)
    miner = Repository(path_to_repo='test-repos/szz',
                       filepath='myfolder/H.java',
                       since=lower_bound)
    matching = list(miner.traverse_commits())
    assert len(matching) == 11
示例#12
0
def test_mod_with_file_types_no_extension():
    """Check that filtering on ``.py`` modifications returns no commit for this repository."""
    miner = Repository('test-repos/different_files',
                       only_modifications_with_file_types=['.py'])
    matching = list(miner.traverse_commits())

    assert len(matching) == 0
示例#13
0
def test_only_releases():
    """Check the commits (count, hashes and order) returned with ``only_releases=True``."""
    expected_hashes = (
        '6bb9e2c6a8080e6b5b34e6e316c894b2ddbf7fcd',
        '4638730126d40716e230c2040751a13153fb1556',
        '627e1ad917a188a861c9fedf6e5858b79edbe439',
    )

    miner = Repository('test-repos/tags', only_releases=True)
    releases = list(miner.traverse_commits())

    assert len(releases) == 3
    for position, expected in enumerate(expected_hashes):
        assert expected == releases[position].hash
def test_between_dates():
    """Check the commits returned for the ``since``/``to`` window given by ``dt1`` and ``dt2``."""
    # NOTE(review): dt1 and dt2 are not defined in this function; they are expected
    # to come from the enclosing module - confirm.
    expected_hashes = (
        'a1b6136f978644ff1d89816bc0f2bd86f6d9d7f5',
        '375de7a8275ecdc0b28dc8de2568f47241f443e9',
    )

    miner = Repository('test-repos/different_files', since=dt1, to=dt2)
    in_window = list(miner.traverse_commits())

    assert len(in_window) == 2
    for position, expected in enumerate(expected_hashes):
        assert in_window[position].hash == expected
def test_multiple_repos_with_tags():
    """Check the total commit count when the same repository is listed three times with tag bounds."""
    repo_paths = ['test-repos/tags'] * 3

    miner = Repository(path_to_repo=repo_paths, from_tag='tag2', to_tag='tag3')
    visited = list(miner.traverse_commits())

    assert len(visited) == 9
示例#16
0
def test_should_visit_ascendent_order():
    """Check the order in which commits are visited when no ``order`` is given."""
    expected_hashes = (
        'a88c84ddf42066611e76e6cb690144e5357d132c',
        '6411e3096dd2070438a17b225f44475136e54e3a',
        '09f6182cef737db02a085e1d018963c7a29bde5a',
        '1f99848edadfffa903b8ba1286a935f1b92b2845',
        'da39b1326dbc2edfe518b90672734a08f3c13458',
    )

    visited = list(Repository('test-repos/small_repo').traverse_commits())

    assert len(visited) == 5
    for position, expected in enumerate(expected_hashes):
        assert visited[position].hash == expected
示例#17
0
def test_only_no_merge():
    """Check the commits (count, hashes and order) returned with ``only_no_merge=True``."""
    expected_hashes = (
        '168b3aab057ed61a769acf336a4ef5e64f76c9fd',
        '8169f76a3d7add54b4fc7bca7160d1f1eede6eda',
        '8986af2a679759e5a15794f6d56e6d46c3f302f1',
    )

    miner = Repository('test-repos/branches_merged', only_no_merge=True)
    visited = list(miner.traverse_commits())

    assert len(visited) == 3
    for position, expected in enumerate(expected_hashes):
        assert visited[position].hash == expected
示例#18
0
def test_mod_with_file_types():
    """Check the commits returned when filtering on ``.java`` modifications, for two repositories."""
    scenarios = (
        ('test-repos/different_files',
         ('a1b6136f978644ff1d89816bc0f2bd86f6d9d7f5',
          'b8c2be250786975f1c6f47e96922096f1bb25e39')),
        ('test-repos/different_files1',
         ('5adbb71167e79ab6b974827e74c9da4d81977655',
          '0577bec2387ee131e1ccf336adcc172224d3f6f9')),
    )

    for repo_path, expected_hashes in scenarios:
        miner = Repository(repo_path,
                           only_modifications_with_file_types=['.java'])
        matching = list(miner.traverse_commits())

        assert len(matching) == 2
        assert matching[0].hash == expected_hashes[0]
        assert matching[1].hash == expected_hashes[1]
示例#19
0
def test_should_visit_descendent_order_with_filters_reversed():
    """Check the visiting order for a ``from_commit``/``to_commit`` range with ``order='reverse'``."""
    oldest = '6411e3096dd2070438a17b225f44475136e54e3a'
    middle = '09f6182cef737db02a085e1d018963c7a29bde5a'
    newest = '1f99848edadfffa903b8ba1286a935f1b92b2845'

    miner = Repository('test-repos/small_repo',
                       from_commit=oldest,
                       to_commit=newest,
                       order='reverse')
    visited = list(miner.traverse_commits())

    assert len(visited) == 3
    for position, expected in enumerate((newest, middle, oldest)):
        assert visited[position].hash == expected
def test_one_timezone():
    """Check the author date (with a +02:00 offset) of a single commit."""
    plus_two_hours = timezone(timedelta(hours=2))
    expected_date = datetime(2016, 4, 4, 13, 21, 25, tzinfo=plus_two_hours)

    miner = Repository('test-repos/branches_merged',
                       single='29e929fbc5dc6a2e9c620069b24e2a143af4285f')
    found = list(miner.traverse_commits())

    assert found[0].author_date == expected_date
def test_between_dates_reversed():
    """Check the author date (with a -04:00 offset) of a single commit.

    Despite its name, this test does not filter by date range; it only
    compares the author date of one commit.
    """
    minus_four_hours = timezone(timedelta(hours=-4))
    expected_date = datetime(2016, 10, 8, 17, 57, 49, tzinfo=minus_four_hours)

    miner = Repository('test-repos/different_files',
                       single='375de7a8275ecdc0b28dc8de2568f47241f443e9')
    found = list(miner.traverse_commits())

    assert found[0].author_date == expected_date
def test_between_dates_without_timezone():
    """Check the commits returned for a ``since``/``to`` window given as naive datetimes."""
    window_start = datetime(2016, 10, 8, 21, 0, 0)
    window_end = datetime(2016, 10, 8, 21, 59, 0)
    expected_hashes = (
        'a1b6136f978644ff1d89816bc0f2bd86f6d9d7f5',
        '375de7a8275ecdc0b28dc8de2568f47241f443e9',
    )

    miner = Repository('test-repos/different_files',
                       since=window_start,
                       to=window_end)
    in_window = list(miner.traverse_commits())

    assert len(in_window) == 2
    for position, expected in enumerate(expected_hashes):
        assert in_window[position].hash == expected
示例#23
0
def test_only_in_branch():
    """Check the commits (count, hashes and order) returned with ``only_in_branch='b1'``."""
    expected_hashes = (
        '04b0af7b53c2a0095e98951571aa41c2e0e0dbec',
        'e51421e0beae6a3c20bdcdfc21066e05db675e03',
        'b197ef4f0b4bc5b7d55c8949ecb1c861731f0b9d',
        '87a31153090808f1e6f679a14ea28729a0b74f4d',
        '702d469710d2087e662c210fd0e4f9418ec813fd',
    )

    miner = Repository('test-repos/branches_not_merged', only_in_branch='b1')
    visited = list(miner.traverse_commits())
    assert len(visited) == 5

    for position, expected in enumerate(expected_hashes):
        assert visited[position].hash == expected
示例#24
0
def test_filepath_with_rename():
    """Check which commits are returned for ``filepath='file4.java'`` up to a given date."""
    upper_bound = datetime(2018, 6, 6)
    miner = Repository(path_to_repo='test-repos/small_repo',
                       filepath='file4.java',
                       to=upper_bound)
    matching = list(miner.traverse_commits())
    assert len(matching) == 2

    found_hashes = [entry.hash for entry in matching]

    for expected in ('da39b1326dbc2edfe518b90672734a08f3c13458',
                     'a88c84ddf42066611e76e6cb690144e5357d132c'):
        assert expected in found_hashes
示例#25
0
def test_topo_order():
    """Check the first eight commits visited with ``order='topo-order'``."""
    expected_hashes = (
        '5e3cfa27b3fe6dd4d12fd89664fea9397141b843',
        '19732de9e2b58ba7285f272810a9d8ddf18e7c89',
        '9cc3af5f242a1eba297f270acbdb8b6628556413',
        'd23d7f6d37fd1163022a5dd46985acd34e6818d7',
        '78a94953a3e140f2d0027fb57963345fbf6d59fe',
        '6564f9e0bfb38725ebcfb4547e98e7f545c7de12',
        '5c95c1c6ba95a1bdb12772d1a63c7d331e664819',
        'a45c8649b00d8b48cee04a822bd1d82acd667db2',
    )

    miner = Repository('test-repos/order', order='topo-order')
    visited = list(miner.traverse_commits())

    for position, expected in enumerate(expected_hashes):
        assert expected == visited[position].hash
示例#26
0
def test_mod_with_file_types_and_date():
    """Check the commit returned when combining the ``.java`` file-type filter with ``since``."""
    plus_two_hours = timezone(timedelta(hours=2))
    lower_bound = datetime(2016, 10, 8, 23, 57, 49, tzinfo=plus_two_hours)
    print(lower_bound)

    miner = Repository('test-repos/different_files',
                       only_modifications_with_file_types=['.java'],
                       since=lower_bound)
    matching = list(miner.traverse_commits())

    print(matching)
    assert len(matching) == 1
    assert matching[0].hash == 'b8c2be250786975f1c6f47e96922096f1bb25e39'
示例#27
0
def test_only_commits():
    """Check the ``only_commits`` filter with one, two, three and an unknown hash."""
    repo_path = 'test-repos/complex_repo'

    def select(hashes):
        # Traverse the repository restricted to the given hashes.
        return list(Repository(repo_path, only_commits=hashes).traverse_commits())

    # a single hash
    selected = select(["9e71dd5726d775fb4a5f08506a539216e878adbb"])
    assert len(selected) == 1
    assert selected[0].hash == "9e71dd5726d775fb4a5f08506a539216e878adbb"

    # two hashes: the expected result order differs from the order requested
    selected = select([
        "953737b199de233896f00b4d87a0bc2794317253",
        "ffccf1e7497eb8136fd66ed5e42bef29677c4b71",
    ])
    assert len(selected) == 2
    assert selected[0].hash == "ffccf1e7497eb8136fd66ed5e42bef29677c4b71"
    assert selected[1].hash == "953737b199de233896f00b4d87a0bc2794317253"

    # three hashes: expected in the order requested
    three_hashes = [
        "866e997a9e44cb4ddd9e00efe49361420aff2559",
        "57dbd017d1a744b949e7ca0b1c1a3b3dd4c1cbc1",
        "e7d13b0511f8a176284ce4f92ed8c6e8d09c77f2",
    ]
    selected = select(list(three_hashes))
    assert len(selected) == 3
    for position, expected in enumerate(three_hashes):
        assert selected[position].hash == expected

    # a hash that does not exist selects nothing
    selected = select(["fake hash"])
    assert len(selected) == 0

    # without the filter, every commit is returned
    everything = list(Repository(repo_path).traverse_commits())
    total_commits = len(everything)

    assert total_commits == 13
示例#28
0
def test_filepath_with_rename_complex():
    """Check which commits are returned for ``filepath='Matricula.javax'``."""
    expected_hashes = (
        'f0dd1308bd904a9b108a6a40865166ee962af3d4',
        '953737b199de233896f00b4d87a0bc2794317253',
        'a3290ac2f555eabca9e31180cf38e91f9e7e2761',
        '71535a31f0b598a5d5fcebda7146ebc01def783a',
        '57dbd017d1a744b949e7ca0b1c1a3b3dd4c1cbc1',
        '866e997a9e44cb4ddd9e00efe49361420aff2559',
    )

    miner = Repository(path_to_repo='test-repos/complex_repo',
                       filepath='Matricula.javax')
    matching = list(miner.traverse_commits())
    assert len(matching) == 6

    found_hashes = [entry.hash for entry in matching]

    for expected in expected_hashes:
        assert expected in found_hashes
def test_between_revisions():
    """Check the commits (count, hashes and order) returned between ``tag1`` and ``tag3``."""
    expected_hashes = (
        '6bb9e2c6a8080e6b5b34e6e316c894b2ddbf7fcd',
        'f1a90b8d7b151ceefd3e3dfc0dc1d0e12b5f48d0',
        '4638730126d40716e230c2040751a13153fb1556',
        'a26f1438bd85d6b22497c0e5dae003812becd0bc',
        '627e1ad917a188a861c9fedf6e5858b79edbe439',
    )

    miner = Repository('test-repos/tags', from_tag='tag1', to_tag='tag3')
    visited = list(miner.traverse_commits())

    assert len(visited) == 5
    for position, expected in enumerate(expected_hashes):
        assert expected == visited[position].hash
示例#30
0
def test_include_refs():
    """Check which extra commits are returned when ``include_refs`` is switched on."""
    repo_path = 'test-repos/branches_not_merged/'

    without_refs = list(
        Repository(repo_path, include_refs=False).traverse_commits())
    assert len(without_refs) == 3
    hashes_without_refs = {entry.hash for entry in without_refs}

    with_refs = list(
        Repository(repo_path, include_refs=True).traverse_commits())
    assert len(with_refs) == 6
    hashes_with_refs = {entry.hash for entry in with_refs}

    # commits that only show up when refs are included
    only_with_refs = hashes_with_refs - hashes_without_refs
    assert len(only_with_refs) == 3

    # First commit on branch b1
    assert '87a31153090808f1e6f679a14ea28729a0b74f4d' in only_with_refs
    # Commit that branch b1 points to
    assert '702d469710d2087e662c210fd0e4f9418ec813fd' in only_with_refs
    # Commit that branch b2 points to
    assert '7203c0b8220dcc7a59614bc7549799cd203ac072' in only_with_refs