Пример #1
0
    def _get_first_commit_date_associated_with_service(self, service_id):
        """
        Return the date of the earliest commit found in any repository linked to a service.

        Parameters
        ----------
        service_id: int
            identifier used to look up the service's repositories

        Returns
        -------
        date or None
            the smallest ``date`` attribute among all commits of the linked
            repositories; None when the service has no repositories or none
            of them has commits
        """
        service_repo_conn = ServiceRepositoryConn(path_to_db=self.db_path)
        commit_conn = RepositoryCommitConn(path_to_db=self.db_path)
        first_commit_date = None
        service_repo_list = service_repo_conn.get_service_repository_by_service_id(
            service_id=service_id)
        for sr in service_repo_list:
            commits = commit_conn.get_commits_by_repo(
                repository_id=sr.get('repository_id'))
            if len(commits) == 0:
                # repositories without commits do not contribute a candidate date
                continue
            # only the minimum is needed, so a full sort is unnecessary
            repo_first_date = min(c.date for c in commits)
            if first_commit_date is None or repo_first_date < first_commit_date:
                first_commit_date = repo_first_date
        return first_commit_date
Пример #2
0
 def test_get_repository_by_sha(self):
     """
     Check that looking a repository up by commit SHA returns the repository
     the commit was read from.

     The first repository id (counting up from 1) that has commits is used,
     and up to ten of its commits are sampled at random.
     """
     commit_conn = RepositoryCommitConn(path_to_db=self.db_path)
     commits = list()
     repo_id = 0
     # NOTE(review): this scan never terminates if no repository has commits
     while len(commits) == 0:
         repo_id += 1
         commits = commit_conn.get_commits_by_repo(repository_id=repo_id)
     # random.sample raises ValueError when asked for more items than exist,
     # so cap the sample size for repositories with fewer than ten commits
     sample_size = min(10, len(commits))
     for c in random.sample(commits, sample_size):
         repo = self.conn.get_repository_by_sha(sha=c.sha)
         self.assertEqual(repo.repository_id, repo_id)
Пример #3
0
    def find_inconsistent_commits(self, repository_id, extensions):
        """
        Ask the repository-commit connection for the inconsistent commits of a repository.

        The call is forwarded unchanged to
        ``RepositoryCommitConn.get_inconsistent_commits`` on a connection built
        from ``self.path_to_db``.

        Parameters
        ----------
        repository_id: int
        extensions: list of str

        Returns
        -------
        list of Commit
        """
        return RepositoryCommitConn(self.path_to_db).get_inconsistent_commits(
            repository_id=repository_id, extensions=extensions)
Пример #4
0
class TestRepositoryCommitConn(TestCase):
    """Tests for RepositoryCommitConn, run against the database named by the DB_PATH environment variable."""

    def setUp(self) -> None:
        """Build the connection under test from the DB_PATH environment variable."""
        self.db_path = os.getenv('DB_PATH')
        self.conn = RepositoryCommitConn(path_to_db=self.db_path)

    def test_insert_repository_commit(self):
        """Insert a commit, delete it again and check that the repository is left without commits."""
        u = User(email='*****@*****.**', name='test-name', login='******')
        dt = datetime(year=2019, month=1, day=1)
        commit = Commit(date=dt, sha='test', user=u)
        repository_id = 3432432
        user_id = 124397
        commit_id = self.conn.insert_repository_commit(
            commit=commit, repository_id=repository_id, user_id=user_id)
        self.assertIsInstance(commit_id, int)
        self.conn.delete_commit(commit_id)
        commits = self.conn.get_commits_by_repo(repository_id=repository_id)
        # assertEquals is a deprecated alias that was removed in Python 3.12
        self.assertEqual(len(commits), 0)

    def test_get_commits_by_repo(self):
        """Every item returned for repository 1 must be a Commit instance."""
        repository_id = 1
        commits = self.conn.get_commits_by_repo(repository_id=repository_id)
        self.assertTrue(all(isinstance(c, Commit) for c in commits))

    def test_get_inconsistent_commits(self):
        """Smoke test: the query must run without raising; its result is not inspected."""
        self.conn.get_inconsistent_commits(repository_id=1, extensions=['py'])
Пример #5
0
    def get_base_commits(self, repo: Repository, start_date, end_date):
        """
        Collect commits by following the parent chain backwards from the last
        commit returned for the requested date window.

        Starting at the last element of ``get_commits_by_repo``, each commit is
        reloaded together with its file modifications and appended to the
        result; the walk then moves to the commit named by
        ``get_parent_commit_sha`` and stops as soon as no commit is found or
        its date leaves the ``[start, end)`` window.

        Parameters
        ----------
        repo: Repository
        start_date: str
            '%Y-%m-%d' formatted lower bound (inclusive); None means unbounded
        end_date: str
            '%Y-%m-%d' formatted upper bound (exclusive); None means unbounded

        Returns
        -------
        List
            commits in the order they were visited (newest first along the
            parent chain); empty when the window holds no commits
        """
        collected = list()
        window_commits = self.get_commits_by_repo(repository_id=repo.repository_id,
                                                  start_date=start_date,
                                                  end_date=end_date)
        if window_commits is None or len(window_commits) == 0:
            return collected

        current = window_commits[-1]

        # missing bounds are replaced by sentinel dates far outside any real history
        if start_date is None:
            lower_bound = datetime(year=1000, month=1, day=1)
        else:
            lower_bound = datetime.strptime(start_date, '%Y-%m-%d')
        if end_date is None:
            upper_bound = datetime(year=9999, month=12, day=1)
        else:
            upper_bound = datetime.strptime(end_date, '%Y-%m-%d')

        detail_conn = RepositoryCommitConn(self.path_to_db)
        while current is not None:
            if not (lower_bound <= current.date < upper_bound):
                break
            detailed = detail_conn.get_commit_and_its_filemodifications_by_sha(
                sha=current.sha)
            collected.append(detailed)
            # step to the parent of the commit that was just recorded
            current = self.get_commit(
                sha=self.get_parent_commit_sha(commit=current))

        return collected
Пример #6
0
 def test_loc_counter(self):
     """
     Compare the file modifications stored in the database with the expected
     rows held in ``self.df``.

     First pass: for every distinct SHA in ``self.df`` that exists in the
     database, each expected row must match a stored file modification on
     filename, additions and deletions.

     Second pass: commits of a repository that are not listed in ``self.df``
     are scanned and a message is printed for every '.py' file modification
     whose commit comment does not contain 'merge'. This pass only prints; it
     asserts nothing.
     """
     filemodification_conn = FileModificationConn(path_to_db=self.db_path)
     commit_conn = RepositoryCommitConn(path_to_db=self.db_path)
     shas = self.df.SHA.unique()
     for sha in shas:
         print('SHA={}'.format(sha))
         commit = commit_conn.get_commit_and_its_filemodifications_by_sha(sha=sha)
         if commit is not None:
             # empty pattern tuples: no include/exclude patterns are supplied for the expected commits
             filemodifications = filemodification_conn. \
                 get_file_modifications_per_commit(commit=commit, including_patterns=(),
                                                   excluding_patterns=())
             subdf = self.df[self.df.SHA == sha]
             for index, row in subdf.iterrows():
                 # presumably columns 1..3 of self.df are additions, deletions, filename -- TODO confirm
                 additions = row[1]
                 deletions = row[2]
                 filename = row[3]
                 found = False
                 for fm in filemodifications:
                     if fm.filename == filename and fm.additions == additions and fm.deletions == deletions:
                         found = True
                         break
                 if not found:
                     print("Error! Not found : SHA={} additions={} deletions={} filename={}" \
                           .format(sha, additions, deletions, filename))
                 # NOTE(review): the message is not passed to assertTrue; this statement builds a
                 # (result, message) tuple and discards it -- probably meant assertTrue(found, msg)
                 self.assertTrue(found), \
                     "Error! Not found : SHA={} additions={} deletions={} filename={}" \
                     .format(sha, additions, deletions, filename)
     # NOTE(review): the keyword values on the next lines (repository_id=, service_id=) are
     # missing in this copy of the source, so the function does not parse as written
     commits = commit_conn.get_commits_by_repo(repository_id=)
     filesystem_mgr = FileSystemMgr(self.db_path)
     excluding_patterns = filesystem_mgr.get_excluding_patterns(service_id=,
                                                                repository_id=)
     including_patterns = filesystem_mgr.get_including_patterns(service_id=,
                                                                repository_id=)
     for c in commits:
         if c.sha not in shas:
             filemodifications = filemodification_conn. \
                 get_file_modifications_per_commit(commit=c, excluding_patterns=excluding_patterns,
                                                   including_patterns=including_patterns)
             for fm in filemodifications:
                 if fm.filename.endswith('.py') and 'merge' not in c.comment.lower():
                     # NOTE(review): the whole fm object (not fm.filename) is formatted into the 'filename' slot
                     print('Error! filename = {} SHA = {} date={}'.format(fm, c.sha, c.date.isoformat()))
Пример #7
0
    def delete_commits_of_repository(self, repository_id):
        """
        Delete every commit stored for a repository, together with its file
        modifications and the parent-commit records found for its SHA.

        :param repository_id: int
        :return: bool
            True when the repository does not exist or at least one commit was
            deleted; False when the repository exists but has no commits
        """
        repository = RepositoryConn(path_to_db=self.path_to_db).get_repository(
            repo_id=repository_id)
        deleted = 0
        if repository is not None:
            commit_store = RepositoryCommitConn(path_to_db=self.path_to_db)
            file_mod_store = FileModificationConn(path_to_db=self.path_to_db)
            parent_store = ParentCommitConn(path_to_db=self.path_to_db)
            parent_link_store = ParentCommitRepoCommitConn(
                path_to_db=self.path_to_db)
            stored_commits = commit_store.get_commits_by_repo(
                repository_id=repository.repository_id)
            for stored in stored_commits:
                # dependent rows go first, the commit row itself last
                file_mod_store.delete_file_modifications(
                    commit_id=stored.commit_id)
                parent_id = parent_store.get_parent_commit_id(sha=stored.sha)
                if parent_id is not None:
                    parent_store.delete_parent_commit(
                        parent_commit_id=parent_id)
                    parent_link_store.delete_parent_commit_repocommit(
                        parent_commit_id=parent_id)
                commit_store.delete_commit(commit_id=stored.commit_id)
                deleted += 1
            outcome = deleted > 0
        else:
            # an unknown repository has nothing left to delete
            outcome = True
        print('{} commits were deleted'.format(deleted))
        return outcome
Пример #8
0
    def get_time_bins(self, service_names: List[str], step_size_aprox: int,
                      until: datetime) -> Tuple:
        """
        Build evenly spaced time bins that start at the earliest commit of the
        given services and run up to ``until``.

        The span between the earliest commit and ``until`` is divided by
        ``step_size_aprox`` and rounded to a whole number of steps (so each
        step is roughly ``step_size_aprox`` days long); the span is then split
        into that many equal steps.

        Parameters
        ----------
        service_names: list of str
            services whose repositories are scanned for the earliest commit
        step_size_aprox: int
            approximate width of one bin, in days
        until: datetime
            end of the covered period

        Returns
        -------
        tuple of datetime
            strictly increasing bin edges; the first edge is the earliest
            commit date and, when ``until`` lies after it, the last edge is
            the first one that is not before ``until``

        Raises
        ------
        ValueError
            if no commit date could be determined (no services or no
            repositories linked to them)
        AssertionError
            if a linked repository cannot be found, has no commits, or the
            resulting edges are not strictly increasing
        """
        repository_conn = RepositoryConn(path_to_db=self.db_path)
        commits_conn = RepositoryCommitConn(path_to_db=self.db_path)
        service_repo_conn = ServiceRepositoryConn(path_to_db=self.db_path)
        first_commit_dt = None
        for service_name in service_names:
            service_repos = service_repo_conn.get_service_repository(
                service_name=service_name)
            for service_repo in service_repos:
                repo = repository_conn.get_repository(
                    repo_id=service_repo.get('repository_id'))
                assert repo is not None, "Error! Unable to find repo of {} service".format(
                    service_name)
                commits = commits_conn.get_commits_by_repo(
                    repository_id=repo.repository_id)
                assert len(commits) > 0, "Error! empty commits = {}".format(
                    repo)
                # only the minimum is needed, so a full sort is unnecessary
                repo_first_dt = min(c.date for c in commits)
                if first_commit_dt is None or repo_first_dt < first_commit_dt:
                    first_commit_dt = repo_first_dt

        if first_commit_dt is None:
            # previously this surfaced as an opaque TypeError on `until - None`
            raise ValueError(
                "Error! no commits found for services {}".format(service_names))

        td = until - first_commit_dt
        # timedelta / int is a timedelta whose day count is the number of steps
        step_time = td / step_size_aprox
        num_steps = step_time.days
        if step_time.seconds > 60 * 60 * 12:
            # round up when the remainder exceeds half a day
            num_steps += 1
        # a span of at most half a step used to give 0 steps and a ZeroDivisionError
        num_steps = max(num_steps, 1)
        step_size = td / num_steps

        time_bins = [first_commit_dt]
        t = first_commit_dt
        while t < until:
            t += step_size
            time_bins.append(t)

        assert len(time_bins) > 0, "Error! empty time bins"
        for previous, current in zip(time_bins, time_bins[1:]):
            assert current > previous, "Error! i={} i-1={}".format(
                current, previous)
        return tuple(time_bins)
Пример #9
0
 def __init__(self, path_to_db):
     """
     Remember the database location and build the repository-commit
     connection for it.

     :param path_to_db: value handed unchanged to RepositoryCommitConn
     """
     self.path_to_db = path_to_db
     commit_connection = RepositoryCommitConn(path_to_db=path_to_db)
     self.repo_commit_conn = commit_connection
Пример #10
0
class CommitMgr:
    """
    Commit-level operations built on top of the database connection classes.

    A single RepositoryCommitConn is created in ``__init__`` and shared by all
    methods; the other connection classes are instantiated where they are needed.
    """

    def __init__(self, path_to_db):
        """
        Parameters
        ----------
        path_to_db:
            database location, handed unchanged to every connection class
        """
        self.path_to_db = path_to_db
        self.repo_commit_conn = RepositoryCommitConn(path_to_db=path_to_db)

    def insert_commit(self, repository, commit, parent_commit_shas):
        """
        Store a commit, its file modifications and its parent references.

        ``commit.commit_id`` is set to the id returned by the insert.

        Parameters
        ----------
        repository: Repository
        commit: Commit
        parent_commit_shas: list of tuple
            ``(position, parent_sha)`` pairs; positions must be unique

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            if the commit's user is not found or a position is repeated
        """
        user_conn = UserConn(path_to_db=self.path_to_db)
        file_modification_conn = FileModificationConn(
            path_to_db=self.path_to_db)
        user = user_conn.get_user(name=commit.user.name,
                                  email=commit.user.email)
        assert user is not None, "Error! Unable to find the User"
        commit.commit_id = self.repo_commit_conn.insert_repository_commit(
            commit=commit,
            repository_id=repository.repository_id,
            user_id=user.user_id)
        # insert filemodifications
        for fm in commit.file_modifications:
            file_modification_conn.insert_file_modification(
                commit_id=commit.commit_id, fm=fm)

        parent_commit_conn = ParentCommitConn(path_to_db=self.path_to_db)
        parent_commit_repo_commit_conn = ParentCommitRepoCommitConn(
            path_to_db=self.path_to_db)

        # positions seen so far, used to reject duplicates
        seen_positions = set()
        # insert SHAs and positions into database
        for position, parent_sha in parent_commit_shas:
            assert position not in seen_positions, "Error! Repeated positions: {}".format(
                parent_commit_shas)
            seen_positions.add(position)
            parent_commit_id = parent_commit_conn.insert_repository_commit(
                sha=parent_sha, position=position)
            parent_commit_repo_commit_conn.insert_parent_commit_repository_commit(
                repo_commit_id=commit.commit_id,
                parent_commit_id=parent_commit_id)

    def find_inconsistent_commits(self, repository_id, extensions):
        """
        Return what ``RepositoryCommitConn.get_inconsistent_commits`` reports
        for the repository.

        Parameters
        ----------
        repository_id: int
        extensions: list of str

        Returns
        -------
        list of Commit
        """
        # reuse the shared connection instead of opening another one
        return self.repo_commit_conn.get_inconsistent_commits(
            repository_id=repository_id, extensions=extensions)

    def get_commit_by_position(self, repository_id, pos):
        """
        Return the commit at index ``pos`` after sorting the repository's
        commits by date; 0 is the first and -1 is the last.

        Parameters
        ----------
        repository_id: int
        pos: int

        Returns
        -------
        Commit or None
            None (with a printed warning) when the repository is unknown or
            has no commits

        Raises
        ------
        IndexError
            if ``pos`` is outside the range of available commits
        """
        repo_conn = RepositoryConn(path_to_db=self.path_to_db)
        repo = repo_conn.get_repository(repo_id=repository_id)
        if repo is None:
            # get_repository can return None; avoid an AttributeError below
            print('Warning!! repository {} not found'.format(repository_id))
            return None
        commits = self.repo_commit_conn.get_commits_by_repo(
            repository_id=repo.repository_id)
        if len(commits) == 0:
            print('Warning!! {} has no commits'.format(repo))
            return None
        sorted_commits = sorted(commits, key=lambda k: k.date)
        return sorted_commits[pos]

    def get_base_commits(self, repo: Repository, start_date, end_date):
        """
        Collect commits by following the parent chain backwards from the last
        commit returned for the requested date window.

        Each visited commit is reloaded with its file modifications and added
        to the result. The walk stops when a commit has no parent SHA, the
        parent cannot be loaded, or the commit date leaves ``[start, end)``.

        Parameters
        ----------
        repo: Repository
        start_date: str
            '%Y-%m-%d' formatted lower bound (inclusive); None means unbounded
        end_date: str
            '%Y-%m-%d' formatted upper bound (exclusive); None means unbounded

        Returns
        -------
        List
            visited commits, newest first along the parent chain
        """
        commit_list = list()
        commits = self.get_commits_by_repo(repository_id=repo.repository_id,
                                           start_date=start_date,
                                           end_date=end_date)
        if commits is None or len(commits) == 0:
            return commit_list

        commit = commits[-1]

        # missing bounds are replaced by sentinel dates far outside any real history
        if start_date is not None:
            start_dt = datetime.strptime(start_date, '%Y-%m-%d')
        else:
            start_dt = datetime(year=1000, month=1, day=1)

        if end_date is not None:
            end_dt = datetime.strptime(end_date, '%Y-%m-%d')
        else:
            end_dt = datetime(year=9999, month=12, day=1)

        while commit is not None and start_dt <= commit.date < end_dt:
            new_commit = self.repo_commit_conn.get_commit_and_its_filemodifications_by_sha(
                sha=commit.sha)
            commit_list.append(new_commit)

            parent_sha = self.get_parent_commit_sha(commit=commit)
            if parent_sha is None:
                # no parent recorded: do not query the database with a None SHA
                break
            commit = self.get_commit(sha=parent_sha)

        return commit_list

    def get_loc_per_commit(self, commit):
        """
        Placeholder: currently only looks up the parent commit SHA and
        discards it.

        Parameters
        ----------
        commit: Commit

        Returns
        -------
        None
            NOTE(review): the original docstring promised an int, but no
            value is computed or returned yet
        """
        self.get_parent_commit_sha(commit=commit)

    def get_commit(self, sha):
        """
        Load a commit together with its file modifications.

        Parameters
        ----------
        sha: str

        Returns
        -------
        Commit
            whatever ``get_commit_and_its_filemodifications_by_sha`` returns
            for ``sha``
        """
        return self.repo_commit_conn.get_commit_and_its_filemodifications_by_sha(
            sha)

    def get_commits_by_repo(self,
                            repository_id: int,
                            start_date: str = None,
                            end_date: str = None) -> List[Commit]:
        """
        Return the commits of a repository, optionally limited to a date window.

        Parameters
        ----------
        repository_id: int
        start_date: str
            forwarded unchanged to the connection; None for no lower bound
        end_date: str
            forwarded unchanged to the connection; None for no upper bound

        Returns
        -------
        list of Commit
        """
        return self.repo_commit_conn.get_commits_by_repo(
            repository_id=repository_id,
            start_date=start_date,
            end_date=end_date)

    def get_parent_commit_sha(self, commit: Commit):
        """
        Look up the parent SHA recorded for a commit.

        Parameters
        ----------
        commit: Commit
            its ``commit_id`` is used as the child id for the lookup

        Returns
        -------
        str or None
        """
        parent_commit_conn = ParentCommitConn(path_to_db=self.path_to_db)
        return parent_commit_conn.get_parent_commit_sha(
            child_commit_id=commit.commit_id)

    def delete_commits_of_repository(self, repository_id):
        """
        Delete every commit stored for a repository, together with its file
        modifications and the parent-commit records found for its SHA.

        :param repository_id: int
        :return: bool
            True when the repository does not exist or at least one commit was
            deleted; False when the repository exists but has no commits
        """
        repo_conn = RepositoryConn(path_to_db=self.path_to_db)
        repo = repo_conn.get_repository(repo_id=repository_id)
        commit_counter = 0
        if repo is None:
            # an unknown repository has nothing left to delete
            commits_were_deleted = True
        else:
            file_modification_conn = FileModificationConn(
                path_to_db=self.path_to_db)
            parent_commit_conn = ParentCommitConn(path_to_db=self.path_to_db)
            parent_commit_repo_commit_conn = ParentCommitRepoCommitConn(
                path_to_db=self.path_to_db)
            commits = self.repo_commit_conn.get_commits_by_repo(
                repository_id=repo.repository_id)
            for c in commits:
                # dependent rows go first, the commit row itself last
                file_modification_conn.delete_file_modifications(
                    commit_id=c.commit_id)
                parent_commit_id = parent_commit_conn.get_parent_commit_id(
                    sha=c.sha)
                if parent_commit_id is not None:
                    parent_commit_conn.delete_parent_commit(
                        parent_commit_id=parent_commit_id)
                    parent_commit_repo_commit_conn.delete_parent_commit_repocommit(
                        parent_commit_id=parent_commit_id)
                self.repo_commit_conn.delete_commit(commit_id=c.commit_id)
                commit_counter += 1
            commits_were_deleted = commit_counter > 0
        print('{} commits were deleted'.format(commit_counter))
        return commits_were_deleted
Пример #11
0
 def setUp(self) -> None:
     """
     Read the database location from the DB_PATH environment variable and
     build the RepositoryCommitConn used by the tests.
     """
     database_path = os.environ.get('DB_PATH')
     self.db_path = database_path
     self.conn = RepositoryCommitConn(path_to_db=database_path)