Пример #1
0
    def _get_users_labels_in_issues_and_pulls(self):
        """
        Collect the author_association of every issue and pull request (opened, closed or merged).

        Reads ``<project>_issues.json`` and ``<project>_pulls.json`` from the project folder.
        Records whose ``author_association`` is missing or empty are skipped.

        :return: mapping of issue/pull request number to its author_association
        :rtype: dict
        """
        path = self.path + '/' + self.project
        handler = JSONHandler(path + '/')
        issues = handler.open_json(self.project + '_issues.json')
        pulls = handler.open_json(self.project + '_pulls.json')

        author_association = {}
        for issue in issues:
            # .get() tolerates records saved without the key instead of raising KeyError.
            association = issue.get('author_association')
            if association:
                author_association[issue['issue_number']] = association

        for pull in pulls:
            association = pull.get('author_association')
            if association:
                author_association[pull['pull_request_number']] = association

        return author_association
Пример #2
0
    def opened_employee_or_temporary(self):
        """
        Classify the user that opened each issue/pull request.

        The classification is delegated to ``self._employee_or_temporary``, which receives
        the record's author_association. Records without an author_association key are left
        out. The resulting table is also written to ``<project>_opened_by.csv`` in the
        project's metrics folder.

        :return: rows of [number, status, user], preceded by a header row
        :rtype: list
        """
        print("#### Opened by Employee or Temporary ####")

        handler = JSONHandler(self.path + '/' + self.project + '/')
        issues = handler.open_json(self.project + '_issues.json')
        pulls = handler.open_json(self.project + '_pulls.json')

        opened_by = [['number', 'status', 'user']]
        # Issues and pull requests only differ in the name of the field holding their number.
        for records, number_field in ((issues, 'issue_number'), (pulls, 'pull_request_number')):
            for record in records:
                if 'author_association' not in record:
                    continue
                number = record[number_field]
                status = self._employee_or_temporary(record['author_association'])
                opened_by.append([number, status, record['user']])

        metrics_dir = self.path + '/' + self.project + '/metrics/'
        CSVHandler().write_csv(metrics_dir, self.project + '_opened_by.csv', opened_by)

        return opened_by
Пример #3
0
    def collect_commits_on_pulls(self, owner: str, project: str):
        """
        Collect the commits of every pull request through the GitHub API.

        Pull request numbers are read from the batches saved in
        ``<output_path><project>/pulls/all/``. When the commit list of a pull request is
        already on disk, its commit hashes are gathered from that file; otherwise the list
        is requested from the API and saved. Finally every gathered hash is collected
        individually.

        :param owner: repository owner
        :type owner: str
        :param project: project name
        :type project: str
        :return: commit hashes read from the commit lists that were already saved on disk
        :rtype: list
        """
        print('Collecting Pull Requests Commits')

        pulls_dir = self.config['output_path'] + project + '/pulls/all/'
        commits_dir = self.config['output_path'] + project + '/pulls_commits/commits/'
        pulls_json = JSONHandler(pulls_dir)
        commits_json = JSONHandler(commits_dir)
        batch_files = [name for name in listdir(pulls_dir) if isfile(join(pulls_dir, name))]

        pull_numbers = []
        for batch_file in batch_files:
            for pull in pulls_json.open_json(batch_file):
                pull_numbers.append(pull['number'])

        hashs = []
        for number in pull_numbers:
            file_name = str(number) + '.json'
            if JSONHandler.file_exists(commits_dir + file_name):
                # Already downloaded: only harvest the hashes from the saved batches.
                for commit_batch in commits_json.open_json(file_name):
                    for commit in commit_batch:
                        hashs.append(commit['sha'])
            else:
                endpoint = PrototypeAPI(owner, project, '/pulls_commits/', '/pulls/' + str(number) + '/commits')
                fetched = endpoint.collect_batch(False)
                commits_json.save_json(fetched, str(number))

        commits_endpoint = PrototypeAPI(owner, project, '/pulls_commits/', '/commits')
        # The progress counter advances only for hashes that are actually collected.
        progress = 1
        for sha in hashs:
            if sha:
                commits_endpoint.collect_single(sha)
                print(str(progress * 100 / len(hashs)) + "%")
                progress += 1

        return hashs
Пример #4
0
    def _get_comments_in_discussion(self):
        """
        Count the comments and the words posted in each issue/pull request discussion.

        Every comment file under ``comments/individual/`` is read; comments without an
        ``issue_url`` are ignored. The issue number is the last path segment of that URL and
        is kept as a string. The word total is computed on the output of
        ``TextProcessing.pre_process_text``.

        :return: comments per issue/pull request and words per issue/pull request
        :rtype: dict, dict
        """
        comments_dir = self.path + self.project + '/comments/individual/'
        handler = JSONHandler(comments_dir)
        file_names = [name for name in listdir(comments_dir) if isfile(join(comments_dir, name))]
        words_in_discussion = {}
        comments_in_discussion = {}

        for file_name in file_names:
            for comment in handler.open_json(file_name):
                if 'issue_url' not in comment:
                    continue

                number = comment['issue_url'].split('/')[-1]
                words_in_discussion.setdefault(number, 0)
                comments_in_discussion.setdefault(number, 0)

                tokens = TextProcessing().pre_process_text(comment['body'])
                # Every token is followed by a space, so the split below also yields one
                # trailing empty piece; the existing metric includes it.
                spaced_text = ''.join(token + ' ' for token in tokens)
                words_in_discussion[number] += len(spaced_text.split(' '))
                comments_in_discussion[number] += 1

        return comments_in_discussion, words_in_discussion
Пример #5
0
    def _get_users_labels_in_comments(self):
        """
        Collect the author_association of the users commenting on each issue/pull request.

        Each (issue, user login) pair contributes once: the association found in the first
        comment of that user in that discussion, following the file reading order. Comments
        without an ``issue_url`` are ignored.

        :return: mapping of issue number (as a string) to the list of author_associations
            of its distinct commenters
        :rtype: dict
        """
        mypath = self.path + self.project + '/comments/individual/'
        handler = JSONHandler(mypath)

        onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

        users = {}
        # Tuple keys keep issue and login apart. Plain string concatenation let, for
        # example, issue "1" + login "23ab" collide with issue "12" + login "3ab".
        seen = set()
        for file in onlyfiles:
            for comment in handler.open_json(file):
                if 'issue_url' not in comment:
                    continue

                issue = comment['issue_url'].split('/')[-1]
                labels = users.setdefault(issue, [])

                commenter = (issue, comment['user']['login'])
                if commenter not in seen:
                    seen.add(commenter)
                    labels.append(comment['author_association'])

        return users
Пример #6
0
    def get_comments_in_discussion(self):
        """
        Count the comments and the words of each issue/pull request discussion.

        Reads every file in ``comments/individual/`` and skips comments that carry no
        ``issue_url``. Discussions are keyed by the last path segment of that URL (a string).
        Words are counted on the output of ``TextProcessing.pre_process_text``.

        :return: comments per issue/pull request and words per issue/pull request
        :rtype: dict, dict
        """
        folder = self.path + self.project + '/comments/individual/'
        reader = JSONHandler(folder)
        comment_files = [entry for entry in listdir(folder) if isfile(join(folder, entry))]
        words_in_discussion = {}
        comments_in_discussion = {}

        for comment_file in comment_files:
            for comment in reader.open_json(comment_file):
                if 'issue_url' not in comment:
                    continue

                discussion = comment['issue_url'].split('/')[-1]
                words_in_discussion.setdefault(discussion, 0)
                comments_in_discussion.setdefault(discussion, 0)

                text_processing = TextProcessing()
                processed = text_processing.pre_process_text(comment['body'])
                # A space is appended after every token, hence the split also returns a
                # final empty piece that is part of the reported total.
                joined = ''.join(token + ' ' for token in processed)
                words_in_discussion[discussion] += len(joined.split(' '))
                comments_in_discussion[discussion] += 1

        return comments_in_discussion, words_in_discussion
Пример #7
0
    def mean_time_between_replies(self):
        """
        Compute, per issue/pull request, the mean number of days between consecutive comments.

        The creation dates of the comments are grouped by issue, ordered with
        ``DateUtils.sort_dates`` and the gap between neighbours is measured with
        ``DateUtils.get_days_between_dates``. The sum of the gaps is divided by the number
        of gaps plus one, so a discussion with a single comment yields 0. The table is also
        written to ``<project>_mean_time_between_replies.csv`` in the metrics folder.

        :return: rows of [issue, mean_time], preceded by a header row
        :rtype: list
        """
        print('#### Mean Time Between Comments ####')

        comments_dir = self.path + self.project + '/comments/individual/'
        handler = JSONHandler(comments_dir)
        file_names = [name for name in listdir(comments_dir) if isfile(join(comments_dir, name))]

        dates_per_issue = {}
        for file_name in file_names:
            for comment in handler.open_json(file_name):
                number = comment['issue_url'].split('/')[-1]
                dates_per_issue.setdefault(number, []).append(comment['created_at'])

        date_utils = DateUtils()
        mean_time = [['issue', 'mean_time']]
        for number, dates in dates_per_issue.items():
            gaps = []
            previous = None
            for current in date_utils.sort_dates(dates):
                # No gap can be measured until a previous date has been seen.
                if previous:
                    gaps.append(date_utils.get_days_between_dates(previous, current))
                previous = current

            mean_time.append([number, sum(gaps) / (len(gaps) + 1)])

        metrics_dir = self.path + '/' + self.project + '/metrics/'
        CSVHandler().write_csv(metrics_dir,
                               self.project + '_mean_time_between_replies.csv',
                               mean_time)

        return mean_time
Пример #8
0
    def get_number_of_patches(self):
        """
        Count the code snippets posted in the comments of each issue/pull request.

        A comment body is split on the ``` marker and every second piece, starting with the
        one that follows the first marker, is treated as a snippet. Besides the count, the
        character length of every snippet is accumulated in ``self.patches_size``. The
        counts are also written to ``<project>_patches_in_discussion.csv``.

        :return: rows of [issue, number_patches], preceded by a header row
        :rtype: list
        """
        print('#### Number of Snippets ####')

        comments_dir = self.path + self.project + '/comments/individual/'
        handler = JSONHandler(comments_dir)
        file_names = [name for name in listdir(comments_dir) if isfile(join(comments_dir, name))]

        snippets_per_issue = {}
        for file_name in file_names:
            for comment in handler.open_json(file_name):
                if 'issue_url' not in comment:
                    continue

                number = comment['issue_url'].split('/')[-1]
                snippets_per_issue.setdefault(number, 0)

                if '```' not in comment['body']:
                    continue

                # Pieces at odd indices are the ones that follow an opening marker.
                snippets = comment['body'].split('```')[1::2]
                if number not in self.patches_size.keys():
                    self.patches_size[number] = 0
                for snippet in snippets:
                    self.patches_size[number] += len(snippet)
                snippets_per_issue[number] += len(snippets)

        number_of_patches_in_discussion = [['issue', 'number_patches']]
        for number, total in snippets_per_issue.items():
            number_of_patches_in_discussion.append([number, total])

        metrics_dir = self.path + '/' + self.project + '/metrics/'
        CSVHandler().write_csv(metrics_dir,
                               self.project + '_patches_in_discussion.csv',
                               number_of_patches_in_discussion)

        return number_of_patches_in_discussion
Пример #9
0
    def insert_commits_from_pulls(self):
        """
        Store the commits collected from pull requests in the ``commits`` collection.

        First the commit lists in ``pulls_commits/commits/`` are scanned to learn which pull
        requests each commit hash belongs to (the pull request is the file name up to its
        first dot). Then every commit in ``pulls_commits/individual/`` is either flagged in
        place, when a document with the same sha already exists, or inserted as a new
        document carrying ``from_pull`` and ``pull_origin``.
        """
        database = self.database['commits']

        lists_dir = self.path + 'pulls_commits/commits/'
        lists_json = JSONHandler(lists_dir)
        list_files = [name for name in listdir(lists_dir) if isfile(join(lists_dir, name))]

        pulls_by_sha = {}
        for list_file in list_files:
            for commit_list in lists_json.open_json(list_file):
                for commit in commit_list:
                    pulls_by_sha.setdefault(commit['sha'], []).append(list_file.split('.')[0])

        individual_dir = self.path + 'pulls_commits/individual/'
        individual_json = JSONHandler(individual_dir)
        commit_files = [name for name in listdir(individual_dir) if isfile(join(individual_dir, name))]

        for commit_file in commit_files:
            commit = individual_json.open_json(commit_file)
            sha = commit['sha']

            if database.find_one({'sha': sha}):
                # Already stored: only mark where the commit came from.
                database.update_one({'sha': sha}, {"$set": {'from_pull': True}})
                if sha in pulls_by_sha:
                    database.update_one({'sha': sha}, {"$set": {'pull_origin': pulls_by_sha[sha]}})
            else:
                commit['from_pull'] = True
                commit['pull_origin'] = pulls_by_sha.get(sha, [])
                database.insert_one(commit)
Пример #10
0
    def get_time_in_days_between_open_and_close(self):
        """
        Measure how many days each closed issue/pull request stayed open.

        Only records whose state contains 'closed' are reported. Issues are measured from
        created_at to closed_at; pull requests use merged_at when it is set and closed_at
        otherwise. The table is also written to ``<project>_discussion_length.csv``.

        :return: rows of [number, days], preceded by the header row ['number', 'status']
        :rtype: list
        """
        print('#### Discussion Length ####')

        handler = JSONHandler(self.path + '/' + self.project + '/')
        issues = handler.open_json(self.project + '_issues.json')
        pulls = handler.open_json(self.project + '_pulls.json')

        days_between = [['number', 'status']]
        date_utils = DateUtils()

        for issue in issues:
            if 'closed' not in issue['state']:
                continue
            elapsed = date_utils.get_days_between_dates(issue['created_at'], issue['closed_at'])
            days_between.append([issue['issue_number'], elapsed])

        for pull in pulls:
            if 'closed' not in pull['state']:
                continue
            # A merged pull request is measured up to its merge date.
            finished_at = pull['merged_at'] or pull['closed_at']
            elapsed = date_utils.get_days_between_dates(pull['created_at'], finished_at)
            days_between.append([pull['pull_request_number'], elapsed])

        metrics_dir = self.path + '/' + self.project + '/metrics/'
        CSVHandler().write_csv(metrics_dir, self.project + '_discussion_length.csv', days_between)

        return days_between
Пример #11
0
    def insert_comments(self):
        """
        Store the collected comments in the ``comments`` collection.

        Comment batches are read from ``comments/individual/`` and then from
        ``comments/issues/all/``. A comment whose id is already stored is skipped; the
        others receive an integer ``issue_number``, taken from the text that follows
        'issues/' in their issue_url, and are inserted.
        """
        database = self.database['comments']

        # Both folders hold batches with the same layout, so they share one pass.
        for folder in ('comments/individual/', 'comments/issues/all/'):
            comments_path = self.path + folder
            handler = JSONHandler(comments_path)
            batch_files = [name for name in listdir(comments_path) if isfile(join(comments_path, name))]

            for batch_file in batch_files:
                for comment in handler.open_json(batch_file):
                    if database.find_one({'id': comment['id']}):
                        continue

                    comment['issue_number'] = int(comment['issue_url'].split('issues/')[1])
                    database.insert_one(comment)
Пример #12
0
    def insert_pulls(self):
        """
        Store the collected pull requests in the ``pull_requests`` collection.

        Every file in ``pulls/individual/`` holds one pull request; it is inserted unless a
        document with the same number is already stored.
        """
        collection = self.database['pull_requests']

        pulls_dir = self.path + 'pulls/individual/'
        handler = JSONHandler(pulls_dir)
        file_names = [name for name in listdir(pulls_dir) if isfile(join(pulls_dir, name))]

        for file_name in file_names:
            pull = handler.open_json(file_name)
            if not collection.find_one({'number': pull['number']}):
                collection.insert_one(pull)
Пример #13
0
    def insert_issues(self):
        """
        Store the collected issues in the ``issues`` collection.

        Every file in ``issues/individual/`` holds one issue; it is inserted unless a
        document with the same number is already stored.
        """
        collection = self.database['issues']

        issues_dir = self.path + 'issues/individual/'
        handler = JSONHandler(issues_dir)
        file_names = [name for name in listdir(issues_dir) if isfile(join(issues_dir, name))]

        for file_name in file_names:
            issue = handler.open_json(file_name)
            if not collection.find_one({'number': issue['number']}):
                collection.insert_one(issue)
    def get_median_of_number_of_comments(self):
        """
        Count the comments of each issue/pull request and track the running median of the counts.

        Issues are visited in ascending number order; the median reported for an issue is
        taken over the comment counts of that issue and of all the lower-numbered ones.
        Both tables are written to the metrics folder, as ``<project>_median_comments.csv``
        and ``<project>_number_comments.csv``.

        :return: rows of [id, number_comments], preceded by a header row
        :rtype: list
        """
        print("#### Median Comments ####")

        comments_dir = self.path + self.project + '/comments/individual/'
        handler = JSONHandler(comments_dir)
        file_names = [name for name in listdir(comments_dir) if isfile(join(comments_dir, name))]

        comments_per_issue = {}
        for file_name in file_names:
            for comment in handler.open_json(file_name):
                if 'issue_url' not in comment:
                    continue
                # Numeric keys, so that the ordering below is by issue number.
                number = int(comment['issue_url'].split('/')[-1])
                comments_per_issue[number] = comments_per_issue.get(number, 0) + 1

        median_comments = [['issue', 'median_comments']]
        number_comments = [['id', 'number_comments']]
        counts_so_far = []
        for number in sorted(comments_per_issue):
            total = comments_per_issue[number]
            counts_so_far.append(total)
            median_comments.append([number, median(counts_so_far)])
            number_comments.append([number, total])

        csv = CSVHandler()
        metrics_dir = self.path + '/' + self.project + '/metrics/'
        csv.write_csv(metrics_dir, self.project + '_median_comments.csv', median_comments)
        csv.write_csv(metrics_dir, self.project + '_number_comments.csv', number_comments)

        return number_comments
Пример #15
0
    def insert_commits(self):
        """
        Store the individually collected commits in the ``commits`` collection.

        Each file in ``commits/individual/`` holds one commit. A commit whose sha is not
        stored yet is inserted with ``from_pull`` set to False and an empty ``pull_origin``.
        """
        collection = self.database['commits']

        commits_dir = self.path + 'commits/individual/'
        handler = JSONHandler(commits_dir)
        file_names = [name for name in listdir(commits_dir) if isfile(join(commits_dir, name))]

        for file_name in file_names:
            commit = handler.open_json(file_name)
            if not collection.find_one({'sha': commit['sha']}):
                commit['from_pull'] = False
                commit['pull_origin'] = []
                collection.insert_one(commit)
Пример #16
0
    def insert_events(self):
        """
        Store the collected events in the ``events`` collection.

        Event batches are read from ``events/all/``; an event is inserted unless a document
        with the same id is already stored.
        """
        collection = self.database['events']

        events_dir = self.path + 'events/all/'
        handler = JSONHandler(events_dir)
        batch_files = [name for name in listdir(events_dir) if isfile(join(events_dir, name))]

        for batch_file in batch_files:
            for event in handler.open_json(batch_file):
                if not collection.find_one({'id': event['id']}):
                    collection.insert_one(event)
Пример #17
0
    def __init__(self, config_dir='C:/Users/gurio/PycharmProjects/GHPyFramework/'):
        """
        Load the framework configuration from ``config.json``.

        :param config_dir: folder that contains ``config.json``; the default is the
            location that used to be hard-coded here, so existing callers are unaffected
        :type config_dir: str
        """
        json_handler = JSONHandler(config_dir)

        self.config = json_handler.open_json('config.json')
        self.projects = self.config['projects']