Пример #1
0
    def get_words_per_comment_in_discussion(self):
        """
        Compute the words-per-comment ratio of every discussion and store it as CSV.

        The per-discussion comment totals and word totals are obtained from
        ``self._get_comments_in_discussion()``. The resulting table is written to
        ``<project>_words_per_comments_in_discussion.csv`` inside the project's
        ``metrics`` folder.

        :return: header row followed by one ``[key, ratio]`` row per discussion, the ratio being a string
        :rtype: list
        """
        print("#### Words/Comments in Discussions ####")

        comment_totals, word_totals = self._get_comments_in_discussion()

        header = ['issue', 'number_words_per_comment']
        # The ratio is stringified before being stored, exactly as it is written to the CSV.
        rows = [
            [discussion, str(word_totals[discussion] / comment_totals[discussion])]
            for discussion in comment_totals
        ]
        table = [header] + rows

        writer = CSVHandler()
        metrics_dir = self.path + '/' + self.project + '/metrics/'
        file_name = self.project + '_words_per_comments_in_discussion.csv'
        writer.write_csv(metrics_dir, file_name, table)

        return table
Пример #2
0
    def opened_employee_or_temporary(self):
        """
        Record, for every issue and pull request, the status of the user who opened it.

        Issues are read from ``<project>_issues.json`` and pull requests from
        ``<project>_pulls.json``. The status is whatever
        ``self._employee_or_temporary`` returns for the record's
        ``author_association``; records without that field are left out.
        The table is written to ``<project>_opened_by.csv`` in the metrics folder.

        :return: header row followed by ``[number, status, user]`` rows, issues first, then pull requests
        :rtype: list
        """
        print("#### Opened by Employee or Temporary ####")

        reader = JSONHandler(self.path + '/' + self.project + '/')
        issues = reader.open_json(self.project + '_issues.json')
        pulls = reader.open_json(self.project + '_pulls.json')

        opened_by = [['number', 'status', 'user']]

        # Issues and pull requests only differ in the name of their number field.
        sources = (
            (issues, 'issue_number'),
            (pulls, 'pull_request_number'),
        )
        for records, number_field in sources:
            for record in records:
                if 'author_association' not in record.keys():
                    continue
                opened_by.append([
                    record[number_field],
                    self._employee_or_temporary(record['author_association']),
                    record['user'],
                ])

        writer = CSVHandler()
        writer.write_csv(self.path + '/' + self.project + '/metrics/',
                         self.project + '_opened_by.csv',
                         opened_by)

        return opened_by
Пример #3
0
    def number_of_core_devs(self):
        """
        Count MEMBER and OWNER author associations per issue/pull request.

        For every key of ``self.users_comments`` the associations stored there
        are counted, plus the associations stored under the same key in
        ``self.users_issues`` when that key exists. Every matching entry is
        counted (entries are not de-duplicated here). The table is written to
        ``<project>_number_of_core_developers.csv`` in the metrics folder.

        :return: header row followed by one ``[id, count]`` row per key of ``self.users_comments``
        :rtype: list
        """
        print("#### Number of Core Members ####")

        core_roles = ('MEMBER', 'OWNER')

        def count_core(associations):
            # Number of entries whose association is one of the core roles.
            return sum(1 for association in associations if association in core_roles)

        core = [['id', 'count']]
        # NOTE: only keys present in users_comments are reported; a key that
        # exists solely in users_issues never produces a row.
        for discussion, associations in self.users_comments.items():
            total = count_core(associations)
            if discussion in self.users_issues:
                total += count_core(self.users_issues[discussion])
            core.append([discussion, total])

        writer = CSVHandler()
        writer.write_csv(self.path + '/' + self.project + '/metrics/',
                         self.project + '_number_of_core_developers.csv', core)

        return core
Пример #4
0
    def number_of_contributors(self):
        """
        Count CONTRIBUTOR and COLLABORATOR author associations per issue/pull request.

        For every key of ``self.users_comments`` the associations stored there
        are counted, plus the associations stored under the same key in
        ``self.users_issues`` when that key exists. Every matching entry is
        counted (entries are not de-duplicated here). The table is written to
        ``<project>_number_of_contributors.csv`` in the metrics folder.

        :return: header row followed by one ``[id, count]`` row per key of ``self.users_comments``
        :rtype: list
        """
        print("#### Number of Contributors ####")

        contributor_roles = ('CONTRIBUTOR', 'COLLABORATOR')

        contributors = [['id', 'count']]
        # NOTE: only keys present in users_comments are reported; a key that
        # exists solely in users_issues never produces a row.
        for discussion in self.users_comments:
            association_lists = [self.users_comments[discussion]]
            if discussion in self.users_issues:
                association_lists.append(self.users_issues[discussion])

            total = 0
            for associations in association_lists:
                for association in associations:
                    if association in contributor_roles:
                        total += 1
            contributors.append([discussion, total])

        writer = CSVHandler()
        writer.write_csv(self.path + '/' + self.project + '/metrics/',
                         self.project + '_number_of_contributors.csv',
                         contributors)

        return contributors
Пример #5
0
    def get_number_of_patches(self):
        """
        Count the fenced snippets found in the comments of each issue or pull request.

        Every file in ``<path><project>/comments/individual/`` is loaded and each
        comment carrying an ``issue_url`` is attributed to the last path segment
        of that URL. A comment body is split on the triple-backtick marker and
        the segments located after an odd number of markers are treated as
        snippets. As a side effect the character length of those snippets is
        accumulated in ``self.patches_size``. The counts are written to
        ``<project>_patches_in_discussion.csv`` in the metrics folder.

        :return: header row followed by one ``[issue, number_patches]`` row per discussion
        :rtype: list
        """
        print('#### Number of Snippets ####')

        comments_dir = self.path + self.project + '/comments/individual/'
        reader = JSONHandler(comments_dir)

        file_names = [
            entry for entry in listdir(comments_dir)
            if isfile(join(comments_dir, entry))
        ]

        patches_in_discussion = {}
        for file_name in file_names:
            for comment in reader.open_json(file_name):
                if 'issue_url' not in comment.keys():
                    continue

                issue = comment['issue_url'].split('/')[-1]
                # A discussion is listed even when none of its comments holds a snippet.
                patches_in_discussion.setdefault(issue, 0)

                body = comment['body']
                if '```' not in body:
                    continue

                self.patches_size.setdefault(issue, 0)
                # Segments 1, 3, 5, ... sit between an opening and a closing marker.
                snippets = body.split('```')[1::2]
                self.patches_size[issue] += sum(len(snippet) for snippet in snippets)
                patches_in_discussion[issue] += len(snippets)

        number_of_patches_in_discussion = [['issue', 'number_patches']]
        number_of_patches_in_discussion.extend(
            [issue, total] for issue, total in patches_in_discussion.items())

        writer = CSVHandler()
        writer.write_csv(self.path + '/' + self.project + '/metrics/',
                         self.project + '_patches_in_discussion.csv',
                         number_of_patches_in_discussion)

        return number_of_patches_in_discussion
Пример #6
0
    def mean_time_between_replies(self):
        """
        Compute the mean time between consecutive comments of each issue or pull request.

        Every file in ``<path><project>/comments/individual/`` is loaded and the
        ``created_at`` value of each comment is grouped by the last path segment
        of its ``issue_url``; comments without an ``issue_url`` are skipped.
        The dates of a discussion are ordered with ``DateUtils.sort_dates`` and
        the gap between each consecutive pair is measured with
        ``DateUtils.get_days_between_dates``.

        The mean is the sum of the gaps divided by the number of gaps. A
        discussion with a single comment has no gap and is reported as ``0.0``.
        The table is written to ``<project>_mean_time_between_replies.csv`` in
        the metrics folder.

        :return: header row followed by one ``[issue, mean_time]`` row per discussion
        :rtype: list
        """
        print('#### Mean Time Between Comments ####')

        mypath = self.path + self.project + '/comments/individual/'
        json_handler = JSONHandler(mypath)
        onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

        comments_per_issue = {}
        for file in onlyfiles:
            for comment in json_handler.open_json(file):
                # Same guard as the other comment-based metrics: a record
                # without an issue URL cannot be attributed to a discussion.
                if 'issue_url' not in comment:
                    continue

                issue = comment['issue_url'].split('/')[-1]
                comments_per_issue.setdefault(issue, []).append(comment['created_at'])

        date_utils = DateUtils()
        mean_time = [['issue', 'mean_time']]
        for issue, dates in comments_per_issue.items():
            sorted_dates = list(date_utils.sort_dates(dates))

            # n comments produce n - 1 gaps; pair each date with its successor.
            gaps = [
                date_utils.get_days_between_dates(earlier, later)
                for earlier, later in zip(sorted_dates, sorted_dates[1:])
            ]

            # Divide by the number of gaps (not the number of comments) and
            # avoid a division by zero for single-comment discussions.
            mean_days = sum(gaps) / len(gaps) if gaps else 0.0
            mean_time.append([issue, mean_days])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_mean_time_between_replies.csv',
                      mean_time)

        return mean_time
Пример #7
0
    def get_words_in_discussion(self):
        """
        Store the number of words of every discussion as CSV.

        The word totals are the second value returned by
        ``self.get_comments_in_discussion()``. The table is written to
        ``<project>_words_in_discussion.csv`` in the metrics folder and is also
        returned, like the other metric methods do.

        :return: header row followed by one ``[issue, number_words]`` row per discussion
        :rtype: list
        """
        _, words_in_discussion = self.get_comments_in_discussion()

        words_per_discussion = [['issue', 'number_words']]
        words_per_discussion.extend(
            [key, words_in_discussion[key]] for key in words_in_discussion.keys())

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_words_in_discussion.csv',
                      words_per_discussion)

        return words_per_discussion
Пример #8
0
    def get_words_per_comment_in_discussion(self):
        """
        Store the words-per-comment ratio of every discussion as CSV.

        The per-discussion comment totals and word totals come from
        ``self.get_comments_in_discussion()``. The table is written to
        ``<project>_words_per_comments_in_discussion.csv`` in the metrics folder
        and is also returned, like the other metric methods do.

        :return: header row followed by one ``[issue, ratio]`` row per discussion, the ratio being a string
        :rtype: list
        """
        comments_in_discussion, words_in_discussion = self.get_comments_in_discussion()

        words_per_comments_in_discussion = [[
            'issue', 'number_words_per_comment'
        ]]

        for key in comments_in_discussion.keys():
            # The ratio is stored as text, which is how it ends up in the CSV.
            words_per_comments_in_discussion.append([
                key,
                str(words_in_discussion[key] / comments_in_discussion[key])
            ])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_words_per_comments_in_discussion.csv',
                      words_per_comments_in_discussion)

        return words_per_comments_in_discussion
Пример #9
0
    def get_time_in_days_between_open_and_close(self):
        """
        Measure how long each closed issue and pull request stayed open.

        Issues are read from ``<project>_issues.json`` and pull requests from
        ``<project>_pulls.json``. Only records whose ``state`` contains
        ``'closed'`` are reported. For issues the span runs from ``created_at``
        to ``closed_at``; for pull requests it runs to ``merged_at`` when that
        value is set and to ``closed_at`` otherwise. Spans are computed by
        ``DateUtils.get_days_between_dates``. The table is written to
        ``<project>_discussion_length.csv`` in the metrics folder.

        :return: header row followed by ``[number, span]`` rows, issues first, then pull requests
        :rtype: list
        """
        print('#### Discussion Length ####')

        reader = JSONHandler(self.path + '/' + self.project + '/')
        issues = reader.open_json(self.project + '_issues.json')
        pulls = reader.open_json(self.project + '_pulls.json')

        # NOTE(review): the second header cell reads 'status' although the
        # column holds the computed span; kept as-is since the header is
        # written to the CSV and readers may rely on it.
        days_between = [['number', 'status']]

        date_utils = DateUtils()

        for issue in issues:
            if 'closed' not in issue['state']:
                continue
            span = date_utils.get_days_between_dates(
                issue['created_at'], issue['closed_at'])
            days_between.append([issue['issue_number'], span])

        for pull in pulls:
            if 'closed' not in pull['state']:
                continue
            # A merged pull request ends at its merge date, any other at its close date.
            end_field = 'merged_at' if pull['merged_at'] else 'closed_at'
            span = date_utils.get_days_between_dates(
                pull['created_at'], pull[end_field])
            days_between.append([pull['pull_request_number'], span])

        writer = CSVHandler()
        writer.write_csv(self.path + '/' + self.project + '/metrics/',
                         self.project + '_discussion_length.csv', days_between)

        return days_between
Пример #10
0
    def number_of_pull_requests(self):
        """
        Count the distinct pull requests that contained changes to each class.

        Two CSV files are read from ``<path><project>/``:
        ``pulls_commits/<project>_commits_by_pull_request.csv`` and
        ``<project>.csv``. Rows of the first file are grouped by their second
        column, collecting the first column. The helpers ``_get_renames``,
        ``_get_commits_by_class`` and ``_get_class_ids`` derive the rename
        information, the commits of each class and the class ids from the
        second file. Nothing is written to disk yet.

        :return: tuple ``(commits_in_pulls, renames, commits_by_class, pulls_by_class, class_ids, num_pulls)``,
                 where ``num_pulls`` is a header row followed by ``[class_id, num_pulls]`` rows
        :rtype: tuple
        """
        base_dir = self.path + self.project + '/'

        reader = CSVHandler()
        pulls_commits = reader.open_csv(
            base_dir + 'pulls_commits/' + self.project + '_commits_by_pull_request.csv')
        commits_classes = reader.open_csv(base_dir + self.project + '.csv')

        # Group the first column of every row under the value of its second column.
        commits_in_pulls = {}
        for row in pulls_commits:
            commits_in_pulls.setdefault(row[1], []).append(row[0])

        renames = _get_renames(commits_classes)
        commits_by_class = _get_commits_by_class(renames, commits_classes)
        class_ids = _get_class_ids(commits_classes)

        # A class only gets an entry once one of its commits is found in commits_in_pulls.
        pulls_by_class = {}
        for class_key in commits_by_class.keys():
            for commit in commits_by_class[class_key]:
                if commit not in commits_in_pulls:
                    continue
                pulls_by_class.setdefault(class_key, []).extend(commits_in_pulls[commit])

        # Duplicates are collapsed only for the count; pulls_by_class keeps them.
        num_pulls = [['class_id', 'num_pulls']]
        for class_key, pulls in pulls_by_class.items():
            num_pulls.append([class_ids[class_key], len(set(pulls))])

        # TODO Save in CSV

        return commits_in_pulls, renames, commits_by_class, pulls_by_class, class_ids, num_pulls
Пример #11
0
    def get_patch_size(self):
        """
        Export the snippet sizes accumulated in ``self.patches_size``.

        One row is produced per key of ``self.patches_size``. The table is
        written to ``<project>_patches_size_in_discussion.csv`` in the metrics
        folder.

        :return: header row followed by one ``[issue, size_patches]`` row per key
        :rtype: list
        """
        print('#### Snippets Size ####')

        header = ['issue', 'size_patches']
        rows = [[issue, size] for issue, size in self.patches_size.items()]
        size_of_patches_in_discussion = [header] + rows

        writer = CSVHandler()
        metrics_dir = self.path + '/' + self.project + '/metrics/'
        file_name = self.project + '_patches_size_in_discussion.csv'
        writer.write_csv(metrics_dir, file_name, size_of_patches_in_discussion)

        return size_of_patches_in_discussion
Пример #12
0
    def number_associated_issues(self):
        """
        Count the issues associated with the pull requests that changed each class.

        The pull requests of each class and the class ids are taken from
        ``self.number_of_pull_requests()``. ``<project>_pulls_of_issues.csv`` is
        read from ``<path><project>/`` and its rows are grouped by their second
        column, collecting the distinct values of the first column. For every
        pull request listed for a class, the size of its group is added to the
        class total; a pull request without a group adds 1. Nothing is written
        to disk yet.

        :return: header row followed by one ``[class_id, issue_num]`` row per class
        :rtype: list
        """
        base_dir = self.path + self.project + '/'
        _, _, _, pulls_by_class, class_ids, _ = self.number_of_pull_requests()

        reader = CSVHandler()
        pulls_of_issues = reader.open_csv(base_dir + self.project + '_pulls_of_issues.csv')

        # Second column -> set of first-column values.
        associated_issues = {}
        for row in pulls_of_issues:
            associated_issues.setdefault(row[1], set()).add(row[0])

        issues_by_class = {}
        for class_key in pulls_by_class:
            pulls = pulls_by_class[class_key]
            if not pulls:
                # A class without pull requests gets no entry at all.
                continue
            # NOTE(review): pulls may hold repeated entries; each occurrence is counted.
            issues_by_class[class_key] = sum(
                len(associated_issues[pull]) if pull in associated_issues else 1
                for pull in pulls
            )

        issues_num = [['class_id', 'issue_num']]
        issues_num.extend(
            [class_ids[class_key], total] for class_key, total in issues_by_class.items())

        # TODO Save in CSV

        return issues_num
    def get_median_of_number_of_comments(self):
        """
        Count the comments of each issue or pull request and track their running median.

        Every file in ``<path><project>/comments/individual/`` is loaded and each
        comment carrying an ``issue_url`` is attributed to the integer value of
        the last path segment of that URL. Discussions are then visited in
        ascending number; the median stored for a discussion is the median of
        the comment counts of all discussions visited so far, itself included.
        Two files are written to the metrics folder:
        ``<project>_median_comments.csv`` and ``<project>_number_comments.csv``.

        :return: header row followed by one ``[id, number_comments]`` row per discussion
        :rtype: list
        """
        print("#### Median Comments ####")

        comments_dir = self.path + self.project + '/comments/individual/'
        reader = JSONHandler(comments_dir)

        file_names = [
            entry for entry in listdir(comments_dir)
            if isfile(join(comments_dir, entry))
        ]

        comments_per_issue = {}
        for file_name in file_names:
            for comment in reader.open_json(file_name):
                if 'issue_url' not in comment.keys():
                    continue
                issue_number = int(comment['issue_url'].split('/')[-1])
                comments_per_issue[issue_number] = comments_per_issue.get(issue_number, 0) + 1

        seen_counts = []
        median_comments = [['issue', 'median_comments']]
        number_comments = [['id', 'number_comments']]

        for issue_number in sorted(comments_per_issue):
            total = comments_per_issue[issue_number]
            # The median is taken over every count seen up to this discussion.
            seen_counts.append(total)
            median_comments.append([issue_number, median(seen_counts)])
            number_comments.append([issue_number, total])

        writer = CSVHandler()
        metrics_dir = self.path + '/' + self.project + '/metrics/'

        writer.write_csv(metrics_dir,
                         self.project + '_median_comments.csv',
                         median_comments)

        writer.write_csv(metrics_dir,
                         self.project + '_number_comments.csv',
                         number_comments)

        return number_comments