Example #1
    def get_words_per_comment_in_discussion(self):
        """
        Get the average number of words per comment for each issue or pull request
        :return: list of [issue, words per comment] rows, one per issue or pull request
        :rtype: list
        """
        print("#### Words/Comments in Discussions ####")

        comments_in_discussion, words_in_discussion = self._get_comments_in_discussion()

        words_per_comments_in_discussion = [[
            'issue', 'number_words_per_comment'
        ]]

        for key in comments_in_discussion.keys():
            # print(str(key) + ': ' + str(comments_per_issue[key]))
            words_per_comments_in_discussion.append([
                key,
                str(words_in_discussion[key] / comments_in_discussion[key])
            ])
        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_words_per_comments_in_discussion.csv',
                      words_per_comments_in_discussion)

        return words_per_comments_in_discussion
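
The helper _get_comments_in_discussion is not shown in this example. A minimal sketch of what it presumably returns (two dicts keyed by issue/pull request number: comment count and total word count), assuming the same per-comment JSON layout used in the other examples:

    def _get_comments_in_discussion(self):
        # Sketch (assumption): count comments and words per issue/pull request
        # from the per-comment JSON files, mirroring the other examples.
        mypath = self.path + self.project + '/comments/individual/'
        json = JSONHandler(mypath)
        onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

        comments_in_discussion = {}
        words_in_discussion = {}
        for file in onlyfiles:
            for comment in json.open_json(file):
                if 'issue_url' not in comment:
                    continue
                issue = comment['issue_url'].split('/')[-1]
                comments_in_discussion[issue] = comments_in_discussion.get(issue, 0) + 1
                words_in_discussion[issue] = words_in_discussion.get(issue, 0) + len(comment['body'].split())
        return comments_in_discussion, words_in_discussion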
Example #2
    def opened_employee_or_temporary(self):
        """
        Collect the status of the user that opened the issue/pull request: Employee if the author_association is OWNER, MEMBER, COLLABORATOR or CONTRIBUTOR, Temporary otherwise.
        :return: list of the status of the opening user, per issue/pull request
        :rtype: list
        """
        print("#### Opened by Employee or Temporary ####")

        path = self.path + '/' + self.project
        json = JSONHandler(path + '/')
        issues = json.open_json(self.project + '_issues.json')
        pulls = json.open_json(self.project + '_pulls.json')

        opened_by = [['number', 'status', 'user']]
        for issue in issues:
            if 'author_association' in issue.keys():
                # print(issue['author_association'])
                opened_by.append(
                    [issue['issue_number'], self._employee_or_temporary(issue['author_association']), issue['user']])

        for pull in pulls:
            if 'author_association' in pull.keys():
                opened_by.append(
                    [pull['pull_request_number'], self._employee_or_temporary(pull['author_association']),
                     pull['user']])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_opened_by.csv',
                      opened_by)

        return opened_by
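
The classification helper _employee_or_temporary is referenced but not shown. A minimal sketch consistent with the docstring above (an assumption, not the project's actual implementation):

    def _employee_or_temporary(self, author_association):
        # Assumption: map GitHub author_association values to the two
        # categories described in the docstring of opened_employee_or_temporary.
        if author_association in ('OWNER', 'MEMBER', 'COLLABORATOR', 'CONTRIBUTOR'):
            return 'Employee'
        return 'Temporary'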
Example #3
    def number_of_core_devs(self):
        """
        Collect the number of users whose author_association equals MEMBER or OWNER in the comments
        :return: list with the number of users with author_association equal to MEMBER or OWNER, per issue/pull request
        :rtype: list
        """
        print("#### Number of Core Members ####")

        core = [['id', 'count']]
        for k in self.users_comments.keys():
            count = 0
            if k in self.users_comments.keys():
                for association in self.users_comments[k]:
                    if association == 'MEMBER' or association == 'OWNER':
                        count += 1
            if k in self.users_issues.keys():
                for association in self.users_issues[k]:
                    if association == 'MEMBER' or association == 'OWNER':
                        count += 1
            core.append([k, count])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_number_of_core_developers.csv', core)

        return core
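
The dictionaries self.users_comments and self.users_issues are built elsewhere in the class. A hypothetical sketch of how the comment-side dictionary could be populated, keyed by the same id written to the CSV (an assumption about code that is not shown here):

    def _collect_author_associations(self):
        # Hypothetical helper (assumption): gather, per issue/pull request id,
        # the author_association of every comment, producing the lists that
        # number_of_core_devs() and number_of_contributors() iterate over.
        mypath = self.path + self.project + '/comments/individual/'
        json = JSONHandler(mypath)
        self.users_comments = {}
        for file in [f for f in listdir(mypath) if isfile(join(mypath, f))]:
            for comment in json.open_json(file):
                if 'issue_url' not in comment:
                    continue
                issue = comment['issue_url'].split('/')[-1]
                self.users_comments.setdefault(issue, []).append(
                    comment.get('author_association', 'NONE'))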
Example #4
    def number_of_contributors(self):
        """
        Collect the number of users whose author_association equals CONTRIBUTOR or COLLABORATOR in the comments
        :return: list with the number of users with author_association equal to CONTRIBUTOR or COLLABORATOR, per issue/pull request
        :rtype: list
        """
        print("#### Number of Contributors ####")

        contributors = [['id', 'count']]
        for k in self.users_comments.keys():
            count = 0
            if k in self.users_comments.keys():
                for association in self.users_comments[k]:
                    if association == 'CONTRIBUTOR' or association == 'COLLABORATOR':
                        count += 1
            if k in self.users_issues.keys():
                for association in self.users_issues[k]:
                    if association == 'CONTRIBUTOR' or association == 'COLLABORATOR':
                        count += 1
            contributors.append([k, count])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_number_of_contributors.csv',
                      contributors)

        return contributors
Example #5
    def get_number_of_patches(self):
        """
        Collects the number of snippets inside each comment of issues and pull requests.

        :return: list of the number of snippets per issue or pull request
        :rtype: list
        """
        print('#### Number of Snippets ####')

        mypath = self.path + self.project + '/comments/individual/'
        json = JSONHandler(mypath)

        onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

        patches_in_discussion = {}
        for file in onlyfiles:
            comments = json.open_json(file)
            for comment in comments:
                if 'issue_url' in comment.keys():
                    issue = comment['issue_url'].split('/')
                    issue = issue[len(issue) - 1]

                    if issue not in patches_in_discussion.keys():
                        patches_in_discussion[issue] = 0

                    if '```' in comment['body']:
                        patches = comment['body'].split('```')
                        count = 0
                        aux = 0
                        if issue not in self.patches_size.keys():
                            self.patches_size[issue] = 0
                        for patch in patches:
                            # Splitting the body on ``` alternates prose and
                            # fenced code; only every second segment (aux even,
                            # i.e. the text between an opening and a closing
                            # fence) is counted and measured.
                            if len(patches) != 1:
                                aux += 1
                                if aux % 2 != 0:
                                    continue

                            self.patches_size[issue] += len(patch)

                            count += 1
                        patches_in_discussion[issue] += count

        number_of_patches_in_discussion = [['issue', 'number_patches']]

        for key in patches_in_discussion.keys():
            number_of_patches_in_discussion.append(
                [key, patches_in_discussion[key]])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_patches_in_discussion.csv',
                      number_of_patches_in_discussion)

        return number_of_patches_in_discussion
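
To illustrate the even/odd bookkeeping above, here is what splitting a comment body on triple backticks produces (illustrative input, not project data):

body = "Try this:\n```\nprint('hi')\n```\nand also\n```\nx = 1\n```\ndone"
patches = body.split('```')
# patches[0], patches[2] and patches[4] are prose; patches[1] and patches[3]
# are the fenced snippets, i.e. the segments reached when aux is even.
snippets = [p for i, p in enumerate(patches) if i % 2 == 1]
print(len(snippets))                   # 2 snippets counted for this comment
print(sum(len(p) for p in snippets))   # total snippet size added to patches_size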
Example #6
    def mean_time_between_replies(self):
        """
        Collect the mean time between comments inside an issue or pull request
        :return: list of mean time between comments per issue/pull request
        :rtype: list
        """
        print('#### Mean Time Between Comments ####')

        mypath = self.path + self.project + '/comments/individual/'
        json = JSONHandler(mypath)
        onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

        comments_per_issue = {}

        for file in onlyfiles:
            comments = json.open_json(file)

            for comment in comments:
                issue = comment['issue_url'].split('/')
                issue = issue[len(issue) - 1]

                if issue not in comments_per_issue.keys():
                    comments_per_issue[issue] = []

                comments_per_issue[issue].append(comment['created_at'])

        date_utils = DateUtils()
        mean_time = [['issue', 'mean_time']]
        for key in comments_per_issue.keys():
            days_between = []
            sorted_dates = date_utils.sort_dates(comments_per_issue[key])
            aux = None
            for date in sorted_dates:
                if not aux:
                    aux = date
                    continue

                days = date_utils.get_days_between_dates(aux, date)
                days_between.append(days)
                aux = date

            # Number of gaps between consecutive comments; an issue with a
            # single comment has no gaps, so guard against dividing by zero.
            length = len(days_between)

            sum_days = sum(days_between)
            mean_days = sum_days / length if length else 0
            mean_time.append([key, mean_days])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_mean_time_between_replies.csv',
                      mean_time)

        return mean_time
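
DateUtils is assumed to sort and diff GitHub's ISO 8601 created_at timestamps. A minimal sketch, assuming values like '2020-01-31T12:00:00Z':

from datetime import datetime

class DateUtils:
    # Sketch (assumption): helpers consistent with how they are called above.
    _FMT = '%Y-%m-%dT%H:%M:%SZ'

    def sort_dates(self, dates):
        # Return the timestamps in chronological order.
        return sorted(dates, key=lambda d: datetime.strptime(d, self._FMT))

    def get_days_between_dates(self, start, end):
        # Whole days elapsed between two timestamps.
        delta = datetime.strptime(end, self._FMT) - datetime.strptime(start, self._FMT)
        return delta.days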
Example #7
    def get_words_in_discussion(self):
        """
        Collect the total number of words in the comments of each issue or pull request.
        """

        _, words_in_discussion = self.get_comments_in_discussion()

        words_per_discussion = [['issue', 'number_words']]

        for key in words_in_discussion.keys():
            words_per_discussion.append([key, words_in_discussion[key]])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_words_in_discussion.csv',
                      words_per_discussion)
Example #8
    def get_median_of_number_of_comments(self):
        """
        Collects the median of the number of comments inside an issue or pull request
        :return: list with the number of comments per issue or pull request
        :rtype: list
        """
        print("#### Median Comments ####")

        mypath = self.path + self.project + '/comments/individual/'
        json = JSONHandler(mypath)

        onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

        comments_per_issue = {}
        for file in onlyfiles:
            comments = json.open_json(file)
            for comment in comments:
                if 'issue_url' in comment.keys():
                    issue = comment['issue_url'].split('/')
                    issue = issue[len(issue) - 1]
                    if int(issue) not in comments_per_issue:
                        comments_per_issue[int(issue)] = 0
                    comments_per_issue[int(issue)] = comments_per_issue[int(issue)] + 1

        values = []
        median_comments = [['issue', 'median_comments']]
        number_comments = [['id', 'number_comments']]


        for key in sorted(comments_per_issue):
            #print(str(key) + ': ' + str(comments_per_issue[key]))
            values.append(comments_per_issue[key])
            median_comments.append([key, median(values)])
            number_comments.append([key, comments_per_issue[key]])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_median_comments.csv',
                      median_comments)

        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_number_comments.csv',
                      number_comments)

        return number_comments
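
Note that median(values) (presumably statistics.median, imported at module level) is appended on every iteration, so the CSV stores a running median over the issues processed in ascending order rather than a single project-wide value. A small illustration:

from statistics import median

values = []
for key, count in sorted({3: 4, 1: 2, 2: 10}.items()):
    values.append(count)
    print(key, median(values))
# 1 2    -> median of [2]
# 2 6.0  -> median of [2, 10]
# 3 4    -> median of [2, 10, 4]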
Example #9
    def get_words_per_comment_in_discussion(self):
        """
        Get the average number of words per comment for each issue or pull request.
        """
        comments_in_discussion, words_in_discussion = self.get_comments_in_discussion()

        words_per_comments_in_discussion = [[
            'issue', 'number_words_per_comment'
        ]]

        for key in comments_in_discussion.keys():
            # print(str(key) + ': ' + str(comments_per_issue[key]))
            words_per_comments_in_discussion.append([
                key,
                str(words_in_discussion[key] / comments_in_discussion[key])
            ])
        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_words_per_comments_in_discussion.csv',
                      words_per_comments_in_discussion)
Example #10
    def get_time_in_days_between_open_and_close(self):
        """
        Collects the time in days between the day an issue or pull request was opened and the day it was closed.
        :return: list of time in days per issue/pull request.
        :rtype: list
        """
        print('#### Discussion Length ####')

        path = self.path + '/' + self.project
        json = JSONHandler(path + '/')
        issues = json.open_json(self.project + '_issues.json')
        pulls = json.open_json(self.project + '_pulls.json')

        days_between = [['number', 'days']]

        date_utils = DateUtils()
        for issue in issues:
            days = 0

            if 'closed' in issue['state']:
                days = date_utils.get_days_between_dates(
                    issue['created_at'], issue['closed_at'])
                # print(issue['author_association'])
                days_between.append([issue['issue_number'], days])

        for pull in pulls:
            days = 0
            if 'closed' in pull['state']:
                if pull['merged_at']:
                    days = date_utils.get_days_between_dates(
                        pull['created_at'], pull['merged_at'])
                else:
                    days = date_utils.get_days_between_dates(
                        pull['created_at'], pull['closed_at'])

                days_between.append([pull['pull_request_number'], days])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_discussion_length.csv', days_between)

        return days_between
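
JSONHandler and CSVHandler are project-local helpers whose code is not included in these examples. A minimal sketch of the interface the examples rely on, stated as an assumption: open_json reads a JSON file relative to the handler's base path, and write_csv creates the target directory and writes the rows:

import csv
import json as jsonlib
import os

class JSONHandler:
    # Sketch (assumption): reads JSON files relative to a base path.
    def __init__(self, path):
        self.path = path

    def open_json(self, filename):
        with open(os.path.join(self.path, filename)) as f:
            return jsonlib.load(f)

class CSVHandler:
    # Sketch (assumption): writes a list of rows to <path>/<filename>.
    def write_csv(self, path, filename, rows):
        os.makedirs(path, exist_ok=True)
        with open(os.path.join(path, filename), 'w', newline='') as f:
            csv.writer(f).writerows(rows)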
Example #11
    def get_patch_size(self):
        """
        Collects the size of the snippets of each comment in issues and pull requests.

        :return: list with the sizes of the patches per issue and pull request
        :rtype: list
        """
        print('#### Snippets Size ####')

        size_of_patches_in_discussion = [['issue', 'size_patches']]

        for key in self.patches_size.keys():
            #print(str(key) + ': ' + str(self.patches_size[key]))
            size_of_patches_in_discussion.append([key, self.patches_size[key]])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_patches_size_in_discussion.csv',
                      size_of_patches_in_discussion)

        return size_of_patches_in_discussion
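
Usage note: get_patch_size only reports what is already stored in self.patches_size, which in these examples is populated by get_number_of_patches (Example #5), so that method is presumably expected to run first.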