def get_words_per_comment_in_discussion(self):
    """
    Compute the average number of words per comment for every issue or
    pull request and persist the result as a CSV metric.

    :return: header row followed by [issue, words-per-comment] rows
    :rtype: list
    """
    print("#### Words/Comments in Discussions ####")
    comment_counts, word_counts = self._get_comments_in_discussion()
    rows = [['issue', 'number_words_per_comment']]
    for issue_key, n_comments in comment_counts.items():
        # Average words per comment, serialized as text like the original output.
        rows.append([issue_key, str(word_counts[issue_key] / n_comments)])
    writer = CSVHandler()
    writer.write_csv(self.path + '/' + self.project + '/metrics/',
                     self.project + '_words_per_comments_in_discussion.csv',
                     rows)
    return rows
def opened_employee_or_temporary(self):
    """
    Record the status of the user that opened each issue/pull request.
    Employee when the author_association is OWNER, MEMBER, COLLABORATOR
    or CONTRIBUTOR; Temporary otherwise (classification is delegated to
    ``self._employee_or_temporary``).

    :return: header row followed by [number, status, user] rows
    :rtype: list
    """
    print("#### Opened by Employee or Temporary ####")
    project_dir = self.path + '/' + self.project
    handler = JSONHandler(project_dir + '/')
    issues = handler.open_json(self.project + '_issues.json')
    pulls = handler.open_json(self.project + '_pulls.json')
    opened_by = [['number', 'status', 'user']]
    for item in issues:
        if 'author_association' in item:
            opened_by.append([item['issue_number'],
                              self._employee_or_temporary(item['author_association']),
                              item['user']])
    for item in pulls:
        if 'author_association' in item:
            opened_by.append([item['pull_request_number'],
                              self._employee_or_temporary(item['author_association']),
                              item['user']])
    writer = CSVHandler()
    writer.write_csv(self.path + '/' + self.project + '/metrics/',
                     self.project + '_opened_by.csv', opened_by)
    return opened_by
def number_of_core_devs(self):
    """
    Count, per id, how many author associations equal MEMBER or OWNER
    across both ``self.users_comments`` and ``self.users_issues``.

    Bug fix: ids that appear only in ``self.users_issues`` (i.e. opened
    an issue but never commented) were silently skipped because the loop
    iterated over ``self.users_comments`` alone; they are now included.

    :return: header row followed by [id, count] rows
    :rtype: list
    """
    print("#### Number of Core Members ####")
    core = [['id', 'count']]
    # Preserve the original row order (comment ids first), then append
    # the ids that only ever appear in the issues mapping.
    ids = list(self.users_comments)
    ids.extend(k for k in self.users_issues if k not in self.users_comments)
    for k in ids:
        count = 0
        for association in self.users_comments.get(k, ()):
            if association == 'MEMBER' or association == 'OWNER':
                count += 1
        for association in self.users_issues.get(k, ()):
            if association == 'MEMBER' or association == 'OWNER':
                count += 1
        core.append([k, count])
    csv = CSVHandler()
    csv.write_csv(self.path + '/' + self.project + '/metrics/',
                  self.project + '_number_of_core_developers.csv', core)
    return core
def number_of_contributors(self):
    """
    Count, per id, how many author associations equal CONTRIBUTOR or
    COLLABORATOR across both ``self.users_comments`` and
    ``self.users_issues``.

    Bug fix: ids that appear only in ``self.users_issues`` (i.e. opened
    an issue but never commented) were silently skipped because the loop
    iterated over ``self.users_comments`` alone; they are now included.

    :return: header row followed by [id, count] rows
    :rtype: list
    """
    print("#### Number of Contributors ####")
    contributors = [['id', 'count']]
    # Preserve the original row order (comment ids first), then append
    # the ids that only ever appear in the issues mapping.
    ids = list(self.users_comments)
    ids.extend(k for k in self.users_issues if k not in self.users_comments)
    for k in ids:
        count = 0
        for association in self.users_comments.get(k, ()):
            if association == 'CONTRIBUTOR' or association == 'COLLABORATOR':
                count += 1
        for association in self.users_issues.get(k, ()):
            if association == 'CONTRIBUTOR' or association == 'COLLABORATOR':
                count += 1
        contributors.append([k, count])
    csv = CSVHandler()
    csv.write_csv(self.path + '/' + self.project + '/metrics/',
                  self.project + '_number_of_contributors.csv',
                  contributors)
    return contributors
def get_number_of_patches(self):
    """
    Count the code snippets (``` fenced blocks) embedded in the comments
    of every issue and pull request, accumulating the snippet sizes into
    ``self.patches_size`` for later reporting by ``get_patch_size``.

    :return: header row followed by [issue, number_patches] rows
    :rtype: list
    """
    print('#### Number of Snippets ####')
    comments_dir = self.path + self.project + '/comments/individual/'
    handler = JSONHandler(comments_dir)
    comment_files = [f for f in listdir(comments_dir)
                     if isfile(join(comments_dir, f))]
    patches_in_discussion = {}
    for comment_file in comment_files:
        for comment in handler.open_json(comment_file):
            if 'issue_url' not in comment:
                continue
            # The issue number is the last path segment of the URL.
            issue = comment['issue_url'].rsplit('/', 1)[-1]
            patches_in_discussion.setdefault(issue, 0)
            if '```' not in comment['body']:
                continue
            fragments = comment['body'].split('```')
            self.patches_size.setdefault(issue, 0)
            snippets = 0
            # Fragments at odd indexes are the text between fence pairs.
            for idx, fragment in enumerate(fragments):
                if idx % 2 == 0:
                    continue
                self.patches_size[issue] += len(fragment)
                snippets += 1
            patches_in_discussion[issue] += snippets
    rows = [['issue', 'number_patches']]
    for issue, total in patches_in_discussion.items():
        rows.append([issue, total])
    csv = CSVHandler()
    csv.write_csv(self.path + '/' + self.project + '/metrics/',
                  self.project + '_patches_in_discussion.csv', rows)
    return rows
def mean_time_between_replies(self):
    """
    Compute the mean time, in days, between consecutive comments of each
    issue or pull request.

    Bug fix: the mean was previously divided by ``len(gaps) + 1`` (the
    number of comments rather than the number of gaps between them),
    systematically underestimating the metric. Issues with fewer than
    two comments now report 0, as before.

    :return: header row followed by [issue, mean_days] rows
    :rtype: list
    """
    print('#### Mean Time Between Comments ####')
    mypath = self.path + self.project + '/comments/individual/'
    json = JSONHandler(mypath)
    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
    comments_per_issue = {}
    for file in onlyfiles:
        for comment in json.open_json(file):
            # The issue number is the last path segment of the URL.
            issue = comment['issue_url'].rsplit('/', 1)[-1]
            comments_per_issue.setdefault(issue, []).append(comment['created_at'])
    date_utils = DateUtils()
    mean_time = [['issue', 'mean_time']]
    for key, dates in comments_per_issue.items():
        sorted_dates = date_utils.sort_dates(dates)
        # Day gaps between each pair of consecutive comments.
        gaps = [date_utils.get_days_between_dates(prev, cur)
                for prev, cur in zip(sorted_dates, sorted_dates[1:])]
        mean_days = sum(gaps) / len(gaps) if gaps else 0
        mean_time.append([key, mean_days])
    csv = CSVHandler()
    csv.write_csv(self.path + '/' + self.project + '/metrics/',
                  self.project + '_mean_time_between_replies.csv',
                  mean_time)
    return mean_time
def get_words_in_discussion(self):
    """
    Collect the total number of words of all comments per issue or pull
    request and persist the result as a CSV metric.

    Bug fix: the method now returns the collected rows, matching the
    sibling metric methods that both write the CSV and return the data.

    :return: header row followed by [issue, number_words] rows
    :rtype: list
    """
    _, words_in_discussion = self.get_comments_in_discussion()
    words_per_discussion = [['issue', 'number_words']]
    for key in words_in_discussion.keys():
        words_per_discussion.append([key, words_in_discussion[key]])
    csv = CSVHandler()
    csv.write_csv(self.path + '/' + self.project + '/metrics/',
                  self.project + '_words_in_discussion.csv',
                  words_per_discussion)
    return words_per_discussion
def get_words_per_comment_in_discussion(self):
    """
    Compute the average number of words per comment for every issue or
    pull request and persist the result as a CSV metric.

    Bug fix: the method now returns the collected rows, matching the
    sibling metric methods that both write the CSV and return the data.

    :return: header row followed by [issue, words-per-comment] rows
    :rtype: list
    """
    comments_in_discussion, words_in_discussion = self.get_comments_in_discussion()
    words_per_comments_in_discussion = [['issue', 'number_words_per_comment']]
    for key in comments_in_discussion.keys():
        # Average words per comment, serialized as text.
        words_per_comments_in_discussion.append([
            key,
            str(words_in_discussion[key] / comments_in_discussion[key])
        ])
    csv = CSVHandler()
    csv.write_csv(self.path + '/' + self.project + '/metrics/',
                  self.project + '_words_per_comments_in_discussion.csv',
                  words_per_comments_in_discussion)
    return words_per_comments_in_discussion
def get_time_in_days_between_open_and_close(self):
    """
    Collect the time in days between the day an issue or pull request
    was opened and the day it was closed. For merged pull requests the
    merge date is used; still-open items report 0 days.

    Bug fix: the CSV header labelled the second column 'status' although
    the values written are day counts; the header is now 'days'.

    :return: header row followed by [number, days] rows
    :rtype: list
    """
    print('#### Discussion Length ####')
    path = self.path + '/' + self.project
    json = JSONHandler(path + '/')
    issues = json.open_json(self.project + '_issues.json')
    pulls = json.open_json(self.project + '_pulls.json')
    days_between = [['number', 'days']]
    date_utils = DateUtils()
    for issue in issues:
        days = 0
        if 'closed' in issue['state']:
            days = date_utils.get_days_between_dates(
                issue['created_at'], issue['closed_at'])
        days_between.append([issue['issue_number'], days])
    for pull in pulls:
        days = 0
        if 'closed' in pull['state']:
            # Prefer the merge date when the pull request was merged.
            if pull['merged_at']:
                days = date_utils.get_days_between_dates(
                    pull['created_at'], pull['merged_at'])
            else:
                days = date_utils.get_days_between_dates(
                    pull['created_at'], pull['closed_at'])
        days_between.append([pull['pull_request_number'], days])
    csv = CSVHandler()
    csv.write_csv(self.path + '/' + self.project + '/metrics/',
                  self.project + '_discussion_length.csv', days_between)
    return days_between
def number_of_pull_requests(self):
    """
    Count, per class, the number of distinct pull requests whose commits
    contained changes to that class.

    :return: tuple of (commits_in_pulls, renames, commits_by_class,
             pulls_by_class, class_ids, num_pulls) where num_pulls is a
             header row followed by [class_id, num_pulls] rows
    :rtype: tuple
    """
    project_dir = self.path + self.project + '/'
    csv = CSVHandler()
    pulls_commits = csv.open_csv(project_dir + 'pulls_commits/' +
                                 self.project + '_commits_by_pull_request.csv')
    commits_classes = csv.open_csv(project_dir + self.project + '.csv')
    # Map each commit sha to the pull request numbers it belongs to.
    commits_in_pulls = {}
    for row in pulls_commits:
        commits_in_pulls.setdefault(row[1], []).append(row[0])
    renames = _get_renames(commits_classes)
    commits_by_class = _get_commits_by_class(renames, commits_classes)
    class_ids = _get_class_ids(commits_classes)
    # Gather, per class, every pull request that touched one of its commits.
    pulls_by_class = {}
    for cls, commit_list in commits_by_class.items():
        for sha in commit_list:
            if sha in commits_in_pulls:
                pulls_by_class.setdefault(cls, []).extend(commits_in_pulls[sha])
    num_pulls = [['class_id', 'num_pulls']]
    for cls, pulls in pulls_by_class.items():
        num_pulls.append([class_ids[cls], len(set(pulls))])
    # TODO Save in CSV
    return commits_in_pulls, renames, commits_by_class, pulls_by_class, class_ids, num_pulls
def get_patch_size(self):
    """
    Report the accumulated snippet sizes (collected into
    ``self.patches_size`` by the snippet-counting pass) as a CSV metric.

    :return: header row followed by [issue, size_patches] rows
    :rtype: list
    """
    print('#### Snippets Size ####')
    rows = [['issue', 'size_patches']]
    for issue, size in self.patches_size.items():
        rows.append([issue, size])
    csv = CSVHandler()
    csv.write_csv(self.path + '/' + self.project + '/metrics/',
                  self.project + '_patches_size_in_discussion.csv',
                  rows)
    return rows
def number_associated_issues(self):
    """
    Count, per class, the issues associated to the pull requests that
    contained changes to that class. A pull request with no associated
    issue contributes 1 to the total.

    :return: header row followed by [class_id, issue_num] rows
    :rtype: list
    """
    project_dir = self.path + self.project + '/'
    _, _, _, pulls_by_class, class_ids, _ = self.number_of_pull_requests()
    csv = CSVHandler()
    pulls_of_issues = csv.open_csv(project_dir + self.project +
                                   '_pulls_of_issues.csv')
    # Map each pull request to the set of issues linked to it.
    associated_issues = {}
    for row in pulls_of_issues:
        associated_issues.setdefault(row[1], set()).add(row[0])
    issues_by_class = {}
    for cls, pulls in pulls_by_class.items():
        for pull in pulls:
            total = issues_by_class.get(cls, 0)
            if pull in associated_issues:
                total += len(associated_issues[pull])
            else:
                # NOTE(review): pulls may contain duplicates (one per
                # commit); each occurrence is counted, as in the original.
                total += 1
            issues_by_class[cls] = total
    issues_num = [['class_id', 'issue_num']]
    for cls, issues in issues_by_class.items():
        issues_num.append([class_ids[cls], issues])
    # TODO Save in CSV
    return issues_num
def get_median_of_number_of_comments(self):
    """
    Count the comments of every issue/pull request, record a running
    median of those counts over the issues in ascending id order, and
    persist both series as CSV metrics.

    :return: header row followed by [id, number_comments] rows
    :rtype: list
    """
    print("#### Median Comments ####")
    comments_dir = self.path + self.project + '/comments/individual/'
    handler = JSONHandler(comments_dir)
    comment_files = [f for f in listdir(comments_dir)
                     if isfile(join(comments_dir, f))]
    counts = {}
    for comment_file in comment_files:
        for comment in handler.open_json(comment_file):
            if 'issue_url' in comment:
                # The issue number is the last path segment of the URL.
                issue_id = int(comment['issue_url'].rsplit('/', 1)[-1])
                counts[issue_id] = counts.get(issue_id, 0) + 1
    running = []
    median_rows = [['issue', 'median_comments']]
    count_rows = [['id', 'number_comments']]
    for issue_id in sorted(counts):
        running.append(counts[issue_id])
        # Median of all counts seen so far (running median), as in the
        # original implementation.
        median_rows.append([issue_id, median(running)])
        count_rows.append([issue_id, counts[issue_id]])
    csv = CSVHandler()
    csv.write_csv(self.path + '/' + self.project + '/metrics/',
                  self.project + '_median_comments.csv', median_rows)
    csv.write_csv(self.path + '/' + self.project + '/metrics/',
                  self.project + '_number_comments.csv', count_rows)
    return count_rows