def get_commits(project_name):
    """
    Return the non-merge commits of a project ordered by commit date.

    Parameters
    ----------
    project_name (str):
        Name of the project. The database is read from
        "data/<project_name>.sqlite3".

    Returns
    -------
    list:
        One ``(commit_hash, author, committed_date)`` tuple per row of
        ``change_set`` with ``is_merge=0``, ordered by ``committed_date``
        (ascending). For example,
        "[(commit1, author1, 12Oct2013), (commit2, author1, 19Oct2013)]"
    """
    db_path = "data/{}.sqlite3".format(project_name)
    # Adjacent literals are joined into a single SQL statement.
    sql = (
        " SELECT commit_hash, author, committed_date"
        " FROM change_set WHERE is_merge=0"
        " ORDER BY committed_date "
    )
    rows = execute_db_query(db_path, sql)

    commits = []
    # Unpacking each row into exactly three names keeps the result made of
    # plain 3-tuples regardless of the row type the query helper returns.
    for commit_hash, author, committed_date in rows:
        commits.append((commit_hash, author, committed_date))
    return commits
def get_commit_to_codechanges(project_name):
    """
    Group the Java code changes of a project by commit hash.

    Only rows of ``code_change`` whose file path ends with ".java" are kept.

    Parameters
    ----------
    project_name (str):
        Name of the project. The database is read from
        "data/<project_name>.sqlite3".

    Returns
    -------
    dict:
        ``defaultdict(list)`` mapping each commit hash to a list of
        ``(file_path, change_type, file_name, added_lines, removed_lines)``
        tuples, where ``file_name`` is the part of ``file_path`` after the
        last "/". Commits without any ".java" change have no entry.
    """
    db_path = "data/{}.sqlite3".format(project_name)
    # Adjacent literals are joined into a single SQL statement.
    sql = (
        " SELECT commit_hash, file_path, change_type,"
        " sum_added_lines, sum_removed_lines"
        " FROM code_change "
    )
    rows = execute_db_query(db_path, sql)

    changes_by_commit = defaultdict(list)
    for commit_hash, file_path, change_type, added, removed in rows:
        # Skip everything that is not a Java source file.
        if not file_path.endswith(".java"):
            continue

        # Last path component; the whole path when it contains no "/".
        file_name = file_path.rsplit("/", 1)[-1]
        changes_by_commit[commit_hash].append(
            (file_path, change_type, file_name, added, removed)
        )
    return changes_by_commit
def get_commit_to_issues(project_name):
    """
    Group the issue ids linked to each commit of a project.

    Parameters
    ----------
    project_name (str):
        Name of the project. The database is read from
        "data/<project_name>.sqlite3".

    Returns
    -------
    dict:
        ``defaultdict(list)`` mapping each commit hash to the issue ids
        found for it in ``change_set_link``, one list element per row.
    """
    db_path = "data/{}.sqlite3".format(project_name)
    sql = " SELECT issue_id, commit_hash FROM change_set_link "
    links = execute_db_query(db_path, sql)

    issues_by_commit = defaultdict(list)
    # Rows arrive as (issue_id, commit_hash); the mapping is keyed the other
    # way round, by commit.
    for linked_issue, commit in links:
        issues_by_commit[commit].append(linked_issue)
    return issues_by_commit
def number_of_developers_before_preprocessing():
    """
    Print the number of distinct authors of each project before preprocessing.

    The counts come straight from the ``author`` column of ``change_set``,
    i.e. no author name correction has been applied yet.

    The output is a banner, a header row with the names in ``project_list``
    and a row with the matching counts. Every cell is left-aligned in a
    column of width 12.
    """
    column = "{:<12}"
    count_sql = "SELECT count(DISTINCT author) FROM change_set"

    print("\n*** Number of Developers Before Preprocessing ***\n")

    # Header row: one fixed-width cell per project.
    header_format = column * len(project_list)
    print(header_format.format(*project_list))

    for project_name in project_list:
        db_path = "data/{}.sqlite3".format(project_name)
        rows = execute_db_query(db_path, count_sql)
        # The aggregate query yields one row with one column.
        distinct_authors = rows[0][0]
        # Stay on the same line so the counts line up under the header.
        print(column.format(distinct_authors), end="")

    # Terminates the row of counts and leaves one blank line after it.
    print("\n")
def generate_issue_to_commenters(project_name):
    """
    Group the commenters of a project's issues by issue id.

    Each display name is stripped of surrounding whitespace and lower-cased.
    Names listed in ``ignored_commenters`` are dropped, and the remaining
    names are replaced through ``combined_author_mapping`` when they have an
    entry there.

    Parameters
    ----------
    project_name (str):
        Name of the project. The database is read from
        "data/<project_name>.sqlite3".

    Returns
    --------
    dict:
        ``defaultdict(list)`` mapping each issue id to its commenters, one
        list element per kept row of ``issue_comment`` (a commenter can
        therefore appear more than once for the same issue).
    """
    db_path = "data/{}.sqlite3".format(project_name)
    sql = " SELECT issue_id, display_name FROM issue_comment "
    comments = execute_db_query(db_path, sql)

    commenters_by_issue = defaultdict(list)
    for issue_id, display_name in comments:
        # Normalize before any lookup: both the ignore check and the author
        # mapping are queried with the stripped, lower-cased name.
        normalized = display_name.strip().lower()

        if normalized not in ignored_commenters:
            # Fall back to the normalized name when no mapping exists.
            canonical = combined_author_mapping.get(normalized, normalized)
            commenters_by_issue[issue_id].append(canonical)
    return commenters_by_issue