def get_commits(project_name):
    """
    Fetch the non-merge commits of a project, ordered by commit date.

    Parameters
    ----------
    project_name (str):
        Name of the project. "<project_name>.sqlite3" has to be in data folder.

    Returns
    -------
    list:
        One (commit hash, author, committed date) tuple per commit, sorted by
        committed date in ascending order.
        For example, "[(commit1, author1, 12Oct2013), (commit2, author1, 19Oct2013)]"
    """

    db_path = "data/{}.sqlite3".format(project_name)
    # Rows with is_merge != 0 (merge commits) are filtered out by the query.
    sql = """
        SELECT commit_hash, author, committed_date
        FROM change_set 
        WHERE is_merge=0
        ORDER BY committed_date
        """

    commits = []
    for commit_hash, author, committed_date in execute_db_query(db_path, sql):
        commits.append((commit_hash, author, committed_date))

    return commits
def get_commit_to_codechanges(project_name):
    """
    Group the Java code changes of a project by commit hash.

    Only changes whose file path ends with ".java" are kept.

    Parameters
    ----------
    project_name (str):
        Name of the project. "<project_name>.sqlite3" has to be in data folder.

    Returns
    -------
    dict:
        Mapping (a ``defaultdict(list)``) from commit hash to a list of
        (file path, change type, file name, added lines, removed lines)
        tuples. The file name is the part of the path after the last "/".
    """

    db_path = "data/{}.sqlite3".format(project_name)
    sql = """
        SELECT commit_hash, file_path, change_type, sum_added_lines, sum_removed_lines
        FROM code_change
        """

    changes_by_commit = defaultdict(list)
    for commit_hash, file_path, change_type, added, removed in execute_db_query(
        db_path, sql
    ):
        # Skip everything that is not a Java source file.
        if not file_path.endswith(".java"):
            continue

        # Last path component; the whole path when it contains no "/".
        file_name = file_path.rpartition("/")[2]
        record = (file_path, change_type, file_name, added, removed)
        changes_by_commit[commit_hash].append(record)

    return changes_by_commit
def get_commit_to_issues(project_name):
    """
    Group the issue ids linked to each commit of a project.

    Parameters
    ----------
    project_name (str):
        Name of the project. "<project_name>.sqlite3" has to be in data folder.

    Returns
    -------
    dict:
        Mapping (a ``defaultdict(list)``) from commit hash to the list of
        issue ids linked to that commit, one entry per link row.
    """
    db_path = "data/{}.sqlite3".format(project_name)
    sql = """
        SELECT issue_id, commit_hash
        FROM change_set_link
        """
    links = execute_db_query(db_path, sql)

    issues_by_commit = defaultdict(list)
    for linked_issue, linked_commit in links:
        issues_by_commit[linked_commit].append(linked_issue)

    return issues_by_commit
# Example #4
# 0
def number_of_developers_before_preprocessing():
    """
    Print the number of distinct developers of each project before preprocessing.

    The counts come straight from the ``author`` column of ``change_set``, so
    no author name correction has been applied yet. The output is a header
    row with the project names followed by a row with the counts, each cell
    left-aligned in a 12-character column.
    """
    cell = "{:<12}"

    print("\n*** Number of Developers Before Preprocessing ***\n")
    # Header row: one fixed-width cell per project name.
    print("".join(cell.format(name) for name in project_list))

    for project_name in project_list:
        rows = execute_db_query(
            "data/{}.sqlite3".format(project_name),
            "SELECT count(DISTINCT author) FROM change_set",
        )
        # The aggregate query yields a single row with a single column.
        num_devs = rows[0][0]
        print(cell.format(num_devs), end="")

    # Terminate the counts row and leave one blank line after it.
    print("\n")
def generate_issue_to_commenters(project_name):
    """
    Build a mapping from issue ids to the commenters of those issues.

    Commenter names are stripped and lower-cased, names listed in
    ``ignored_commenters`` are dropped, and the remaining names are replaced
    by their entry in ``combined_author_mapping`` when one exists.

    Parameters
    ----------
    project_name (str):
        Name of the project. "data/<project_name>.sqlite3" is queried.

    Returns
    --------
    dict:
        Mapping (a ``defaultdict(list)``) from issue id to the list of
        commenter names, one entry per kept comment row (names may repeat).
    """

    db_path = "data/{}.sqlite3".format(project_name)
    sql = """
        SELECT issue_id, display_name
        FROM issue_comment
        """

    commenters_by_issue = defaultdict(list)
    for issue_id, display_name in execute_db_query(db_path, sql):
        # Normalize the name: no surrounding whitespace, lower case.
        normalized = display_name.strip().lower()

        if normalized not in ignored_commenters:
            # Use the mapped author name when there is one, else the name itself.
            canonical = combined_author_mapping.get(normalized, normalized)
            # Record the commenter under the issue the comment belongs to.
            commenters_by_issue[issue_id].append(canonical)

    return commenters_by_issue