# These snippets assume the following module-level imports, plus the
# project-local helpers "log" (the logger), "csv_writer", "DBManager", and
# "idManager" from the Codeface / codeface-extraction tooling:
import os
import json
import mailbox
import multiprocessing
from os import path, walk, makedirs
from joblib import Parallel, delayed
from whoosh.analysis import StandardAnalyzer
from whoosh.fields import Schema, ID, TEXT

def print_to_disk(issues, results_folder):
    """
    Print issues to file "issues-jira.list" in the results folder.

    :param issues: the issues to dump
    :param results_folder: the folder where to place "issues-jira.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "issues-jira.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # construct lines of output
    lines = []
    for issue in issues:
        log.info("Current issue '{}'".format(issue["externalId"]))
        lines.append((issue["author"]["name"],
                      issue["author"]["email"],
                      issue["externalId"],  # id of this item (the issue itself)
                      issue["creationDate"],
                      issue["externalId"],  # id of the issue the item belongs to
                      issue["type"]))
        for comment in issue["comments"]:
            lines.append((
                comment["author"]["name"],
                comment["author"]["email"],
                comment["id"],
                comment["changeDate"],
                issue["externalId"],
                "comment"
            ))

    # write to output file
    csv_writer.write_to_csv(output_file, lines, append=True)
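# All of the snippets below rely on a small project-local "csv_writer" module.
# A minimal sketch of what it might look like (hypothetical: the actual
# codeface-extraction implementation, including its delimiter and quoting
# settings, may differ):
import csv

def write_to_csv(output_file, lines, append=False):
    # append to or overwrite the output file (binary mode for the Python 2 csv module)
    mode = "ab" if append else "wb"
    with open(output_file, mode) as csv_file:
        writer = csv.writer(csv_file, delimiter=";")
        writer.writerows(lines)

def read_from_csv(input_file):
    # read all rows back as lists of strings
    with open(input_file, "rb") as csv_file:
        return [row for row in csv.reader(csv_file, delimiter=";")]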
def print_to_disk(issues, results_folder):
    """
    Print issues to file "issues.list" in the results folder.
    This format is outdated but still used by the network library.
    TODO Once the network library is updated, this method can be replaced by "print_to_disk_new".

    :param issues: the issues to dump
    :param results_folder: the folder where to place "issues.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "issues.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # construct lines of output
    lines = []
    for issue in issues:
        for event in issue["eventsList"]:
            lines.append((issue["number"], issue["state"], issue["created_at"],
                          issue["closed_at"], issue["isPullRequest"],
                          event["user"]["name"], event["user"]["email"],
                          event["created_at"], "" if event["ref_target"] == ""
                          else event["ref_target"]["name"], event["event"]))

    # write to output file
    csv_writer.write_to_csv(output_file, lines)
def print_to_disk(issues, results_folder):
    """
    Print issues to file "issues-github.list" in the results folder.

    :param issues: the issues to dump
    :param results_folder: the folder where to place "issues-github.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "issues-github.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # construct lines of output
    lines = []
    for issue in issues:
        for event in issue["eventsList"]:
            lines.append((
                issue["number"],
                issue["title"],
                json.dumps(issue["type"]),
                issue["state_new"],
                json.dumps(issue["resolution"]),
                issue["created_at"],
                issue["closed_at"],
                json.dumps([]),  # components
                event["event"],
                event["user"]["name"],
                event["user"]["email"],
                event["created_at"],
                event["event_info_1"],
                json.dumps(event["event_info_2"])))

    # write to output file
    csv_writer.write_to_csv(
        output_file, sorted(set(lines), key=lambda line: lines.index(line)))
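# Note on the write above: sorted(set(lines), key=lines.index) de-duplicates
# the event lines while preserving their first-occurrence order. A standalone
# sketch of the idiom (list.index makes it O(n^2); the OrderedDict variant
# shown in the assertion is linear and also available on Python 2.7):
from collections import OrderedDict

example_lines = [("a", 1), ("b", 2), ("a", 1)]
deduped = sorted(set(example_lines), key=lambda line: example_lines.index(line))
assert deduped == list(OrderedDict.fromkeys(example_lines)) == [("a", 1), ("b", 2)]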
def print_to_disk_extr(issues, results_folder):
    """
    Print issues to file "issues.list" in the results folder.

    :param issues: the issues to dump
    :param results_folder: the folder where to place "issues.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "issues.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # construct lines of output
    lines = []
    for issue in issues:
        log.info("Current issue '{}'".format(issue["externalId"]))

        # record the issue creation as an "open" event by the issue author
        lines.append((
            issue["externalId"],
            issue["state"],
            issue["creationDate"],
            issue["resolveDate"],
            False,  ## Value of is.pull.request
            issue["author"]["name"],
            issue["author"]["email"],
            issue["creationDate"],
            "",  ## ref.name
            "open"  ## event.name
        ))

        # additionally record the issue creation as a "commented" event by the issue author
        lines.append((
            issue["externalId"],
            issue["state"],
            issue["creationDate"],
            issue["resolveDate"],
            False,  ## Value of is.pull.request
            issue["author"]["name"],
            issue["author"]["email"],
            issue["creationDate"],
            "",  ## ref.name
            "commented"  ## event.name
        ))

        for comment in issue["comments"]:
            lines.append((
                issue["externalId"],
                issue["state"],
                issue["creationDate"],
                issue["resolveDate"],
                False,  ## Value of is.pull.request
                comment["author"]["name"],
                comment["author"]["email"],
                comment["changeDate"],
                "",  ## ref.name
                "commented"  ## event.name
            ))
    # write to output file
    csv_writer.write_to_csv(output_file, lines, append=True)
def parse(mbox_name, results_folder, include_filepath, files_as_artifacts,
          reindex, append_result):
    """Parse the given mbox file with the commit information from the results folder.

    :param mbox_name: the mbox file to search in
    :param results_folder: the results folder for index and commit information
    :param include_filepath: indicator whether to take the file-path part of the artifact into account
    :param files_as_artifacts: indicator whether to search for files (base names) as artifacts
    :param reindex: force reindexing if True
    :param append_result: flag whether to append the results for the current mbox file to the output file
    """

    # load mbox file
    mbox = mailbox.mbox(mbox_name)

    # create schema for text search
    analyzer = StandardAnalyzer(
        expression=r"[^\s,:\"']+"
    )  # split by whitespace, commas, colons, and quotation marks.
    schema = Schema(messageID=ID(stored=True), content=TEXT(analyzer=analyzer))

    # create/load index (initialize if necessary)
    ix = __get_index(mbox, mbox_name, results_folder, schema, reindex)

    # extract artifacts from results folder
    artifacts = __get_artifacts(results_folder, files_as_artifacts)

    # parallelize execution call for the text search
    log.info("Start parsing...")
    num_cores = multiprocessing.cpu_count()
    csv_data = Parallel(n_jobs=num_cores - 1)(
        delayed(__parse_execute)(commit, schema, ix, include_filepath)
        for commit in artifacts)
    log.info("Parsing finished.")

    # re-arrange results
    result = []
    if not append_result:
        result.append(('file', 'artifact', 'messageID'))
    for entry in csv_data:
        for row in entry:
            result.append(row)

    # determine output file
    filename = "mboxparsing"
    if files_as_artifacts:
        filename += "_file"
    if include_filepath:
        filename += "_filepath.list"
    else:
        filename += ".list"
    output_file = os.path.join(results_folder, filename)

    # write found hits to the output file
    log.info("Writing results to file {}.".format(output_file))
    csv_writer.write_to_csv(output_file, result, append=append_result)

    log.info("Parsing mbox file complete!")
def print_to_disk_gephi(issues, results_folder):
    """
    Print issues to file "issues-jira-gephi-nodes.csv" and
    "issues-jira-gephi-edges.csv" in result folder. The files can be
     used to build dynamic networks in Gephi.

    :param issues: the issues to dump
    :param results_folder: the folder where to place the two output file
    """

    # construct paths to the output files
    output_file_nodes = os.path.join(results_folder,
                                     "issues-jira-gephi-nodes.csv")
    output_file_edges = os.path.join(results_folder,
                                     "issues-jira-gephi-edges.csv")
    log.info("Dumping output in file '{}'...".format(output_file_nodes))
    log.info("Dumping output in file '{}'...".format(output_file_edges))

    # construct lines of output
    node_lines = []
    edge_lines = []
    node_lines.append(("Id", "Type"))
    edge_lines.append(("Source", "Target", "Timestamp", "Edgetype"))
    for issue in issues:
        node_lines.append((issue["externalId"], "Issue"))
        node_lines.append((issue["author"]["name"], "Person"))

        edge_lines.append((issue["author"]["name"], issue["externalId"],
                           issue["creationDate"], "Person-Issue"))
        for comment in issue["comments"]:
            node_lines.append((comment["id"], "Comment"))
            node_lines.append((comment["author"]["name"], "Person"))

            edge_lines.append((issue["externalId"], comment["id"],
                               comment["changeDate"], "Issue-Comment"))
            edge_lines.append((comment["author"]["name"], comment["id"],
                               comment["changeDate"], "Person-Comment"))
    # write to output file
    csv_writer.write_to_csv(output_file_edges, edge_lines, append=True)
    csv_writer.write_to_csv(output_file_nodes, node_lines, append=True)
def print_to_disk(issues, results_folder):
    """Print issues to file 'issues.list' in result folder

    :param issues: the issues to dump
    :param results_folder: the folder where to place 'issues.list' output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "issues.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # construct lines of output
    lines = []
    for issue in issues:
        for event in issue["eventsList"]:
            lines.append((issue["number"], issue["state"], issue["created_at"],
                          issue["closed_at"], issue["isPullRequest"],
                          event["user"]["name"], event["user"]["email"],
                          event["created_at"], "" if event["ref_target"] == ""
                          else event["ref_target"]["name"], event["event"]))

    # write to output file
    csv_writer.write_to_csv(output_file, lines)
def print_to_disk(issues, results_folder):
    """
    Print issues to file "issues-github.list" in the results folder.
    This format is outdated but still used by the network library.
    TODO Once the network library is updated, this method can be replaced by "print_to_disk_new".

    :param issues: the issues to dump
    :param results_folder: the folder where to place "issues-github.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "issues-github.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # construct lines of output
    lines = []
    for issue in issues:
        for event in issue["eventsList"]:
            lines.append((
                issue["number"],
                issue["title"],
                json.dumps(issue["type"]),
                issue["state_new"],
                json.dumps(issue["resolution"]),
                issue["created_at"],
                issue["closed_at"],
                json.dumps([]),  # components
                event["event"],
                event["user"]["name"],
                event["user"]["email"],
                event["created_at"],
                event["event_info_1"],
                json.dumps(event["event_info_2"])
            ))

    # write to output file
    csv_writer.write_to_csv(output_file, lines)
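# Note: json.dumps serializes the list-valued fields above (issue type,
# resolution, event_info_2) into single CSV cells; the quote-stripping via
# event[13][1:-1] in the postprocessing and anonymization code below relies
# on exactly this JSON quoting.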
def print_to_disk_new(issues, results_folder):
    """
    Print issues to file "issues_new.list" in result folder.
    This file has a consistent format to the "bugs-jira.list" file.
    TODO When the network library is updated, this is the format which shall be used.

    :param issues: the issues to dump
    :param results_folder: the folder where to place "issues.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "new_format.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # construct lines of output
    lines = []
    for issue in issues:
        for event in issue["eventsList"]:
            lines.append((
                issue["number"],
                issue["title"],
                issue["type"],
                issue["state_new"],
                issue["resolution"],
                issue["created_at"],
                issue["closed_at"],
                [],  # components
                event["event"],
                event["user"]["name"],
                event["user"]["email"],
                event["created_at"],
                event["event_info_1"],
                event["event_info_2"]))

    # write to output file
    csv_writer.write_to_csv(output_file, lines)
def run_postprocessing(conf, resdir, backup_data):
    """
    Runs the postprocessing for the given parameters, that is, reads the disambiguation file of the project
    and replaces all author names and e-mail addresses in all other .list files according to the disambiguation file.

    If backing up the data is enabled, all .list files of the results dir are copied to a backup results dir
    (which has the suffix '_bak'). If this backup results dir already exists, no backup is performed.

    :param conf: the Codeface configuration object
    :param resdir: the Codeface results dir, where output files are written
    :param backup_data: whether to back up the current .list files before performing the postprocessing
    """

    if backup_data:
        log.info("%s: Backup current data" % conf["project"])
        results_path = path.join(resdir, conf["project"], conf["tagging"])
        results_path_backup = path.join(resdir, conf["project"],
                                        conf["tagging"] + "_bak")
        perform_data_backup(results_path, results_path_backup)
        log.info("%s: Backup of current data complete!" % conf["project"])

    authors_list = "authors.list"
    commits_list = "commits.list"
    emails_list = "emails.list"
    issues_github_list = "issues-github.list"
    issues_jira_list = "issues-jira.list"
    bugs_jira_list = "bugs-jira.list"
    bots_list = "bots.list"

    # When looking at elements originating from json lists, we need to consider quotation marks around the string
    quot_m = "\""

    data_path = path.join(resdir, conf["project"], conf["tagging"])

    # Correctly replace author 'GitHub <*****@*****.**>' in the commit data and in "commit_added" events of the
    # GitHub issue data, and remove this author from the author data, bot data, and e-mail data
    fix_github_browser_commits(data_path, issues_github_list, commits_list,
                               authors_list, emails_list, bots_list)

    log.info("%s: Postprocess authors after manual disambiguation" %
             conf["project"])
    disambiguation_list = path.join(data_path, "disambiguation-after-db.list")

    # Check if a disambiguation list exists - if not, just stop
    if path.exists(disambiguation_list):
        disambiguation_data = csv_writer.read_from_csv(disambiguation_list)
    else:
        log.info("Disambiguation file does not exist: %s", disambiguation_list)
        log.info("No postprocessing performed!")
        return

    # Check for all files in the result directory of the project whether they need to be adjusted
    for filepath, dirnames, filenames in walk(data_path):

        # (1) Adjust authors lists
        if authors_list in filenames:
            f = path.join(filepath, authors_list)
            log.info("Postprocess %s ...", f)
            author_data = csv_writer.read_from_csv(f)

            author_data_to_remove = []
            author_data_new = []

            # get persons which should be removed
            for person in disambiguation_data:
                author_data_to_remove.append([person[3], person[4], person[5]])

            for author in author_data:
                # keep author entry only if it should not be removed
                if not author in author_data_to_remove:
                    author_data_new.append(author)
            csv_writer.write_to_csv(f, author_data_new)

        # (2) Adjust commits lists
        if commits_list in filenames:
            f = path.join(filepath, commits_list)
            log.info("Postprocess %s ...", f)
            commit_data = csv_writer.read_from_csv(f)

            for person in disambiguation_data:
                for commit in commit_data:
                    # replace author if necessary
                    if person[4] == commit[2] and person[5] == commit[3]:
                        commit[2] = person[1]
                        commit[3] = person[2]
                    # replace committer if necessary
                    if person[4] == commit[5] and person[5] == commit[6]:
                        commit[5] = person[1]
                        commit[6] = person[2]

            csv_writer.write_to_csv(f, commit_data)

        # (3) Adjust emails lists
        if emails_list in filenames:
            f = path.join(filepath, emails_list)
            log.info("Postprocess %s ...", f)
            email_data = csv_writer.read_from_csv(f)

            for person in disambiguation_data:
                for email in email_data:
                    # replace author if necessary
                    if person[4] == email[0] and person[5] == email[1]:
                        email[0] = person[1]
                        email[1] = person[2]

            csv_writer.write_to_csv(f, email_data)

        # (4) Adjust issues lists (github)
        if issues_github_list in filenames:
            f = path.join(filepath, issues_github_list)
            log.info("Postprocess %s ...", f)
            issue_data = csv_writer.read_from_csv(f)

            for person in disambiguation_data:
                for issue_event in issue_data:
                    # replace author if necessary
                    if person[4] == issue_event[9] and person[
                            5] == issue_event[10]:
                        issue_event[9] = person[1]
                        issue_event[10] = person[2]
                    # replace person in event info 1/2 if necessary
                    if person[4] == issue_event[12] and (
                            quot_m + person[5] + quot_m) == issue_event[13]:
                        issue_event[12] = person[1]
                        issue_event[13] = quot_m + person[2] + quot_m

            csv_writer.write_to_csv(f, issue_data)

        # (5) Adjust issues lists (jira)
        if issues_jira_list in filenames:
            f = path.join(filepath, issues_jira_list)
            log.info("Postprocess %s ...", f)
            issue_data = csv_writer.read_from_csv(f)

            for person in disambiguation_data:
                for issue_event in issue_data:
                    # replace author if necessary
                    if person[4] == issue_event[9] and person[
                            5] == issue_event[10]:
                        issue_event[9] = person[1]
                        issue_event[10] = person[2]
                    # replace person in event info 1/2 if necessary
                    if person[4] == issue_event[12] and (
                            quot_m + person[5] + quot_m) == issue_event[13]:
                        issue_event[12] = person[1]
                        issue_event[13] = quot_m + person[2] + quot_m

            csv_writer.write_to_csv(f, issue_data)

        # (6) Adjust bugs lists (jira)
        if bugs_jira_list in filenames:
            f = path.join(filepath, bugs_jira_list)
            log.info("Postprocess %s ...", f)
            bug_data = csv_writer.read_from_csv(f)

            for person in disambiguation_data:
                for bug_event in bug_data:
                    # replace author if necessary
                    if person[4] == bug_event[9] and person[5] == bug_event[10]:
                        bug_event[9] = person[1]
                        bug_event[10] = person[2]
                    # replace person in event info 1/2 if necessary
                    if person[4] == bug_event[12] and (
                            quot_m + person[5] + quot_m) == bug_event[13]:
                        bug_event[12] = person[1]
                        bug_event[13] = quot_m + person[2] + quot_m

            csv_writer.write_to_csv(f, bug_data)

        # (7) Adjust bots list
        if bots_list in filenames:
            f = path.join(filepath, bots_list)
            log.info("Postprocess %s ...", f)
            bot_data = csv_writer.read_from_csv(f)

            bot_data_new = []
            bot_names_and_emails = dict()

            for person in disambiguation_data:
                for bot in bot_data:
                    # replace author if necessary
                    if person[4] == bot[0] and person[5] == bot[1]:
                        bot[0] = person[1]
                        bot[1] = person[2]

            # check for duplicate bot entries
            for bot in bot_data:
                # check if the bot is not already in the dict and add it
                if (bot[0], bot[1]) not in bot_names_and_emails:
                    bot_names_and_emails[(bot[0], bot[1])] = bot
                else:
                    # the bot is already in the list, check if there are different predictions
                    stored_bot = bot_names_and_emails[(bot[0], bot[1])]
                    if stored_bot[2] != bot[2]:
                        # if either of the predictions is bot, keep bot
                        if (stored_bot[2] == "Bot" or bot[2] == "Bot"):
                            stored_bot[2] = "Bot"
                            bot_names_and_emails[(bot[0], bot[1])] = stored_bot
                        # otherwise, if either of the predictions is human, keep human
                        elif (stored_bot[2] == "Human" or bot[2] == "Human"):
                            stored_bot[2] = "Human"
                            bot_names_and_emails[(bot[0], bot[1])] = stored_bot

            # determine final bot entries
            for bot in bot_data:
                updated_bot = bot_names_and_emails[(bot[0], bot[1])]
                if updated_bot not in bot_data_new:
                    bot_data_new.append(updated_bot)

            csv_writer.write_to_csv(f, bot_data_new)

    log.info("Postprocessing complete!")
def print_to_disk_bugs(issues, results_folder):
    """
    Sorts out bug issues and prints them to file "bugs-jira.list" in the results folder.
    This method prints in a new format which is consistent with the format of "print_to_disk_new" in "issue_processing.py".
    TODO When the network library is updated, this format shall be used in all print-to-disk methods.

    :param issues: the issues to filter for bugs
    :param results_folder: the folder where to place "bugs-jira.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "bugs-jira.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # construct lines of output
    lines = []
    for issue in issues:
        log.info("Current issue '{}'".format(issue["externalId"]))

        # only write issues of type bug and their comments to the output file
        if "bug" in issue["type_new"]:
            lines.append((
                issue["externalId"],
                issue["title"],
                issue["type_new"],
                issue["state_new"],
                issue["resolution_new"],
                issue["creationDate"],
                issue["resolveDate"],
                issue["components"],
                "created",  ## event.name
                issue["author"]["name"],
                issue["author"]["email"],
                issue["creationDate"],
                "open",  ## default state when created
                ["unresolved"]  ## default resolution when created
            ))

            lines.append((
                issue["externalId"],
                issue["title"],
                issue["type_new"],
                issue["state_new"],
                issue["resolution_new"],
                issue["creationDate"],
                issue["resolveDate"],
                issue["components"],
                "commented",
                issue["author"]["name"],
                issue["author"]["email"],
                issue["creationDate"],
                "open",  ##  default state when created
                ["unresolved"]  ## default resolution when created
            ))

            for comment in issue["comments"]:
                lines.append((
                    issue["externalId"],
                    issue["title"],
                    issue["type_new"],
                    issue["state_new"],
                    issue["resolution_new"],
                    issue["creationDate"],
                    issue["resolveDate"],
                    issue["components"],
                    "commented",
                    comment["author"]["name"],
                    comment["author"]["email"],
                    comment["changeDate"],
                    comment["state_on_creation"],
                    comment["resolution_on_creation"]
                ))

            for history in issue["history"]:
                lines.append((
                    issue["externalId"],
                    issue["title"],
                    issue["type_new"],
                    issue["state_new"],
                    issue["resolution_new"],
                    issue["creationDate"],
                    issue["resolveDate"],
                    issue["components"],
                    history["event"],
                    history["author"]["name"],
                    history["author"]["email"],
                    history["date"],
                    history["event_info_1"],
                    history["event_info_2"]
                ))

    # write to output file
    csv_writer.write_to_csv(output_file, lines, append=True)
def fix_github_browser_commits(data_path, issues_github_list, commits_list,
                               authors_list, emails_list, bots_list):
    """
    Replace the author "GitHub <*****@*****.**>" in both commit and GitHub issue data by the correct author.
    The author "GitHub <*****@*****.**>" is automatically inserted as the committer of a commit that is made when
    editing a file via the web frontend of GitHub. Hence, replace the committer of such commits with the commit's
    author, as author and committer are the same person in such a situation. This also holds for the "commit_added"
    event in GitHub issue data: As this usually uses the committer of a commit as its author, also use the commit's
    author as the author of the "commit_added" event. All other events in the GitHub issue data in which the author is
    "GitHub <*****@*****.**>" are removed. Also "mentioned" or "subscribed" events in the GitHub issue data which
    reference the author "GitHub <*****@*****.**>" are removed from the GitHub issue data. In addition, remove the
    author "GitHub <*****@*****.**>" also from the author data and bot data and remove e-mails that have been sent
    by this author.

    :param data_path: the path to the project data that is to be fixed
    :param issues_github_list: file name of the github issue data
    :param commits_list: file name of the corresponding commit data
    :param authors_list: file name of the corresponding author data
    :param emails_list: file name of the corresponding email data
    :param bots_list: file name of the corresponding bot data
    """
    github_user = "******"
    github_email = "*****@*****.**"
    commit_added_event = "commit_added"
    mentioned_event = "mentioned"
    subscribed_event = "subscribed"
    """
    Helper function to check whether a (name, e-mail) pair belongs to the author "GitHub <*****@*****.**>".
    There are two options in Codeface how this can happen:
    (1) Username is "GitHub" and e-mail address is "*****@*****.**"
    (2) Username is "GitHub" and e-mail address has been replaced by Codeface, resulting in "*****@*****.**"

    :param name: the name of the author to be checked
    :param email: the email address of the author to be checked
    :return: whether the given (name, email) pair belongs to the "GitHub <*****@*****.**>" author
    """
    def is_github_noreply_author(name, email):
        return (name == github_user
                and (email == github_email
                     or email == (github_user + "." + github_email)))

    # Check for all files in the result directory of the project whether they need to be adjusted
    for filepath, dirnames, filenames in walk(data_path):

        # (1) Remove author 'GitHub <*****@*****.**>' from authors list
        if authors_list in filenames:
            f = path.join(filepath, authors_list)
            log.info("Remove author %s <%s> in %s ...", github_user,
                     github_email, f)
            author_data = csv_writer.read_from_csv(f)

            author_data_new = []

            for author in author_data:
                # keep author entry only if it should not be removed
                if not is_github_noreply_author(author[1], author[2]):
                    author_data_new.append(author)
            csv_writer.write_to_csv(f, author_data_new)

        # (2) Remove e-mails from author 'GitHub <*****@*****.**>' from all emails.list files
        if emails_list in filenames:
            f = path.join(filepath, emails_list)
            log.info("Remove emails from author %s <%s> in %s ...",
                     github_user, github_email, f)
            email_data = csv_writer.read_from_csv(f)

            email_data_new = []

            for email in email_data:
                # keep author entry only if it should not be removed
                if not is_github_noreply_author(email[0], email[1]):
                    email_data_new.append(email)
                else:
                    log.warn("Remove email %s as it was sent by %s <%s>.",
                             email[2], email[0], email[1])
            csv_writer.write_to_csv(f, email_data_new)

        # (3) Replace the committer 'GitHub <*****@*****.**>' in all commit.list files
        if commits_list in filenames:
            f = path.join(filepath, commits_list)
            log.info("Replace author %s <%s> in %s ...", github_user,
                     github_email, f)
            commit_data = csv_writer.read_from_csv(f)

            for commit in commit_data:
                # replace committer 'GitHub <*****@*****.**>' by the commit's author
                # (as author and committer are identical when using GitHub's browser interface)
                if is_github_noreply_author(commit[5], commit[6]):
                    commit[5] = commit[2]
                    commit[6] = commit[3]

            csv_writer.write_to_csv(f, commit_data)

        # (4) Replace author 'GitHub <*****@*****.**>' in all "commit_added" events in the GitHub issue data
        # and remove all other events in which 'GitHub <*****@*****.**>' is either author or referenced.
        if issues_github_list in filenames:
            f = path.join(filepath, issues_github_list)
            log.info("Replace author %s <%s> in %s ...", github_user,
                     github_email, f)
            issue_data = csv_writer.read_from_csv(f)

            # read commit data
            commit_data_file = path.join(data_path, commits_list)
            commit_data = csv_writer.read_from_csv(commit_data_file)
            commit_hash_to_author = {
                commit[7]: commit[2:4]
                for commit in commit_data
            }

            issue_data_new = []

            for event in issue_data:
                # replace author if necessary
                if is_github_noreply_author(
                        event[9],
                        event[10]) and event[8] == commit_added_event:
                    # extract commit hash from event info 1
                    commit_hash = event[12]

                    # extract commit author from commit data, if available
                    if commit_hash in commit_hash_to_author:
                        event[9] = commit_hash_to_author[commit_hash][0]
                        event[10] = commit_hash_to_author[commit_hash][1]
                        issue_data_new.append(event)
                    else:
                        # the added commit is not part of the commit data. In most cases, this is due to merge commits
                        # appearing in another pull request, as Codeface does not keep track of merge commits. As we
                        # ignore merge commits in the commit data, we consistently ignore them also if they are added
                        # to a pull request. Hence, the corresponding "commit_added" event will be removed now (i.e.,
                        # not added to the new issue data any more).
                        log.warn(
                            "Commit %s is added in the GitHub issue data, but not part of the commit data. "
                            +
                            "Remove the corresponding 'commit_added' event from the issue data...",
                            commit_hash)
                elif is_github_noreply_author(event[9], event[10]):
                    # the event is authored by 'GitHub <*****@*****.**>', but is not a "commit_added" event, so we
                    # neglect this event and remove it now (i.e., do not add it to the new issue data any more).
                    log.warn(
                        "Event %s is authored by %s <%s>. Remove this event from the issue data...",
                        event[8], event[9], event[10])
                elif (is_github_noreply_author(event[12], event[13][1:-1])
                      and (event[8] == mentioned_event
                           or event[8] == subscribed_event)):
                    # the event references 'GitHub <*****@*****.**>', so we neglect this event and remove it now
                    # (i.e., do not add it to the new issue data any more).
                    log.warn(
                        "Event %s by %s <%s> references %s <%s>. Remove this event from the issue data...",
                        event[8], event[9], event[10], event[12], event[13])
                else:
                    issue_data_new.append(event)

            csv_writer.write_to_csv(f, issue_data_new)

        # (5) Remove author 'GitHub <*****@*****.**>' from bots.list
        if bots_list in filenames:
            f = path.join(filepath, bots_list)
            log.info("Remove author %s <%s> from %s ...", github_user,
                     github_email, f)
            bot_data = csv_writer.read_from_csv(f)

            bot_data_new = []

            for entry in bot_data:
                # keep bot entry only if it should not be removed
                if not is_github_noreply_author(entry[0], entry[1]):
                    bot_data_new.append(entry)
                else:
                    log.warn("Remove entry %s <%s> from bots list.", entry[0],
                             entry[1])

            csv_writer.write_to_csv(f, bot_data_new)

    log.info("Replacing GitHub user: Done.")
def print_to_disk_bugs(issues, results_folder, skip_history):
    """Sorts of bug issues and prints them to file 'bugs-jira.list' in result folder

    :param issues: the issues to sort of bugs
    :param results_folder: the folder where to place 'bugs-jira.list' output file
    :param skip_history: flag if history informations got retrieved and can be printed to the output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "bugs-jira.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # construct lines of output
    lines = []
    for issue in issues:
        log.info("Current issue '{}'".format(issue['externalId']))

        # only write issues of type bug and their comments to the output file
        if issue['type'] == "Bug":
            lines.append((
                issue['externalId'],
                issue['state'],
                issue['resolution'],
                issue['creationDate'],
                issue['resolveDate'],
                False,  ## Value of is.pull.request
                issue['author']['name'],
                issue['author']['email'],
                issue['creationDate'],
                issue['references'],
                "open",  ## event.name
                issue['components'],
                "Open",  ## default state when created
                "Unresolved"  ## default resolution when created
            ))

            lines.append((
                issue['externalId'],
                issue['state'],
                issue['resolution'],
                issue['creationDate'],
                issue['resolveDate'],
                False,  ## Value of is.pull.request
                issue['author']['name'],
                issue['author']['email'],
                issue['creationDate'],
                "",  ## ref.name
                "commented",  ## event.name
                "",  ##components
                "Open",  ##  default state when created
                "Unresolved"  ## default resolution when created
            ))

            for comment in issue["comments"]:
                lines.append((
                    issue['externalId'],
                    issue['state'],
                    issue['resolution'],
                    issue['creationDate'],
                    issue['resolveDate'],
                    False,  ## Value of is.pull.request
                    comment['author']['name'],
                    comment['author']['email'],
                    comment['changeDate'],
                    "",  ## ref.name
                    "commented",  ## event.name
                    "",  ##components
                    comment['state_on_creation'],
                    comment['resolution_on_creation']))

            if not skip_history:
                for history in issue['history']:
                    lines.append((
                        issue['externalId'],
                        issue['state'],
                        issue['resolution'],
                        issue['creationDate'],
                        issue['resolveDate'],
                        False,  ## Value of is.pull.request
                        history['author']['name'],
                        history['author']['email'],
                        history['date'],
                        "",  ## ref.name
                        "updated",  ## event.name
                        "",  ##components
                        history['new_state'],
                        history['new_resolution']))

    # write to output file
    csv_writer.write_to_csv(output_file, lines)
def run_anonymization(conf, resdir):
    """
    Runs the anonymization process for the given parameters, that is, replaces names, e-mail addresses, message ids,
    and issue titles with pseudonymized contents in all .list files in resdir.
    Writes the anonymized .list files to another directory (resdir + "_anonymized").

    :param conf: the Codeface configuration object
    :param resdir: the Codeface results dir, where result files are read from
    """

    authors_list = "authors.list"
    commits_list = "commits.list"
    emails_list = "emails.list"
    issues_github_list = "issues-github.list"
    issues_jira_list = "issues-jira.list"
    bugs_jira_list = "bugs-jira.list"
    bots_list = "bots.list"
    gender_list = "gender.list"
    revisions_list = "revisions.list"  # not to be anonymized, only to be copied to the "anonymized" directory

    # When looking at elements originating from json lists, we need to consider quotation marks around the string
    quot_m = "\""

    data_path = path.join(resdir, conf["project"], conf["tagging"])
    anonymize_path = path.join((resdir + "_anonymized"), conf["project"],
                               conf["tagging"])
    if not path.exists(anonymize_path):
        log.info("Create directory %s", anonymize_path)
        makedirs(anonymize_path)

    log.info("%s: Anonymize authors." % conf["project"])

    # create dictionaries to store mappings from authors to anonymized authors and titles to anonymized titles
    author_to_anonymized_author = dict()
    author_to_anonymized_author_gender = dict()
    i = 0
    i_gender = 0
    title_to_anonymized_title = dict()
    k = 0
    """
    Helper function to anonymize author data (i.e., data from the authors.list file).

    :param author_data: the author data to be anonymized (must have been read via "csv_writer.read_from_csv")
    :param i: counter for anonymized developer names (i.e., its current start value which has not been used yet)
    :param author_to_anonymized_author: dictionary in which to lookup and store mappings from (name, e-mail) pairs
                                        to anonymized (name, e-mail) pairs for the developers
    :param name_only: whether also the name (without e-mail) should be used as key for the dictionary
                      "author_to_anonymized_author". This is necessary if there might be lookups using
                      auto-generated and, therefore, different e-mail addresses for the same name.
    :return: the anonymized "author_data",
             the current value of "i" (which has not been used yet),
             and the updated dictionary "author_to_anonymized_author"
    """
    def anonymize_authors(author_data,
                          i,
                          author_to_anonymized_author,
                          name_only=False):

        for author in author_data:
            orig_author = author[1]
            orig_email = author[2]

            # Don't anonymize the deleted user as this one might be needed for filtering (but add it to the dictionary)
            if orig_author == "Deleted user" and orig_email == "*****@*****.**":
                if not (orig_author,
                        orig_email) in author_to_anonymized_author:
                    author_to_anonymized_author[(orig_author,
                                                 orig_email)] = (orig_author,
                                                                 orig_email)
            else:
                # check whether (name, e-mail) pair isn't already present in the dictionary
                if not (orig_author,
                        orig_email) in author_to_anonymized_author:
                    # check if just the name (without e-mail address) isn't already present in the dictionary
                    if not orig_author in author_to_anonymized_author:
                        # if the author has an empty name, only anonymize their e-mail address
                        if not author[1] == "":
                            author[1] = ("developer" + str(i))
                        author[2] = ("mail" + str(i) + "@dev.org")

                        # add new entry to dictionary (using (name, e-mail) pair as key)
                        author_to_anonymized_author[(orig_author,
                                                     orig_email)] = (author[1],
                                                                     author[2])
                        # if we allow name-only entries, also add an additional entry to dictionary
                        if name_only:
                            author_to_anonymized_author[orig_author] = (
                                author[1], author[2])

                        # increment counter as we have generated a new anonymized developer id
                        i += 1
                    else:
                        # as just the name (without e-mail address) is present in the dictionary, make a lookup
                        # for the name only and add a new entry to the dictionary using (name, e-mail) pair
                        author_new = author_to_anonymized_author[orig_author]
                        author_to_anonymized_author[(
                            orig_author, orig_email)] = (author_new[0],
                                                         author_new[1])
                        author[1] = author_new[0]
                        author[2] = author_new[1]
                else:
                    # as the (name, e-mail) pair is present in the dictionary, just make a lookup for the pair
                    author_new = author_to_anonymized_author[(orig_author,
                                                              orig_email)]
                    author[1] = author_new[0]
                    author[2] = author_new[1]

        return author_data, i, author_to_anonymized_author
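    # Illustrative behavior of the helper above (hypothetical rows; an
    # authors.list row is assumed to be [id, name, e-mail]):
    #   rows = [[1, "Alice", "a@x.org"], [2, "Alice", "a@x.org"]]
    #   rows, i, mapping = anonymize_authors(rows, 0, dict())
    #   -> both rows become [.., "developer0", "mail0@dev.org"] and i == 1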

    # Check for all files in the result directory of the project whether they need to be anonymized
    for filepath, dirnames, filenames in walk(data_path):

        # (1) Anonymize authors lists
        if authors_list in filenames:
            f = path.join(filepath, authors_list)
            log.info("Anonymize %s ...", f)
            author_data = csv_writer.read_from_csv(f)
            author_data_gender = csv_writer.read_from_csv(f)

            # check if tagging is "feature"
            if conf["tagging"] == "feature":
                # as tagging is "feature", we need to check for the proximity data to keep anonymized ids consistent
                # over both feature and proximity data

                # if corresponding proximity data exists, read authors from proximity data and use them for
                # anonymization to make anonymized proximity data and feature data consistent
                f_proximity = f.replace("feature", "proximity")
                if path.isfile(f_proximity):
                    log.info(
                        "Read authors from %s and anonymize them (without dumping to file).",
                        f_proximity)
                    author_data_proximity = csv_writer.read_from_csv(
                        f_proximity)

                    # anonymize authors from proximity data (but just add them to our dictionary, to be used below
                    # for the actual anonymization of the feature data)
                    author_data_proximity, i, author_to_anonymized_author = \
                      anonymize_authors(author_data_proximity, i, author_to_anonymized_author, name_only = True)

            # anonymize authors
            author_data, i, author_to_anonymized_author = \
              anonymize_authors(author_data, i, author_to_anonymized_author)

            author_data_gender, i_gender, author_to_anonymized_author_gender = \
              anonymize_authors(author_data_gender, i_gender, author_to_anonymized_author_gender, name_only = True)

            output_path = f.replace(data_path, anonymize_path)
            if not path.exists(path.dirname(output_path)):
                makedirs(path.dirname(output_path))
            log.info("Write anonymized data to %s ...", output_path)
            csv_writer.write_to_csv(output_path, author_data)

        # (2) Anonymize commits lists
        if commits_list in filenames:
            f = path.join(filepath, commits_list)
            log.info("Anonymize %s ...", f)
            commit_data = csv_writer.read_from_csv(f)

            for commit in commit_data:
                # anonymize author
                new_author = author_to_anonymized_author[(commit[2],
                                                          commit[3])]
                commit[2] = new_author[0]
                commit[3] = new_author[1]
                # anonymize committer
                new_committer = author_to_anonymized_author[(commit[5],
                                                             commit[6])]
                commit[5] = new_committer[0]
                commit[6] = new_committer[1]

            output_path = f.replace(data_path, anonymize_path)
            if not path.exists(path.dirname(output_path)):
                makedirs(path.dirname(output_path))
            log.info("Write anonymized data to %s ...", output_path)
            csv_writer.write_to_csv(output_path, commit_data)

        # (3) Anonymize emails lists
        if emails_list in filenames:
            f = path.join(filepath, emails_list)
            log.info("Anonymize %s ...", f)
            email_data = csv_writer.read_from_csv(f)

            j = 0

            for email in email_data:
                # anonymize author
                new_author = author_to_anonymized_author[(email[0], email[1])]
                email[0] = new_author[0]
                email[1] = new_author[1]
                # anonymize message id
                email[2] = ("<message" + str(j) + "@message.dev.org>")
                j += 1

            output_path = f.replace(data_path, anonymize_path)
            if not path.exists(path.dirname(output_path)):
                makedirs(path.dirname(output_path))
            log.info("Write anonymized data to %s ...", output_path)
            csv_writer.write_to_csv(output_path, email_data)

        # (4) Anonymize issues lists (github)
        if issues_github_list in filenames:
            f = path.join(filepath, issues_github_list)
            log.info("Anonymize %s ...", f)
            issue_data = csv_writer.read_from_csv(f)

            for issue_event in issue_data:
                # anonymize author
                new_author = author_to_anonymized_author[(issue_event[9],
                                                          issue_event[10])]
                issue_event[9] = new_author[0]
                issue_event[10] = new_author[1]
                # anonymize person in event info 1/2
                if (issue_event[12],
                        issue_event[13][1:-1]) in author_to_anonymized_author:
                    new_person = author_to_anonymized_author[(
                        issue_event[12], issue_event[13][1:-1])]
                    issue_event[12] = new_person[0]
                    issue_event[13] = quot_m + new_person[1] + quot_m
                # anonymize issue title
                if issue_event[1] in title_to_anonymized_title:
                    issue_event[1] = title_to_anonymized_title[issue_event[1]]
                else:
                    new_title = ("issue-title-" + str(k))
                    title_to_anonymized_title[issue_event[1]] = new_title
                    issue_event[1] = new_title
                    k += 1

            output_path = f.replace(data_path, anonymize_path)
            if not path.exists(path.dirname(output_path)):
                makedirs(path.dirname(output_path))
            log.info("Write anonymized data to %s ...", output_path)
            csv_writer.write_to_csv(output_path, issue_data)

        # (5) Anonymize issues lists (jira)
        if issues_jira_list in filenames:
            f = path.join(filepath, issues_jira_list)
            log.info("Anonymize %s ...", f)
            issue_data = csv_writer.read_from_csv(f)

            for issue_event in issue_data:
                # anonymize author
                new_author = author_to_anonymized_author[(issue_event[9],
                                                          issue_event[10])]
                issue_event[9] = new_author[0]
                issue_event[10] = new_author[1]
                # anonymize person in event info 1/2
                if (issue_event[12],
                        issue_event[13][1:-1]) in author_to_anonymized_author:
                    new_person = author_to_anonymized_author[(
                        issue_event[12], issue_event[13][1:-1])]
                    issue_event[12] = new_person[0]
                    issue_event[13] = quot_m + new_person[1] + quot_m
                # anonymize issue title
                if issue_event[1] in title_to_anonymized_title:
                    issue_event[1] = title_to_anonymized_title[issue_event[1]]
                else:
                    new_title = ("issue-title-" + str(k))
                    title_to_anonymized_title[issue_event[1]] = new_title
                    issue_event[1] = new_title
                    k += 1

            output_path = f.replace(data_path, anonymize_path)
            if not path.exists(path.dirname(output_path)):
                makedirs(path.dirname(output_path))
            log.info("Write anonymized data to %s ...", output_path)
            csv_writer.write_to_csv(output_path, issue_data)

        # (6) Anonymize bugs lists (jira)
        if bugs_jira_list in filenames:
            f = path.join(filepath, bugs_jira_list)
            log.info("Anonymize %s ...", f)
            bug_data = csv_writer.read_from_csv(f)

            for bug_event in bug_data:
                # anonymize author
                new_author = author_to_anonymized_author[(bug_event[9],
                                                          bug_event[10])]
                bug_event[9] = new_author[0]
                bug_event[10] = new_author[1]
                # anonymize person in event info 1/2
                if (bug_event[12],
                        bug_event[13][1:-1]) in author_to_anonymized_author:
                    new_person = author_to_anonymized_author[(
                        bug_event[12], bug_event[13][1:-1])]
                    bug_event[12] = new_person[0]
                    bug_event[13] = quot_m + new_person[1] + quot_m
                # anonymize bug title
                if bug_event[1] in title_to_anonymized_title:
                    bug_event[1] = title_to_anonymized_title[bug_event[1]]
                else:
                    new_title = ("issue-title-" + str(k))
                    title_to_anonymized_title[bug_event[1]] = new_title
                    bug_event[1] = new_title
                    k += 1

            output_path = f.replace(data_path, anonymize_path)
            if not path.exists(path.dirname(output_path)):
                makedirs(path.dirname(output_path))
            log.info("Write anonymized data to %s ...", output_path)
            csv_writer.write_to_csv(output_path, bug_data)

        # (7) Anonymize bots list
        if bots_list in filenames:
            f = path.join(filepath, bots_list)
            log.info("Anonymize %s ...", f)
            bot_data = csv_writer.read_from_csv(f)

            for bot in bot_data:
                new_person = author_to_anonymized_author[(bot[0], bot[1])]
                bot[0] = new_person[0]
                bot[1] = new_person[1]

            output_path = f.replace(data_path, anonymize_path)
            if not path.exists(path.dirname(output_path)):
                makedirs(path.dirname(output_path))
            log.info("Write anonymized data to %s ...", output_path)
            csv_writer.write_to_csv(output_path, bot_data)

        # (8) Anonymize gender list
        if gender_list in filenames:
            f = path.join(filepath, gender_list)
            log.info("Anonymize %s ...", f)
            gender_data = csv_writer.read_from_csv(f)
            gender_data_new = []

            for author in gender_data:
                if author[0] in author_to_anonymized_author_gender.keys():
                    new_person = author_to_anonymized_author_gender[author[0]]
                    author[0] = new_person[0]
                    gender_data_new.append(author)

            output_path = f.replace(data_path, anonymize_path)
            if not path.exists(path.dirname(output_path)):
                makedirs(path.dirname(output_path))
            log.info("Write anonymized data to %s ...", output_path)
            csv_writer.write_to_csv(output_path, gender_data_new)

        # (9) Copy revisions list
        if revisions_list in filenames:
            f = path.join(filepath, revisions_list)
            log.info("Copy %s ...", f)
            revision_data = csv_writer.read_from_csv(f)

            output_path = f.replace(data_path, anonymize_path)
            if not path.exists(path.dirname(output_path)):
                makedirs(path.dirname(output_path))
            log.info("Copy revision data to %s ...", output_path)
            csv_writer.write_to_csv(output_path, revision_data)

    log.info("Anonymization complete!")
Example No. 15
def insert_user_data(issues, conf, resdir):
    """
    Insert user data into database and update issue data.
    In addition, dump username-to-user list to file.

    :param issues: the issues to retrieve user data from
    :param conf: the project configuration
    :param resdir: the directory in which the username-to-user-list should be dumped
    :return: the updated issue data
    """

    log.info("Syncing users with ID service...")

    # create buffer for users (key: user id)
    user_buffer = dict()
    # create buffer for user ids (key: user string)
    user_id_buffer = dict()
    # create buffer for usernames (key: username)
    username_id_buffer = dict()
    # open database connection
    dbm = DBManager(conf)
    # open ID-service connection
    idservice = idManager(dbm, conf)

    def get_user_string(name, email):
        if not email:
            return "{name}".format(name=name)
            # return "{name} <{name}@default.com>".format(name=name)  # for debugging only
        else:
            return "{name} <{email}>".format(name=name, email=email)

    def get_id_and_update_user(user,
                               buffer_db_ids=user_id_buffer,
                               buffer_usernames=username_id_buffer):
        username = unicode(user["username"]).encode("utf-8")

        # fix encoding for name and e-mail address
        if user["name"] is not None:
            name = unicode(user["name"]).encode("utf-8")
        else:
            name = username
        mail = unicode(user["email"]).encode("utf-8")
        # construct string for ID service and send query
        user_string = get_user_string(name, mail)

        # check buffer to reduce amount of DB queries
        if user_string in buffer_db_ids:
            log.devinfo(
                "Returning person id for user '{}' from buffer.".format(
                    user_string))
            if username is not None:
                buffer_usernames[username] = buffer_db_ids[user_string]
            return buffer_db_ids[user_string]

        # get person information from ID service
        log.devinfo("Passing user '{}' to ID service.".format(user_string))
        idx = idservice.getPersonID(user_string)

        # add user information to buffer
        # user_string = get_user_string(user["name"], user["email"]) # update for
        buffer_db_ids[user_string] = idx

        # add id to username buffer
        if username is not None:
            buffer_usernames[username] = idx

        return idx

    def get_user_from_id(idx, buffer_db=user_buffer):

        # check whether user information is in buffer to reduce amount of DB queries
        if idx in buffer_db:
            log.devinfo("Returning user '{}' from buffer.".format(idx))
            return buffer_db[idx]

        # get person information from ID service
        log.devinfo("Passing user id '{}' to ID service.".format(idx))
        person = idservice.getPersonFromDB(idx)
        user = dict()
        user["email"] = person["email1"]  # column "email1"
        user["name"] = person["name"]  # column "name"
        user["id"] = person["id"]  # column "id"

        # add user information to buffer
        buffer_db[idx] = user

        return user

    # check and update database for all occurring users
    for issue in issues:
        # check database for issue author
        issue["user"] = get_id_and_update_user(issue["user"])

        # check database for event authors
        for event in issue["eventsList"]:
            event["user"] = get_id_and_update_user(event["user"])

            # check database for the reference-target user if needed
            if event["ref_target"] != "":
                event["ref_target"] = get_id_and_update_user(
                    event["ref_target"])

    # get all users after the database updates have been performed
    for issue in issues:
        # get issue author
        issue["user"] = get_user_from_id(issue["user"])

        # get event authors
        for event in issue["eventsList"]:
            event["user"] = get_user_from_id(event["user"])

            # get the reference-target user if needed
            if event["ref_target"] != "":
                event["ref_target"] = get_user_from_id(event["ref_target"])
                event["event_info_1"] = event["ref_target"]["name"]
                event["event_info_2"] = event["ref_target"]["email"]

    # dump username, name, and e-mail to file
    lines = []
    for username in username_id_buffer:
        user = get_user_from_id(username_id_buffer[username])
        lines.append((username, user["name"], user["email"]))

    log.info("Dump username list to file...")
    username_dump = os.path.join(resdir, "usernames.list")
    csv_writer.write_to_csv(username_dump,
                            sorted(set(lines), key=lambda line: line[0]))

    return issues
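
The buffers above implement a simple memoization of ID-service lookups: each distinct user string hits the database at most once. A generic sketch of the same pattern follows (illustrative only; make_cached_lookup is not part of the codebase):

# Generic memoization sketch mirroring the buffering in get_id_and_update_user:
# wrap an expensive lookup so repeated keys are served from an in-memory cache.
def make_cached_lookup(lookup):
    cache = dict()
    def cached(key):
        if key not in cache:
            cache[key] = lookup(key)
        return cache[key]
    return cached

# e.g., person_id_for = make_cached_lookup(idservice.getPersonID)
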
Example No. 16
def print_to_disk_bugs(issues, results_folder):
    """
    Extract bug issues and print them to file "bugs-jira.list" in the result folder.
    This method prints in a format consistent with that of "print_to_disk" in "issue_processing.py".

    :param issues: the issues from which to extract the bugs
    :param results_folder: the folder where to place the "bugs-jira.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "bugs-jira.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # construct lines of output
    lines = []
    for issue in issues:
        log.info("Current issue '{}'".format(issue["externalId"]))

        # only write issues with type bug and their comments in the output file
        if "bug" in issue["type_list"]:

            # add the creation event
            lines.append((
                issue["externalId"],
                issue["title"],
                json.dumps(issue["type_list"]),
                issue["state_new"],
                json.dumps(issue["resolution_list"]),
                issue["creationDate"],
                issue["resolveDate"],
                json.dumps(issue["components"]),
                "created",  ## event.name
                issue["author"]["name"],
                issue["author"]["email"],
                issue["creationDate"],
                "open",  ## default state when created
                json.dumps(["unresolved"])  ## default resolution when created
            ))

            # add an additional commented event for the creation
            lines.append((
                issue["externalId"],
                issue["title"],
                json.dumps(issue["type_list"]),
                issue["state_new"],
                json.dumps(issue["resolution_list"]),
                issue["creationDate"],
                issue["resolveDate"],
                json.dumps(issue["components"]),
                "commented",
                issue["author"]["name"],
                issue["author"]["email"],
                issue["creationDate"],
                "open",  ##  default state when created
                json.dumps(["unresolved"])  ## default resolution when created
            ))

            # add comment events
            for comment in issue["comments"]:
                lines.append(
                    (issue["externalId"], issue["title"],
                     json.dumps(issue["type_list"]), issue["state_new"],
                     json.dumps(issue["resolution_list"]),
                     issue["creationDate"], issue["resolveDate"],
                     json.dumps(issue["components"]), "commented",
                     comment["author"]["name"], comment["author"]["email"],
                     comment["changeDate"], comment["state_on_creation"],
                     json.dumps(comment["resolution_on_creation"])))

            # add history events
            for history in issue["history"]:
                lines.append(
                    (issue["externalId"], issue["title"],
                     json.dumps(issue["type_list"]), issue["state_new"],
                     json.dumps(issue["resolution_list"]),
                     issue["creationDate"], issue["resolveDate"],
                     json.dumps(issue["components"]), history["event"],
                     history["author"]["name"], history["author"]["email"],
                     history["date"], history["event_info_1"],
                     json.dumps(history["event_info_2"])))

    # write to output file
    csv_writer.write_to_csv(output_file, lines, append=True)
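
Each row written above has 14 columns: external id, title, JSON type list, state, JSON resolution list, creation date, resolve date, JSON components, event name, author name, author e-mail, event date, event info 1, and JSON event info 2. A hedged sketch of reading the file back (the delimiter is an assumption; it depends on how csv_writer.write_to_csv is configured):

# Hypothetical consumer of "bugs-jira.list"; adjust the delimiter to match
# the csv_writer configuration actually in use.
import csv

with open("bugs-jira.list") as bugs_file:
    for row in csv.reader(bugs_file):
        external_id, event_name = row[0], row[8]        # issue id, event name
        author_name, author_email = row[9], row[10]     # event author
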
Example No. 17
def print_to_disk(issues, results_folder):
    """
    Print issues to file "issues-jira.list" in result folder.

    :param issues: the issues to dump
    :param results_folder: the folder where to place "issues-jira.list" output file
    """

    # construct path to output file
    output_file = os.path.join(results_folder, "issues-jira.list")
    log.info("Dumping output in file '{}'...".format(output_file))

    # construct lines of output
    lines = []
    for issue in issues:
        log.info("Current issue '{}'".format(issue["externalId"]))

        # add the creation event
        lines.append((
            issue["externalId"],
            issue["title"],
            json.dumps(issue["type_list"]),
            issue["state_new"],
            json.dumps(issue["resolution_list"]),
            issue["creationDate"],
            issue["resolveDate"],
            json.dumps(issue["components"]),
            "created",  ## event.name
            issue["author"]["name"],
            issue["author"]["email"],
            issue["creationDate"],
            "open",  ## default state when created
            json.dumps(["unresolved"])  ## default resolution when created
        ))

        # add an additional commented event for the creation
        lines.append((
            issue["externalId"],
            issue["title"],
            json.dumps(issue["type_list"]),
            issue["state_new"],
            json.dumps(issue["resolution_list"]),
            issue["creationDate"],
            issue["resolveDate"],
            json.dumps(issue["components"]),
            "commented",
            issue["author"]["name"],
            issue["author"]["email"],
            issue["creationDate"],
            "open",  ##  default state when created
            json.dumps(["unresolved"])  ## default resolution when created
        ))

        # add comment events
        for comment in issue["comments"]:
            lines.append(
                (issue["externalId"], issue["title"],
                 json.dumps(issue["type_list"]), issue["state_new"],
                 json.dumps(issue["resolution_list"]),
                 issue["creationDate"], issue["resolveDate"],
                 json.dumps(issue["components"]), "commented",
                 comment["author"]["name"], comment["author"]["email"],
                 comment["changeDate"], comment["state_on_creation"],
                 json.dumps(comment["resolution_on_creation"])))

        # add history events
        for history in issue["history"]:
            lines.append(
                (issue["externalId"], issue["title"],
                 json.dumps(issue["type_list"]), issue["state_new"],
                 json.dumps(issue["resolution_list"]),
                 issue["creationDate"], issue["resolveDate"],
                 json.dumps(issue["components"]), history["event"],
                 history["author"]["name"], history["author"]["email"],
                 history["date"], history["event_info_1"],
                 json.dumps(history["event_info_2"])))

    # write to output file
    csv_writer.write_to_csv(output_file, lines, append=True)
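
Note that write_to_csv is called with append=True, so running this function twice against the same results folder accumulates duplicate rows. A hedged alternative (an assumption, not the project's code) would deduplicate the lines while preserving their order before writing:

# Order-preserving deduplication sketch; "seen" tracks tuples already emitted
# (set.add returns None, so the condition keeps only first occurrences).
seen = set()
unique_lines = [line for line in lines if not (line in seen or seen.add(line))]
csv_writer.write_to_csv(output_file, unique_lines, append=True)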