Exemplo n.º 1
    def check_commit_dependency(self, commit_dependency_data):
        """
        Check that the commit_dependency table contains the expected data
        given by self.commit_dependency in the unit test.

        Each expected row is looked up in the actual rows; the "impl" column
        is only compared when the expected row provides a value for it.
        Finally the number of actual rows must equal the number of expected
        rows.

        :param commit_dependency_data:
            The data of the actual table:
            | id  | commitId | file | entityId | entityType | size | impl |
        """
        # nothing to verify when the test case declares no expectations
        if self.commit_dependency is None:
            return

        conf = Configuration.load(self.codeface_conf, self.project_conf)
        dbm = DBManager(conf)
        project_id = dbm.getProjectID(conf["project"], self.tagging)

        def get_commit_id(commit_hash):
            # expected rows name commits by hash, the table stores commit ids
            return dbm.getCommitId(project_id, commit_hash)

        # remove the "id" column
        # so we have (commit_id, file, entityId, type, size, impl) tuples
        data = [(res[1], res[2], res[3], res[4], res[5], res[6])
                for res in commit_dependency_data]
        data_no_impl = [res[0:5] for res in data]

        expected_data = [(get_commit_id(res[0]), res[1], res[2], res[3],
                          res[4], res[5]) for res in self.commit_dependency]
        for expected in expected_data:
            if expected[5] is None:
                # don't check the impl
                self.assertIn(expected[0:5], data_no_impl)
            else:
                self.assertIn(expected, data)

        self.assertEqual(len(data), len(expected_data))
Exemplo n.º 2
 def checkEdges(self):
     """
     Compare the edge lists stored in the database with self.correct_edges.

     self.correct_edges is either one edge list (checked against the first
     release range) or a list of edge lists, one per release range in the
     order returned by the database manager. Every entry of an edge list
     has the form [person name, person name, third edgelist column].
     """
     conf = Configuration.load(self.codeface_conf, self.project_conf)
     dbm = DBManager(conf)
     project_id = dbm.getProjectID(conf["project"], self.tagging)
     persons = dbm.get_project_persons(project_id)
     # Create map from id to name
     person_map = {person[0]: person[1] for person in persons}
     given_correct_edges = self.correct_edges
     # A single edge list starts with an edge whose first item is a scalar
     # (the name); a list of edge lists starts with an edge list whose first
     # item is itself an edge. The former identity test against the type
     # object `str` could never match an actual name.
     first = given_correct_edges[0] if given_correct_edges else None
     if first and not isinstance(first[0], (list, tuple)):
         # simply check the first range
         given_correct_edges = [self.correct_edges]
     release_ranges = dbm.get_release_ranges(project_id)
     for i, correct_edges in enumerate(given_correct_edges):
         release_range = release_ranges[i]
         cluster_id = dbm.get_cluster_id(project_id, release_range)
         edgelist = dbm.get_edgelist(cluster_id)
         # Create edge list with developer names
         test_edges = [[person_map[edge[0]], person_map[edge[1]], edge[2]]
                       for edge in edgelist]
         ## Check number of matches with known correct edges
         match_count = sum(1 for test_edge in test_edges
                           if test_edge in correct_edges)
         res = (match_count == len(correct_edges))
         self.assertTrue(
             res,
             msg="Project edgelist is incorrect for the v{}_release "
             "to v{}_release analysis!".format(i, i + 1))
# Run every project-level extraction and, when range-level extraction is
# enabled, every range-level extraction once per pair of consecutive revisions.
#
# NOTE(review): this listing appears to have lost lines: the signature is cut
# off, the docstring has no quote delimiters, and several statements below are
# incomplete. Each gap is marked; restore the code from the upstream source
# rather than guessing at the missing parts.
def run_extraction(conf, resdir, extract_commit_messages, extract_impl,
    # NOTE(review): the remaining parameters and the closing "):" are missing.
    # The body reads `extract_on_range_level` and `csv_writer`, neither of which
    # is defined in the visible code -- presumably a parameter and a module
    # import, respectively; confirm.
    # NOTE(review): the prose up to the ":param resdir:" line reads like the
    # docstring, without its delimiters and possibly without further ":param"
    # entries.
    Runs the extraction process for the list of given parameters.

    :param conf: the Codeface configuration object
    :param resdir: the Codeface results dir, where output files are written

    log.info("%s: Extracting data" % conf["project"])

    # initialize database manager with given configuration
    dbm = DBManager(conf)

    # get all types of extractions, both project-level and range-level
    # NOTE(review): the end of this argument list and the closing ")" are missing.
    __extractions_project, __extractions_range = extractions.get_extractions(
        dbm, conf, resdir, csv_writer, extract_commit_messages, extract_impl,

    # run project-level extractions
    # NOTE(review): the loop body is missing.
    for extraction in __extractions_project:

    # run range-level extractions (only if explicitly enabled)
    if extract_on_range_level:

        # check if list of revisions in database is the same as in the config file
        revs = conf["revisions"]
        list_of_revisions = extractions.RevisionExtraction(
            dbm, conf, resdir, csv_writer).get_list()
        if revs:
            if set(revs) != set(list_of_revisions):
                    # NOTE(review): the three message strings below have lost
                    # the calls that emit them, the branch keywords between
                    # them, and whatever implements "Stopping now".
                    "List of revisions in configuration file do not match the list stored in the DB! Stopping now."
                    "List of revisions in configuration file and DB match.")
                "No list of revisions found in configuration file, using the list from the DB instead!"
            revs = list_of_revisions  # set list of revisions as stored in the database

        # for all revisions of this project
        # each pair of neighbouring revisions forms one range, numbered from 1
        for i in range(len(revs) - 1):
            start_rev = revs[i]
            end_rev = revs[i + 1]
            range_number = i + 1

            log.info("%s: Extracting data for range %s [version '%s']" %
                     (conf["project"], range_number, end_rev))

            for extraction in __extractions_range:
                extraction.run(range_number, start_rev, end_rev)

    log.info("Extraction complete!")
Exemplo n.º 4
 def getResults(self):
     """
     Fetch the full content of every table named in self.result_tables.

     Asserts first that the project id looked up for the configured project
     and tagging is non-negative.

     :return: dict mapping each table name to the rows fetched from it
     """
     config = Configuration.load(self.codeface_conf, self.project_conf)
     manager = DBManager(config)
     pid = manager.getProjectID(config["project"], self.tagging)
     self.assertGreaterEqual(pid, 0)

     def fetch_all_rows(name):
         # run the query, then collect everything it produced
         manager.doExec("SELECT * FROM {table}".format(table=name))
         return manager.doFetchAll()

     return {name: fetch_all_rows(name) for name in self.result_tables}
Exemplo n.º 5
 def checkClean(self):
     """
     Delete the project row and assert that no related data is left behind.

     After the project has been deleted, a SELECT on every table in
     pid_tables (restricted to the project id) and on every table in
     other_tables (unrestricted) must make doExec return 0.
     """
     conf = Configuration.load(self.codeface_conf, self.project_conf)
     dbm = DBManager(conf)
     project_id = dbm.getProjectID(conf["project"], self.tagging)
     dbm.doExecCommit("DELETE FROM project WHERE id={}".format(project_id))
     # NOTE(review): doExec's return value is presumably the number of rows
     # matched by the query -- confirm against DBManager.
     for table in pid_tables:
         res = dbm.doExec("SELECT * FROM {table} WHERE projectId={pid}".
                          format(table=table, pid=project_id))
         self.assertEqual(res, 0, msg="Table '{}' still dirty!".
                          format(table))
     for table in other_tables:
         res = dbm.doExec("SELECT * FROM {table}".format(table=table))
         self.assertEqual(res, 0,  msg="Table '{}' still dirty!".format(table))
Exemplo n.º 6
 def setup_with_p(self, p):
     """
     Derive the run settings from self.p and empty all known tables.

     Sets the directory, configuration and logging attributes on the test
     case, then deletes every row from the tables in pid_tables and
     other_tables.

     NOTE(review): the ``p`` argument is never read; all values come from
     ``self.p`` -- confirm that callers always pass ``self.p``.
     """
     base = self.p.directory
     self.gitdir = dirname(base)
     # all three paths below live inside <base>/.git
     self.mldir = pathjoin(base, ".git")
     self.resdir = pathjoin(base, ".git", "results")
     self.logfile = pathjoin(base, ".git", "log")
     self.loglevel = "devinfo"
     self.no_report = False
     self.recreate = False
     self.project_conf = self.p.codeface_conf
     # This config_file is added in the codeface test command handler
     self.codeface_conf = self.config_file
     manager = DBManager(
         Configuration.load(self.codeface_conf, self.project_conf))
     for name in pid_tables + other_tables:
         manager.doExecCommit("DELETE FROM {}".format(name))
Exemplo n.º 7
 def checkEdges(self):
     """
     Assert that the project's stored edge list matches self.correct_edges.

     The check passes when the number of stored edges that also occur in
     self.correct_edges equals len(self.correct_edges).
     """
     config = Configuration.load(self.codeface_conf, self.project_conf)
     manager = DBManager(config)
     pid = manager.getProjectID(config["project"], self.tagging)
     cluster = manager.get_cluster_id(pid)
     raw_edges = manager.get_edgelist(cluster)

     # lookup table from person id to person name
     names = {}
     for row in manager.get_project_persons(pid):
         names[row[0]] = row[1]

     # translate both endpoints of every edge into developer names
     named_edges = []
     for edge in raw_edges:
         named_edges.append([names[edge[0]], names[edge[1]], edge[2]])

     # count the stored edges that are among the known correct ones
     hits = sum(1 for candidate in named_edges
                if candidate in self.correct_edges)
     self.assertTrue(hits == len(self.correct_edges),
                     msg="Project edgelist is incorrect!")
def insert_user_data(issues, conf):
    """Insert user data into database and update issue data.

    Every user dict found in the issues (the issue's "user", each event's
    "user" and, when it is not the empty string, each event's "ref_target")
    is passed to the ID service and updated in place with the "email",
    "name" and "id" of the person stored in the database.

    :param issues: the issues to retrieve user data from
    :param conf: the project configuration
    :return: the updated issue data
    """

    log.info("Syncing users with ID service...")

    # create buffer for users
    user_buffer = dict()
    # open database connection
    dbm = DBManager(conf)
    # open ID-service connection
    idservice = idManager(dbm, conf)

    def get_user_string(name, email):
        """Build the "name <email>" string sent to the ID service."""
        if not email:
            # no (or empty) e-mail address: identify the user by name only
            return "{name}".format(name=name)
        return "{name} <{email}>".format(name=name, email=email)

    def get_or_update_user(user, buffer_db=user_buffer):
        """Return `user` updated with the person data from the database.

        Results are buffered per user string so that each distinct user is
        sent to the ID service only once.
        """
        # fix encoding for name and e-mail address;
        # fall back to the username when no name is given
        if user["name"] is not None:
            name = unicode(user["name"]).encode("utf-8")
        else:
            name = unicode(user["username"]).encode("utf-8")
        mail = unicode(user["email"]).encode("utf-8")
        # construct string for ID service and send query
        user_string = get_user_string(name, mail)

        # check buffer to reduce amount of DB queries
        if user_string in buffer_db:
            log.devinfo("Returning user '{}' from buffer.".format(user_string))
            return buffer_db[user_string]

        # get person information from ID service
        log.devinfo("Passing user '{}' to ID service.".format(user_string))
        idx = idservice.getPersonID(user_string)

        # update user data with person information from DB
        person = idservice.getPersonFromDB(idx)
        user["email"] = person["email1"]  # column 'email1'
        user["name"] = person["name"]  # column 'name'
        user["id"] = person["id"]  # column 'id'

        # add user information to buffer (keyed by the string that was
        # queried, not by the updated name/e-mail)
        buffer_db[user_string] = user

        return user

    for issue in issues:
        # check database for issue author
        issue["user"] = get_or_update_user(issue["user"])

        # check database for event authors
        for event in issue["eventsList"]:
            # get the event user from the DB
            event["user"] = get_or_update_user(event["user"])
            # get the reference-target user from the DB if needed
            if event["ref_target"] != "":
                event["ref_target"] = get_or_update_user(event["ref_target"])

    return issues
Exemplo n.º 9
 def clear_tables(self):
     """Delete all rows from every table named in self.result_tables."""
     manager = DBManager(
         Configuration.load(self.codeface_conf, self.project_conf))
     for name in self.result_tables:
         delete_stmt = "DELETE FROM {}".format(name)
         manager.doExecCommit(delete_stmt)
Exemplo n.º 10
# Resolve every user referenced by the issues through the ID service (first
# pass) and then replace the resulting person ids with the person data read
# from the database (second pass).
#
# NOTE(review): this listing appears to have lost lines: the docstring has no
# quote delimiters and several statements below are incomplete. Each gap is
# marked; restore the code from the upstream source rather than guessing.
def insert_user_data(issues, conf):
    # NOTE(review): the prose up to the ":return:" line reads like the
    # docstring, without its delimiters.
    Insert user data into database and update issue data.

    :param issues: the issues to retrieve user data from
    :param conf: the project configuration
    :return: the updated issue data

    log.info("Syncing users with ID service...")

    # create buffer for users (key: user id)
    user_buffer = dict()
    # create buffer for user ids (key: user string)
    user_id_buffer = dict()
    # open database connection
    dbm = DBManager(conf)
    # open ID-service connection
    idservice = idManager(dbm, conf)

    # Build the string that identifies a user towards the ID service.
    def get_user_string(name, email):
        if not email or email is None:
            return "{name}".format(name=name)
            # return "{name} <{name}@default.com>".format(name=name)  # for debugging only
            # NOTE(review): as written the return below is unreachable and the
            # function yields None for a non-empty e-mail; an "else:" line was
            # presumably lost.
            return "{name} <{email}>".format(name=name, email=email)

    # Return the ID-service person id for `user`, buffered per user string.
    def get_id_and_update_user(user, buffer_db_ids=user_id_buffer):
        # fix encoding for name and e-mail address
        if user["name"] is not None and user["name"] != "":
            name = unicode(user["name"]).encode("utf-8")
            # NOTE(review): as written the next assignment always overwrites
            # the one above; an "else:" line was presumably lost.
            name = unicode(user["username"]).encode("utf-8")
        mail = unicode(user["email"]).encode("utf-8")  # empty
        # construct string for ID service and send query
        user_string = get_user_string(name, mail)

        # check buffer to reduce amount of DB queries
        if user_string in buffer_db_ids:
                # NOTE(review): the call that opens this expression and the
                # end of its argument list are missing (presumably a log
                # call like the log.devinfo calls further down).
                "Returning person id for user '{}' from buffer.".format(
            return buffer_db_ids[user_string]

        # get person information from ID service
        log.devinfo("Passing user '{}' to ID service.".format(user_string))
        idx = idservice.getPersonID(user_string)

        # add user information to buffer
        # user_string = get_user_string(user["name"], user["email"]) # update for
        buffer_db_ids[user_string] = idx

        return idx

    # Return a dict with "email", "name" and "id" for the person id `idx`,
    # buffered per id.
    def get_user_from_id(idx, buffer_db=user_buffer):

        # check whether user information is in buffer to reduce amount of DB queries
        if idx in buffer_db:
            log.devinfo("Returning user '{}' from buffer.".format(idx))
            return buffer_db[idx]

        # get person information from ID service
        log.devinfo("Passing user id '{}' to ID service.".format(idx))
        person = idservice.getPersonFromDB(idx)
        user = dict()
        user["email"] = person["email1"]  # column "email1"
        user["name"] = person["name"]  # column "name"
        user["id"] = person["id"]  # column "id"

        # add user information to buffer
        buffer_db[idx] = user

        return user

    # check and update database for all occurring users
    # (first pass: every user reference is replaced by a person id)
    for issue in issues:
        # check database for issue author
        issue["author"] = get_id_and_update_user(issue["author"])

        # check database for comment authors
        for comment in issue["comments"]:
            comment["author"] = get_id_and_update_user(comment["author"])

        # check database for event authors in the history
        for event in issue["history"]:
            event["author"] = get_id_and_update_user(event["author"])

            # check database for target user if needed
            if event["event"] == "assigned":
                # NOTE(review): the remaining create_user arguments and the
                # closing parentheses are missing; create_user itself is
                # defined outside the visible code.
                assigned_user = get_id_and_update_user(
                    create_user(event["event_info_1"], "",
                event["event_info_1"] = assigned_user

    # get all users after database updates having been performed
    # (second pass: every person id is replaced by the buffered user dict)
    for issue in issues:
        # get issue author
        issue["author"] = get_user_from_id(issue["author"])

        # get comment authors
        for comment in issue["comments"]:
            comment["author"] = get_user_from_id(comment["author"])

        # get event authors for non-comment events
        for event in issue["history"]:
            event["author"] = get_user_from_id(event["author"])

            # get target user if needed
            if event["event"] == "assigned":
                assigned_user = get_user_from_id(event["event_info_1"])
                event["event_info_1"] = assigned_user["name"]
                event["event_info_2"] = assigned_user["email"]

    # NOTE(review): the format argument and the closing parentheses of this
    # call are missing.
    log.debug("number of issues after insert_user_data: '{}'".format(
    return issues
Exemplo n.º 11
# Resolve every user referenced by the issues through the ID service (first
# pass), replace the resulting person ids with the person data read from the
# database (second pass), and collect a (username, name, e-mail) line per
# username for a dump file below `resdir`.
#
# NOTE(review): this listing appears to have lost lines: the docstring has no
# quote delimiters and several statements below are incomplete. Each gap is
# marked; restore the code from the upstream source rather than guessing.
def insert_user_data(issues, conf, resdir):
    # NOTE(review): the prose up to the ":return:" line reads like the
    # docstring, without its delimiters.
    Insert user data into database and update issue data.
    In addition, dump username-to-user list to file.

    :param issues: the issues to retrieve user data from
    :param conf: the project configuration
    :param resdir: the directory in which the username-to-user-list should be dumped
    :return: the updated issue data

    log.info("Syncing users with ID service...")

    # create buffer for users (key: user id)
    user_buffer = dict()
    # create buffer for user ids (key: user string)
    user_id_buffer = dict()
    # create buffer for usernames (key: username)
    username_id_buffer = dict()
    # open database connection
    dbm = DBManager(conf)
    # open ID-service connection
    idservice = idManager(dbm, conf)

    # Build the string that identifies a user towards the ID service.
    def get_user_string(name, email):
        if not email or email is None:
            return "{name}".format(name=name)
            # return "{name} <{name}@default.com>".format(name=name)  # for debugging only
            # NOTE(review): as written the return below is unreachable and the
            # function yields None for a non-empty e-mail; an "else:" line was
            # presumably lost.
            return "{name} <{email}>".format(name=name, email=email)

    # Return the ID-service person id for `user`, buffered per user string,
    # and record the id under the user's username.
    # NOTE(review): the rest of the signature is missing. The body reads
    # `buffer_db_ids` and `buffer_usernames`, which are not defined in the
    # visible code -- presumably parameters defaulting to the buffers created
    # above; confirm.
    def get_id_and_update_user(user,
        username = unicode(user["username"]).encode("utf-8")

        # fix encoding for name and e-mail address
        if user["name"] is not None:
            name = unicode(user["name"]).encode("utf-8")
            # NOTE(review): as written the next assignment always overwrites
            # the one above; an "else:" line was presumably lost.
            name = username
        mail = unicode(user["email"]).encode("utf-8")
        # construct string for ID service and send query
        user_string = get_user_string(name, mail)

        # check buffer to reduce amount of DB queries
        if user_string in buffer_db_ids:
                # NOTE(review): the call that opens this expression and the
                # end of its argument list are missing (presumably a log
                # call like the log.devinfo calls further down).
                "Returning person id for user '{}' from buffer.".format(
            if username is not None:
                buffer_usernames[username] = buffer_db_ids[user_string]
            return buffer_db_ids[user_string]

        # get person information from ID service
        log.devinfo("Passing user '{}' to ID service.".format(user_string))
        idx = idservice.getPersonID(user_string)

        # add user information to buffer
        # user_string = get_user_string(user["name"], user["email"]) # update for
        buffer_db_ids[user_string] = idx

        # add id to username buffer
        if username is not None:
            buffer_usernames[username] = idx

        return idx

    # Return a dict with "email", "name" and "id" for the person id `idx`,
    # buffered per id.
    def get_user_from_id(idx, buffer_db=user_buffer):

        # check whether user information is in buffer to reduce amount of DB queries
        if idx in buffer_db:
            log.devinfo("Returning user '{}' from buffer.".format(idx))
            return buffer_db[idx]

        # get person information from ID service
        log.devinfo("Passing user id '{}' to ID service.".format(idx))
        person = idservice.getPersonFromDB(idx)
        user = dict()
        user["email"] = person["email1"]  # column "email1"
        user["name"] = person["name"]  # column "name"
        user["id"] = person["id"]  # column "id"

        # add user information to buffer
        buffer_db[idx] = user

        return user

    # check and update database for all occurring users
    # (first pass: every user reference is replaced by a person id)
    for issue in issues:
        # check database for issue author
        issue["user"] = get_id_and_update_user(issue["user"])

        # check database for event authors
        for event in issue["eventsList"]:
            event["user"] = get_id_and_update_user(event["user"])

            # check database for the reference-target user if needed
            if event["ref_target"] != "":
                # NOTE(review): the argument and the closing parenthesis of
                # this call are missing.
                event["ref_target"] = get_id_and_update_user(

    # get all users after database updates having been performed
    # (second pass: every person id is replaced by the buffered user dict)
    for issue in issues:
        # get issue author
        issue["user"] = get_user_from_id(issue["user"])

        # get event authors
        for event in issue["eventsList"]:
            event["user"] = get_user_from_id(event["user"])

            # get the reference-target user if needed
            if event["ref_target"] != "":
                event["ref_target"] = get_user_from_id(event["ref_target"])
                event["event_info_1"] = event["ref_target"]["name"]
                event["event_info_2"] = event["ref_target"]["email"]

    # dump username, name, and e-mail to file
    lines = []
    for username in username_id_buffer:
        user = get_user_from_id(username_id_buffer[username])
        lines.append((username, user["name"], user["email"]))

    log.info("Dump username list to file...")
    username_dump = os.path.join(resdir, "usernames.list")
    # NOTE(review): the call that writes the de-duplicated lines, sorted by
    # username, to `username_dump` is missing; only its last argument survives.
                            sorted(set(lines), key=lambda line: line[0]))

    return issues