def check_commit_dependency(self, commit_dependency_data):
    '''
    Compare the rows of the commit_dependency table with the expectation
    stored in self.commit_dependency by the unit test.

    Nothing is checked when self.commit_dependency is None.

    :param commit_dependency_data: the rows of the actual table:
           | id | commitId | file | entityId | entityType | size | impl |
    :return: None
    '''
    if self.commit_dependency is None:
        return

    conf = Configuration.load(self.codeface_conf, self.project_conf)
    dbm = DBManager(conf)
    project_id = dbm.getProjectID(conf["project"], self.tagging)

    # Strip the leading "id" column, leaving
    # (commit_id, file, entityId, type, size, impl) tuples.
    actual_rows = [tuple(row[col] for col in range(1, 7))
                   for row in commit_dependency_data]
    actual_rows_no_impl = [row[0:5] for row in actual_rows]

    # The expectation refers to commits by hash; translate each hash into
    # the commit id used by the database before comparing.
    expected_rows = []
    for wanted in self.commit_dependency:
        commit_id = dbm.getCommitId(project_id, wanted[0])
        expected_rows.append(
            (commit_id,) + tuple(wanted[col] for col in range(1, 6)))

    for wanted in expected_rows:
        if wanted[5] is not None:
            self.assertIn(wanted, actual_rows)
        else:
            # an expected impl of None means "don't check the impl"
            self.assertIn(wanted[0:5], actual_rows_no_impl)

    self.assertEqual(len(actual_rows), len(expected_rows))
def getResults(self):
    """
    Fetch the complete contents of every table named in self.result_tables.

    Asserts beforehand that the project id looked up for the configured
    project and tagging is not negative.

    :return: dict mapping each table name to the rows returned by
             dbm.doFetchAll() for "SELECT * FROM <table>"
    """
    conf = Configuration.load(self.codeface_conf, self.project_conf)
    dbm = DBManager(conf)
    self.assertGreaterEqual(
        dbm.getProjectID(conf["project"], self.tagging), 0)

    def fetch_table(table_name):
        # Run the query, then collect everything it produced.
        dbm.doExec("SELECT * FROM {table}".format(table=table_name))
        return dbm.doFetchAll()

    return {table_name: fetch_table(table_name)
            for table_name in self.result_tables}
def setup_with_p(self, p):
    """
    Configure this test case for the project held in self.p and empty all
    tables listed in pid_tables and other_tables.

    :param p: NOTE(review): this argument is not read; the project is taken
              from self.p instead -- confirm callers always pass self.p
    """
    project_dir = self.p.directory

    # Locations derived from the project directory
    self.gitdir = dirname(project_dir)
    self.mldir = pathjoin(project_dir, ".git")
    self.resdir = pathjoin(project_dir, ".git", "results")
    self.logfile = pathjoin(project_dir, ".git", "log")

    # Fixed run options
    self.no_report = False
    self.recreate = False
    self.loglevel = "devinfo"

    # Configuration files
    self.project_conf = self.p.codeface_conf
    # This config_file is added in the codeface test command handler
    self.codeface_conf = self.config_file

    conf = Configuration.load(self.codeface_conf, self.project_conf)
    dbm = DBManager(conf)
    # Start from a clean database: wipe every known table.
    tables_to_wipe = pid_tables + other_tables
    for table_name in tables_to_wipe:
        dbm.doExecCommit("DELETE FROM {}".format(table_name))
def run_extraction(conf, resdir, extract_commit_messages, extract_impl,
                   extract_on_range_level):
    """
    Run all project-level extractions and, when enabled, the range-level
    extractions for every pair of consecutive revisions.

    :param conf: the Codeface configuration object
    :param resdir: the Codeface results dir, where output files are written
    :param extract_commit_messages: forwarded to extractions.get_extractions
    :param extract_impl: forwarded to extractions.get_extractions
    :param extract_on_range_level: forwarded to extractions.get_extractions;
           if true, the range-level extractions are run as well
    """
    log.info("%s: Extracting data" % conf["project"])

    # database manager for the given configuration
    dbm = DBManager(conf)

    # collect both kinds of extractions: project-level and range-level
    project_level, range_level = extractions.get_extractions(
        dbm, conf, resdir, csv_writer, extract_commit_messages, extract_impl,
        extract_on_range_level)

    # project-level extractions always run
    for project_extraction in project_level:
        project_extraction.run()

    # range-level extractions run only if explicitly enabled
    if extract_on_range_level:
        revs = conf["revisions"]
        db_revs = extractions.RevisionExtraction(
            dbm, conf, resdir, csv_writer).get_list()

        if not revs:
            # nothing configured: fall back to the revisions stored in the DB
            log.info(
                "No list of revisions found in configuration file, using the list from the DB instead!"
            )
            revs = db_revs
        elif set(revs) != set(db_revs):
            # configured revisions must agree with the DB, otherwise abort
            log.error(
                "List of revisions in configuration file do not match the list stored in the DB! Stopping now."
            )
            sys.exit(1)
        else:
            log.info(
                "List of revisions in configuration file and DB match.")

        # range k (counted from 1) spans revs[k - 1] to revs[k]
        for range_number in range(1, len(revs)):
            start_rev = revs[range_number - 1]
            end_rev = revs[range_number]

            log.info("%s: Extracting data for range %s [version '%s']" %
                     (conf["project"], range_number, end_rev))

            for range_extraction in range_level:
                range_extraction.run(range_number, start_rev, end_rev)

    log.info("Extraction complete!")
def checkEdges(self):
    """
    Check the edge lists stored in the database against self.correct_edges.

    self.correct_edges may be given in two shapes:

    * a list of edge lists, one per release range (in range order), or
    * a single flat edge list ``[[name, name, weight], ...]``, which is then
      compared against the first release range only.

    For every checked range, the number of database edges that occur in the
    expected edge list must equal the length of the expected edge list;
    otherwise the test fails.
    """
    conf = Configuration.load(self.codeface_conf, self.project_conf)
    dbm = DBManager(conf)
    project_id = dbm.getProjectID(conf["project"], self.tagging)
    persons = dbm.get_project_persons(project_id)
    # Create map from id to name
    person_map = {person[0]: person[1] for person in persons}

    given_correct_edges = self.correct_edges
    # In a flat edge list the first element of the first entry is a developer
    # name rather than an edge. Test the shape with isinstance: comparing the
    # value against the type object via "is str" can never be true.
    if not isinstance(given_correct_edges[0][0], (list, tuple)):
        # simply check the first range
        given_correct_edges = [self.correct_edges]

    release_ranges = dbm.get_release_ranges(project_id)
    for i, correct_edges in enumerate(given_correct_edges):
        release_range = release_ranges[i]
        cluster_id = dbm.get_cluster_id(project_id, release_range)
        edgelist = dbm.get_edgelist(cluster_id)

        # Create edge list with developer names
        test_edges = [[person_map[edge[0]], person_map[edge[1]], edge[2]]
                      for edge in edgelist]

        # Check number of matches with known correct edges
        match_count = sum(1 for test_edge in test_edges
                          if test_edge in correct_edges)
        self.assertTrue(
            match_count == len(correct_edges),
            msg="Project edgelist is incorrect for the v{}_release "
            "to v{}_release analysis!".format(i, i + 1))
def checkEdges(self):
    """
    Check the edge lists stored in the database against self.correct_edges.

    self.correct_edges may be given in two shapes:

    * a list of edge lists, one per release range (in range order), or
    * a single flat edge list ``[[name, name, weight], ...]``, which is then
      compared against the first release range only.

    For every checked range, the number of database edges that occur in the
    expected edge list must equal the length of the expected edge list;
    otherwise the test fails.
    """
    conf = Configuration.load(self.codeface_conf, self.project_conf)
    dbm = DBManager(conf)
    project_id = dbm.getProjectID(conf["project"], self.tagging)
    persons = dbm.get_project_persons(project_id)
    # Create map from id to name
    person_map = {person[0]: person[1] for person in persons}

    given_correct_edges = self.correct_edges
    # In a flat edge list the first element of the first entry is a developer
    # name rather than an edge. Test the shape with isinstance: comparing the
    # value against the type object via "is str" can never be true.
    if not isinstance(given_correct_edges[0][0], (list, tuple)):
        # simply check the first range
        given_correct_edges = [self.correct_edges]

    release_ranges = dbm.get_release_ranges(project_id)
    for i, correct_edges in enumerate(given_correct_edges):
        release_range = release_ranges[i]
        cluster_id = dbm.get_cluster_id(project_id, release_range)
        edgelist = dbm.get_edgelist(cluster_id)

        # Create edge list with developer names
        test_edges = [[person_map[edge[0]], person_map[edge[1]], edge[2]]
                      for edge in edgelist]

        # Check number of matches with known correct edges
        match_count = sum(1 for test_edge in test_edges
                          if test_edge in correct_edges)
        self.assertTrue(
            match_count == len(correct_edges),
            msg="Project edgelist is incorrect for the v{}_release "
            "to v{}_release analysis!".format(i, i + 1))
def checkClean(self):
    """
    Delete the project row from the database and assert that no data is
    left behind in any table of pid_tables or other_tables.

    Tables in pid_tables are queried for rows with the project's id; tables
    in other_tables are queried as a whole. The value returned by
    dbm.doExec for each query must equal 0.
    """
    conf = Configuration.load(self.codeface_conf, self.project_conf)
    dbm = DBManager(conf)
    project_id = dbm.getProjectID(conf["project"], self.tagging)
    dbm.doExecCommit("DELETE FROM project WHERE id={}".format(project_id))

    dirty_message = "Table '{}' still dirty!"

    # tables that are keyed by the project id
    for table_name in pid_tables:
        query = "SELECT * FROM {table} WHERE projectId={pid}".format(
            table=table_name, pid=project_id)
        self.assertEqual(dbm.doExec(query), 0,
                         msg=dirty_message.format(table_name))

    # all remaining tables must be empty altogether
    for table_name in other_tables:
        query = "SELECT * FROM {table}".format(table=table_name)
        self.assertEqual(dbm.doExec(query), 0,
                         msg=dirty_message.format(table_name))
def checkEdges(self):
    """
    Check the project's edge list in the database against
    self.correct_edges.

    The test fails unless the number of database edges (with person ids
    replaced by names) that occur in self.correct_edges equals the length
    of self.correct_edges.
    """
    conf = Configuration.load(self.codeface_conf, self.project_conf)
    dbm = DBManager(conf)
    project_id = dbm.getProjectID(conf["project"], self.tagging)
    cluster_id = dbm.get_cluster_id(project_id)
    edgelist = dbm.get_edgelist(cluster_id)
    persons = dbm.get_project_persons(project_id)

    # Lookup table: person id -> person name
    name_of = dict((person[0], person[1]) for person in persons)

    # Translate the id-based edges into name-based edges
    named_edges = []
    for edge in edgelist:
        named_edges.append([name_of[edge[0]], name_of[edge[1]], edge[2]])

    # Count how many of the database edges are known to be correct
    expected_edges = self.correct_edges
    hits = len([edge for edge in named_edges if edge in expected_edges])

    self.assertTrue(hits == len(expected_edges),
                    msg="Project edgelist is incorrect!")
def insert_user_data(issues, conf):
    """
    Insert user data into database and update issue data.

    Every user dict attached to an issue (the issue author, each event
    author and, when set, each event's reference target) is passed to the ID
    service and updated in place with the "email", "name" and "id" of the
    person stored in the database.

    :param issues: the issues to retrieve user data from
    :param conf: the project configuration
    :return: the updated issue data
    """

    log.info("Syncing users with ID service...")

    # create buffer for users (key: user string)
    user_buffer = dict()
    # open database connection
    dbm = DBManager(conf)
    # open ID-service connection
    idservice = idManager(dbm, conf)

    def get_user_string(name, email):
        # users without an e-mail address are identified by name only
        if not email:
            return "{name}".format(name=name)
        return "{name} <{email}>".format(name=name, email=email)

    def get_or_update_user(user):
        # fix encoding for name and e-mail address
        if user["name"] is not None:
            name = unicode(user["name"]).encode("utf-8")
        else:
            name = unicode(user["username"]).encode("utf-8")

        # A missing e-mail has to stay empty: unicode(None) would yield the
        # text "None", which would then be sent to the ID service as address.
        if user["email"] is not None:
            mail = unicode(user["email"]).encode("utf-8")
        else:
            mail = ""

        # construct string for ID service and send query
        user_string = get_user_string(name, mail)

        # check buffer to reduce amount of DB queries
        if user_string in user_buffer:
            log.devinfo("Returning user '{}' from buffer.".format(user_string))
            return user_buffer[user_string]

        # get person information from ID service
        log.devinfo("Passing user '{}' to ID service.".format(user_string))
        idx = idservice.getPersonID(user_string)

        # update user data with person information from DB
        person = idservice.getPersonFromDB(idx)
        user["email"] = person["email1"]  # column 'email1'
        user["name"] = person["name"]  # column 'name'
        user["id"] = person["id"]  # column 'id'

        # add user information to buffer (keyed by the original user string)
        user_buffer[user_string] = user

        return user

    for issue in issues:
        # check database for issue author
        issue["user"] = get_or_update_user(issue["user"])

        # check database for event authors
        for event in issue["eventsList"]:
            # get the event user from the DB
            event["user"] = get_or_update_user(event["user"])

            # get the reference-target user from the DB if needed
            if event["ref_target"] != "":
                event["ref_target"] = get_or_update_user(event["ref_target"])

    return issues
def clear_tables(self):
    """
    Delete all rows from every table named in self.result_tables.
    """
    dbm = DBManager(
        Configuration.load(self.codeface_conf, self.project_conf))
    for table_name in self.result_tables:
        statement = "DELETE FROM {}".format(table_name)
        dbm.doExecCommit(statement)
def insert_user_data(issues, conf):
    """
    Insert user data into database and update issue data.

    Works in two passes: first every occurring user (issue authors, comment
    authors, history-event authors and users assigned by "assigned" events)
    is passed to the ID service and replaced by the person id; afterwards
    every person id is replaced by a dict with the "email", "name" and "id"
    of the person stored in the database.

    :param issues: the issues to retrieve user data from
    :param conf: the project configuration
    :return: the updated issue data
    """

    log.info("Syncing users with ID service...")

    # create buffer for users (key: user id)
    user_buffer = dict()
    # create buffer for user ids (key: user string)
    user_id_buffer = dict()
    # open database connection
    dbm = DBManager(conf)
    # open ID-service connection
    idservice = idManager(dbm, conf)

    def get_user_string(name, email):
        # users without an e-mail address are identified by name only
        if not email:
            return "{name}".format(name=name)
        return "{name} <{email}>".format(name=name, email=email)

    def get_id_and_update_user(user):
        # fix encoding for name and e-mail address
        if user["name"] is not None and user["name"] != "":
            name = unicode(user["name"]).encode("utf-8")
        else:
            name = unicode(user["username"]).encode("utf-8")

        # A missing e-mail has to stay empty: unicode(None) would yield the
        # text "None", which would then be sent to the ID service as address.
        if user["email"] is not None:
            mail = unicode(user["email"]).encode("utf-8")
        else:
            mail = ""

        # construct string for ID service and send query
        user_string = get_user_string(name, mail)

        # check buffer to reduce amount of DB queries
        if user_string in user_id_buffer:
            log.devinfo(
                "Returning person id for user '{}' from buffer.".format(
                    user_string))
            return user_id_buffer[user_string]

        # get person information from ID service
        log.devinfo("Passing user '{}' to ID service.".format(user_string))
        idx = idservice.getPersonID(user_string)

        # add person id to buffer (keyed by the original user string)
        user_id_buffer[user_string] = idx

        return idx

    def get_user_from_id(idx):
        # check whether user information is in buffer to reduce amount of DB queries
        if idx in user_buffer:
            log.devinfo("Returning user '{}' from buffer.".format(idx))
            return user_buffer[idx]

        # get person information from ID service
        log.devinfo("Passing user id '{}' to ID service.".format(idx))
        person = idservice.getPersonFromDB(idx)
        user = dict()
        user["email"] = person["email1"]  # column "email1"
        user["name"] = person["name"]  # column "name"
        user["id"] = person["id"]  # column "id"

        # add user information to buffer
        user_buffer[idx] = user

        return user

    # check and update database for all occurring users
    for issue in issues:
        # check database for issue author
        issue["author"] = get_id_and_update_user(issue["author"])

        # check database for comment authors
        for comment in issue["comments"]:
            comment["author"] = get_id_and_update_user(comment["author"])

        # check database for event authors in the history
        for event in issue["history"]:
            event["author"] = get_id_and_update_user(event["author"])

            # check database for target user if needed
            if event["event"] == "assigned":
                assigned_user = get_id_and_update_user(
                    create_user(event["event_info_1"], "",
                                event["event_info_2"]))
                # park the person id here until the second pass resolves it
                event["event_info_1"] = assigned_user

    # get all users after database updates having been performed
    for issue in issues:
        # get issue author
        issue["author"] = get_user_from_id(issue["author"])

        # get comment authors
        for comment in issue["comments"]:
            comment["author"] = get_user_from_id(comment["author"])

        # get event authors for non-comment events
        for event in issue["history"]:
            event["author"] = get_user_from_id(event["author"])

            # get target user if needed
            if event["event"] == "assigned":
                assigned_user = get_user_from_id(event["event_info_1"])
                event["event_info_1"] = assigned_user["name"]
                event["event_info_2"] = assigned_user["email"]

    log.debug("number of issues after insert_user_data: '{}'".format(
        len(issues)))
    return issues
def insert_user_data(issues, conf, resdir):
    """
    Insert user data into database and update issue data.
    In addition, dump username-to-user list to file.

    Works in two passes: first every occurring user (issue authors, event
    authors and reference targets) is passed to the ID service and replaced
    by the person id; afterwards every person id is replaced by a dict with
    the "email", "name" and "id" of the person stored in the database.
    Finally, one (username, name, e-mail) line per known username is written
    to "usernames.list" inside resdir.

    :param issues: the issues to retrieve user data from
    :param conf: the project configuration
    :param resdir: the directory in which the username-to-user-list should be dumped
    :return: the updated issue data
    """

    log.info("Syncing users with ID service...")

    # create buffer for users (key: user id)
    user_buffer = dict()
    # create buffer for user ids (key: user string)
    user_id_buffer = dict()
    # create buffer for usernames (key: username)
    username_id_buffer = dict()
    # open database connection
    dbm = DBManager(conf)
    # open ID-service connection
    idservice = idManager(dbm, conf)

    def get_user_string(name, email):
        # users without an e-mail address are identified by name only
        if not email:
            return "{name}".format(name=name)
        return "{name} <{email}>".format(name=name, email=email)

    def get_id_and_update_user(user):
        # Keep a missing username as None: stringifying it first would turn
        # it into the text "None" and defeat the None checks below.
        if user["username"] is not None:
            username = unicode(user["username"]).encode("utf-8")
        else:
            username = None

        # fix encoding for name and e-mail address
        if user["name"] is not None:
            name = unicode(user["name"]).encode("utf-8")
        else:
            name = username

        # A missing e-mail has to stay empty: unicode(None) would yield the
        # text "None", which would then be sent to the ID service as address.
        if user["email"] is not None:
            mail = unicode(user["email"]).encode("utf-8")
        else:
            mail = ""

        # construct string for ID service and send query
        user_string = get_user_string(name, mail)

        # check buffer to reduce amount of DB queries
        if user_string in user_id_buffer:
            log.devinfo(
                "Returning person id for user '{}' from buffer.".format(
                    user_string))
            idx = user_id_buffer[user_string]
        else:
            # get person information from ID service
            log.devinfo("Passing user '{}' to ID service.".format(user_string))
            idx = idservice.getPersonID(user_string)

            # add person id to buffer (keyed by the original user string)
            user_id_buffer[user_string] = idx

        # add id to username buffer
        if username is not None:
            username_id_buffer[username] = idx

        return idx

    def get_user_from_id(idx):
        # check whether user information is in buffer to reduce amount of DB queries
        if idx in user_buffer:
            log.devinfo("Returning user '{}' from buffer.".format(idx))
            return user_buffer[idx]

        # get person information from ID service
        log.devinfo("Passing user id '{}' to ID service.".format(idx))
        person = idservice.getPersonFromDB(idx)
        user = dict()
        user["email"] = person["email1"]  # column "email1"
        user["name"] = person["name"]  # column "name"
        user["id"] = person["id"]  # column "id"

        # add user information to buffer
        user_buffer[idx] = user

        return user

    # check and update database for all occurring users
    for issue in issues:
        # check database for issue author
        issue["user"] = get_id_and_update_user(issue["user"])

        # check database for event authors
        for event in issue["eventsList"]:
            event["user"] = get_id_and_update_user(event["user"])

            # check database for the reference-target user if needed
            if event["ref_target"] != "":
                event["ref_target"] = get_id_and_update_user(
                    event["ref_target"])

    # get all users after database updates having been performed
    for issue in issues:
        # get issue author
        issue["user"] = get_user_from_id(issue["user"])

        # get event authors
        for event in issue["eventsList"]:
            event["user"] = get_user_from_id(event["user"])

            # get the reference-target user if needed
            if event["ref_target"] != "":
                event["ref_target"] = get_user_from_id(event["ref_target"])
                event["event_info_1"] = event["ref_target"]["name"]
                event["event_info_2"] = event["ref_target"]["email"]

    # dump username, name, and e-mail to file
    lines = []
    for username, idx in username_id_buffer.items():
        user = get_user_from_id(idx)
        lines.append((username, user["name"], user["email"]))

    log.info("Dump username list to file...")
    username_dump = os.path.join(resdir, "usernames.list")
    csv_writer.write_to_csv(username_dump,
                            sorted(set(lines), key=lambda line: line[0]))

    return issues