def testLoad(self):
    '''Test that an example configuration is loaded correctly'''
    # Write a temporary global (Codeface) configuration file.
    global_conf = NamedTemporaryFile(delete=False)
    # FIX: the file previously wrote "dbuser: '******'" (a redacted
    # placeholder) while the assertion below expects "theuser" -- the
    # written value must match the asserted one.
    global_conf.write("""
# Foo commentary!
A:B
---
# Fake Database access information
dbhost: remotehost
dbuser: 'theuser'
dbpwd: thepassword
dbname: 'thedb'
# intermediate comment
idServicePort: 4242
idServiceHostname: foohost
""")
    global_conf.close()

    # Write a temporary project configuration file.
    project_conf = NamedTemporaryFile(delete=False)
    project_conf.write("""
# Fake commentary!
---
project: theproject
repo: therepo # Relative to git-dir as specified on the command line
description: the description
ml: the mailing list
revisions: [ "v1", "v2", "v3", "v4", "v5"]
rcs : ["v1rc0", "v2rc0", "v3rc0", "v4rc0", "v5rc0" ]
new_tag: newvalue
tagging: tag
""")
    project_conf.close()

    # Load both files and verify every key ends up in the merged config.
    c = Configuration.load(global_conf.name, project_conf.name)
    self.assertEqual(c["dbhost"], "remotehost")
    self.assertEqual(c["dbuser"], "theuser")
    self.assertEqual(c["dbpwd"], "thepassword")
    self.assertEqual(c["dbname"], "thedb")
    self.assertEqual(c["project"], "theproject")
    self.assertEqual(c["idServicePort"], 4242)
    self.assertEqual(c["idServiceHostname"], "foohost")
    self.assertEqual(c["repo"], "therepo")
    self.assertEqual(c["description"], "the description")
    self.assertEqual(c["ml"], "the mailing list")
    self.assertEqual(c["revisions"], ["v1", "v2", "v3", "v4", "v5"])
    self.assertEqual(c["rcs"], ["v1rc0", "v2rc0", "v3rc0", "v4rc0", "v5rc0"])
    self.assertEqual(c["tagging"], "tag")
    self.assertEqual(c["new_tag"], "newvalue")
    os.unlink(global_conf.name)
    os.unlink(project_conf.name)

    # Check that the configuration's string form round-trips as valid YAML.
    yaml_conf = NamedTemporaryFile(delete=False)
    yaml_conf.write(str(c))
    yaml_conf.close()
    c2 = Configuration.load(yaml_conf.name)
    self.assertEqual(dict(c), dict(c2))
    os.unlink(yaml_conf.name)
def testDict(self):
    '''Quick test if a Configuration object behaves like a dict'''
    config = Configuration()
    # A fresh Configuration must expose exactly these two keys.
    wanted = {"idServicePort", "nodejsHostname"}
    self.assertEqual(set(config.keys()), wanted)
    print(str(config))
    # Iteration must yield only known keys, and indexing must not raise.
    for key in config:
        self.assertIn(key, wanted)
        config[key]
def run():
    """Parse command-line arguments and start the artifact extraction."""
    # Note: argv[0] is the name of the command itself.
    args = get_parser().parse_args(sys.argv[1:])

    # Make all path arguments absolute first.
    __resdir = abspath(args.resdir)
    __codeface_conf, __project_conf = map(abspath, (args.config, args.project))
    __conf = Configuration.load(__codeface_conf, __project_conf)

    # project_analyse(resdir, gitdir, codeface_conf, project_conf,
    #                 args.no_report, args.loglevel, logfile, args.recreate,
    #                 args.profile_r, args.jobs, args.tagging, args.reuse_db)

    # Map each tagging scheme to its (artifact, author-mapping-table) pairs.
    artifact_map = {
        'feature': [
            ('Feature', 'author2feature'),
            ('FeatureExpression', 'author2featureexpression')
        ],
        'proximity': [
            ('Function', 'author2function')
            # ('Function', 'author2file')  # FIXME implement author2file (needs new SELECT)
        ]
    }

    run_extraction(__conf, artifact_map[__conf["tagging"]], __resdir)
def run():
    """Entry point: extract and post-process GitHub issue data."""
    arg_parser = argparse.ArgumentParser(prog='codeface-extraction-issues-github',
                                         description='Codeface extraction')
    arg_parser.add_argument('-c', '--config', help="Codeface configuration file",
                            default='codeface.conf')
    arg_parser.add_argument('-p', '--project', help="Project configuration file",
                            required=True)
    arg_parser.add_argument('resdir', help="Directory to store analysis results in")

    # parse arguments and load the merged configuration
    args = arg_parser.parse_args(sys.argv[1:])
    __codeface_conf, __project_conf = map(os.path.abspath,
                                          (args.config, args.project))
    __conf = Configuration.load(__codeface_conf, __project_conf)

    # derive source and results folders from the configuration
    __srcdir = os.path.abspath(os.path.join(args.resdir,
                                            __conf['repo'] + "_issues"))
    __resdir = os.path.abspath(os.path.join(args.resdir, __conf['project'],
                                            __conf["tagging"]))

    # issue-processing pipeline
    issues = load(__srcdir)                    # 1) load the list of issues
    issues = reformat_issues(issues)           # 2) re-format the issues
    issues = merge_issue_events(issues)        # 3) merge all issue events into one list
    issues = reformat_events(issues)           # 4) re-format the eventsList of the issues
    issues = insert_user_data(issues, __conf)  # 5) update user data with Codeface database
    print_to_disk(issues, __resdir)            # 6) dump result to disk

    log.info("Github issue processing complete!")
def run(): """Run the mbox-parsing process""" # get all needed paths and argument for the method call. parser = argparse.ArgumentParser(prog='codeface-extraction-mbox', description='Codeface extraction') parser.add_argument('-c', '--config', help="Codeface configuration file", default='codeface.conf') parser.add_argument('-p', '--project', help="Project configuration file", required=True) parser.add_argument('-f', '--filepath', help="Include the filepath in the search", action="store_true") parser.add_argument('--file', help="Use files (reps. their base names) as artifacts", action="store_true") parser.add_argument('-r', '--reindex', help="Re-construct the index", action="store_true") parser.add_argument('resdir', help="Directory to store analysis results in") parser.add_argument('maildir', help='Directory in which the mailinglists are located') # construct data paths args = parser.parse_args(sys.argv[1:]) __resdir = abspath(args.resdir) __maildir = abspath(args.maildir) __codeface_conf, __project_conf = map(abspath, (args.config, args.project)) # initialize configuration __conf = Configuration.load(__codeface_conf, __project_conf) __resdir_project = os.path.join(__resdir, __conf["project"], __conf["tagging"]) # search the mailing lists for ml in __conf["mailinglists"]: mbox_file = os.path.join(__maildir, ml["name"] + ".mbox") parse(mbox_file, __resdir_project, args.filepath, args.file, args.reindex)
def run():
    """Parse every configured mailing list of the project."""
    argp = argparse.ArgumentParser(prog='codeface',
                                   description='Codeface extraction')
    argp.add_argument('-c', '--config', help="Codeface configuration file",
                      default='codeface.conf')
    argp.add_argument('-p', '--project', help="Project configuration file",
                      required=True)
    argp.add_argument('-f', '--filepath', help="Include the filepath in the search",
                      action="store_true")
    argp.add_argument('resdir', help="Directory to store analysis results in")
    argp.add_argument('maildir', help='Directory in which the mailinglists are located')

    args = argp.parse_args(sys.argv[1:])
    __resdir = abspath(args.resdir)
    __maildir = abspath(args.maildir)
    __codeface_conf, __project_conf = map(abspath, (args.config, args.project))
    __conf = Configuration.load(__codeface_conf, __project_conf)

    # results live under <resdir>/<repo>_<tagging>/<tagging>
    out_dir = "{0}/{1}_{2}/{2}".format(__resdir, __conf["repo"], __conf["tagging"])
    for mailing_list in __conf["mailinglists"]:
        parse(__maildir + "/" + mailing_list["name"] + ".mbox",
              out_dir,
              args.filepath)
def checkEdges(self):
    """Compare the per-release-range edge lists in the DB with the expected edges."""
    conf = Configuration.load(self.codeface_conf, self.project_conf)
    dbm = DBManager(conf)
    project_id = dbm.getProjectID(conf["project"], self.tagging)

    # Map person id -> person name for translating edge endpoints.
    persons = dbm.get_project_persons(project_id)
    id_to_name = dict((p[0], p[1]) for p in persons)

    expected_ranges = self.correct_edges
    # NOTE(review): this compares against the *type* `str` by identity,
    # not `isinstance` -- preserved as-is; verify the intended semantics.
    if expected_ranges[0][0] is str:
        # simply check the first range
        expected_ranges = [self.correct_edges]

    release_ranges = dbm.get_release_ranges(project_id)
    for idx, expected in enumerate(expected_ranges):
        cluster_id = dbm.get_cluster_id(project_id, release_ranges[idx])
        edgelist = dbm.get_edgelist(cluster_id)

        # Build the edge list with developer names instead of ids.
        named_edges = [[id_to_name[e[0]], id_to_name[e[1]], e[2]]
                       for e in edgelist]

        # Every known correct edge must be present in the DB edge list.
        matches = sum(1 for edge in named_edges if edge in expected)
        self.assertTrue(
            matches == len(expected),
            msg="Project edgelist is incorrect for the v{}_release "
                "to v{}_release analysis!".format(idx, idx + 1))
def check_commit_dependency(self, commit_dependency_data):
    '''
    Checks if the commit_dependency table contains the expected data
    given by self.commit_dependency in the unit test.
    :param commit_dependency_data: The data of the actual table:
        | id | commitId | file | entityId | entityType | size | impl |
    :return:
    '''
    # Nothing to verify when the test declares no expected dependencies.
    if self.commit_dependency is None:
        return

    conf = Configuration.load(self.codeface_conf, self.project_conf)
    dbm = DBManager(conf)
    project_id = dbm.getProjectID(conf["project"], self.tagging)

    # Drop the leading "id" column so rows become
    # (commitId, file, entityId, entityType, size, impl) tuples.
    actual = [(row[1], row[2], row[3], row[4], row[5], row[6])
              for row in commit_dependency_data]
    actual_without_impl = [row[0:5] for row in actual]

    # Expected rows carry a commit hash; translate it to the DB commit id.
    expected_rows = [(dbm.getCommitId(project_id, row[0]),
                      row[1], row[2], row[3], row[4], row[5])
                     for row in self.commit_dependency]

    for expected in expected_rows:
        if expected[5] is None:
            # impl unspecified -> compare everything except the impl column
            self.assertIn(expected[0:5], actual_without_impl)
        else:
            self.assertIn(expected, actual)

    # No extra rows may be present either.
    self.assertEqual(len(actual), len(expected_rows))
def checkEdges(self):
    """Validate the DB edge lists for every release range against known edges."""
    conf = Configuration.load(self.codeface_conf, self.project_conf)
    db = DBManager(conf)
    proj = db.getProjectID(conf["project"], self.tagging)

    # id -> developer name lookup
    name_of = {}
    for person in db.get_project_persons(proj):
        name_of[person[0]] = person[1]

    ranges_expected = self.correct_edges
    # NOTE(review): identity comparison against the type `str` is preserved
    # verbatim from the original -- confirm it does what was intended.
    if ranges_expected[0][0] is str:
        # simply check the first range
        ranges_expected = [self.correct_edges]

    release_ranges = db.get_release_ranges(proj)
    idx = -1
    for expected in ranges_expected:
        idx += 1
        cluster = db.get_cluster_id(proj, release_ranges[idx])

        # translate each (from-id, to-id, weight) edge to developer names
        named = []
        for edge in db.get_edgelist(cluster):
            named.append([name_of[edge[0]], name_of[edge[1]], edge[2]])

        # count how many expected edges actually occur
        hits = 0
        for candidate in named:
            if candidate in expected:
                hits += 1

        self.assertTrue(
            hits == len(expected),
            msg="Project edgelist is incorrect for the v{}_release "
                "to v{}_release analysis!".format(idx, idx + 1))
def getResults(self):
    """Fetch all rows of every result table, keyed by table name."""
    conf = Configuration.load(self.codeface_conf, self.project_conf)
    dbm = DBManager(conf)
    # The project must exist before we read its result tables.
    project_id = dbm.getProjectID(conf["project"], self.tagging)
    self.assertGreaterEqual(project_id, 0)

    collected = {}
    for tab in self.result_tables:
        dbm.doExec("SELECT * FROM {table}".format(table=tab))
        collected[tab] = dbm.doFetchAll()
    return collected
def checkClean(self):
    """Delete the project and verify no stale rows remain in any table."""
    conf = Configuration.load(self.codeface_conf, self.project_conf)
    dbm = DBManager(conf)
    pid = dbm.getProjectID(conf["project"], self.tagging)
    dbm.doExecCommit("DELETE FROM project WHERE id={}".format(pid))

    # Tables keyed by project id must hold no rows for this project.
    for tab in pid_tables:
        count = dbm.doExec("SELECT * FROM {table} WHERE projectId={pid}"
                           .format(table=tab, pid=pid))
        self.assertEqual(count, 0,
                         msg="Table '{}' still dirty!".format(tab))

    # All remaining tables must be completely empty.
    for tab in other_tables:
        count = dbm.doExec("SELECT * FROM {table}".format(table=tab))
        self.assertEqual(count, 0,
                         msg="Table '{}' still dirty!".format(tab))
def run():
    """Parse arguments, load the configuration, and start the extraction."""
    # Note: argv[0] is the name of the command itself.
    args = get_parser().parse_args(sys.argv[1:])

    # Make all path arguments absolute first.
    __resdir = abspath(args.resdir)
    __codeface_conf, __project_conf = map(abspath, (args.config, args.project))

    # Load the merged configuration and run the extraction.
    __conf = Configuration.load(__codeface_conf, __project_conf)
    run_extraction(__conf, __resdir)
def setup_with_p(self, p):
    """Initialise paths and flags from the example project and wipe the DB tables."""
    path = self.p.directory
    self.gitdir = dirname(path)
    self.mldir = pathjoin(path, ".git")
    self.resdir = pathjoin(path, ".git", "results")
    self.logfile = pathjoin(path, ".git", "log")
    self.project_conf = self.p.codeface_conf
    self.no_report = False
    self.recreate = False
    self.loglevel = "devinfo"
    # This config_file is added in the codeface test command handler
    self.codeface_conf = self.config_file

    # Start every test from a clean database state.
    conf = Configuration.load(self.codeface_conf, self.project_conf)
    dbm = DBManager(conf)
    for tbl in pid_tables + other_tables:
        dbm.doExecCommit("DELETE FROM {}".format(tbl))
def run():
    """Entry point: extract, post-process, and dump GitHub issue data."""
    arg_parser = argparse.ArgumentParser(prog='codeface-extraction-issues-github',
                                         description='Codeface extraction')
    arg_parser.add_argument('-c', '--config', help="Codeface configuration file",
                            default='codeface.conf')
    arg_parser.add_argument('-p', '--project', help="Project configuration file",
                            required=True)
    arg_parser.add_argument('resdir', help="Directory to store analysis results in")

    # parse arguments and load the merged configuration
    args = arg_parser.parse_args(sys.argv[1:])
    __codeface_conf, __project_conf = map(os.path.abspath,
                                          (args.config, args.project))
    __conf = Configuration.load(__codeface_conf, __project_conf)

    # derive source and results folders
    __srcdir = os.path.abspath(
        os.path.join(args.resdir, __conf['repo'] + "_issues"))
    __resdir = os.path.abspath(
        os.path.join(args.resdir, __conf['project'], __conf["tagging"]))

    # issue-processing pipeline
    issues = load(__srcdir)                    # 1) load the list of issues
    issues = reformat_issues(issues)           # 2) re-format the issues
    issues = merge_issue_events(issues)        # 3) merge all issue events into one list
    issues = reformat_events(issues)           # 4) re-format the eventsList of the issues
    issues = insert_user_data(issues, __conf)  # 5) update user data with Codeface database
    # 6) dump results to disk (both output formats)
    print_to_disk(issues, __resdir)
    print_to_disk_new(issues, __resdir)

    log.info("Github issue processing complete!")
def checkEdges(self):
    """Check the project's single edge list against the known correct edges."""
    conf = Configuration.load(self.codeface_conf, self.project_conf)
    dbm = DBManager(conf)
    pid = dbm.getProjectID(conf["project"], self.tagging)

    # id -> developer name lookup
    persons = dbm.get_project_persons(pid)
    id_to_name = dict((p[0], p[1]) for p in persons)

    # Fetch the edge list of the project's cluster and translate ids to names.
    edgelist = dbm.get_edgelist(dbm.get_cluster_id(pid))
    named_edges = [[id_to_name[e[0]], id_to_name[e[1]], e[2]] for e in edgelist]

    # Every known correct edge must appear in the DB edge list.
    hits = sum(1 for edge in named_edges if edge in self.correct_edges)
    self.assertTrue(hits == len(self.correct_edges),
                    msg="Project edgelist is incorrect!")
def run():
    """
    Run the JIRA issue extraction.

    Parses the command line, loads the Codeface/project configuration, and
    processes every issue XML file found in the proximity "conway" folder.
    Malformed or missing XML files are collected and reported at the end.
    """
    parser = argparse.ArgumentParser(prog="codeface-extraction-issues-jira",
                                     description="Codeface extraction")
    parser.add_argument("-c", "--config", help="Codeface configuration file",
                        default="codeface.conf")
    parser.add_argument("-p", "--project", help="Project configuration file",
                        required=True)
    parser.add_argument("resdir", help="Directory to store analysis results in")
    parser.add_argument(
        "-s", "--skip_history",
        # FIX: the two concatenated help fragments previously rendered as
        # "JIRAserver" (missing space between them)
        help="Skip methods that retrieve additional history information from the configured JIRA " +
             "server. This decreases the runtime and shuts off the external connection",
        action="store_true")

    # parse arguments (argv[0] is the command name)
    args = parser.parse_args(sys.argv[1:])
    __codeface_conf, __project_conf = map(os.path.abspath,
                                          (args.config, args.project))

    # create configuration
    __conf = Configuration.load(__codeface_conf, __project_conf)

    # get source and results folders
    __srcdir = os.path.abspath(
        os.path.join(args.resdir, __conf["repo"] + "_proximity",
                     "conway", "issues_xml"))
    __resdir = os.path.abspath(
        os.path.join(args.resdir, __conf["project"], __conf["tagging"]))
    __srcdir_csv = os.path.abspath(
        os.path.join(args.resdir, __conf["repo"] + "_proximity", "conway"))
    # get person folder
    # __psrcdir = os.path.abspath(os.path.join(args.resdir, __conf["repo"] + "_proximity", "conway"))

    # load the list of persons
    persons = load_csv(__srcdir_csv)

    # load the xml-file list
    file_list = [f for f in os.listdir(__srcdir)
                 if os.path.isfile(os.path.join(__srcdir, f))]

    # creates empty result files
    clear_result_files(__resdir)

    # list for malformed or missing xml-files
    incorrect_files = []

    # processes every xml-file
    for current_file in file_list:
        # 1) load the list of issues
        issues = load_xml(__srcdir, current_file)
        # if an error occurred while loading the xml-file, remember it and go on
        if issues is None:
            incorrect_files.append(current_file)
            continue
        # 2) re-format the issues
        issues = parse_xml(issues, persons, args.skip_history)
        # 3) load issue information via api
        if not args.skip_history:
            load_issue_via_api(issues, persons, __conf["issueTrackerURL"])
        # 4) update user data with Codeface database
        # ATTENTION: The database update runs per iteration while issue data is
        # appended to the results file immediately, so DB updates from later
        # iterations are not reflected in previously dumped issues. As JIRA
        # provides no email data, names will not change in the database, so
        # this is currently not a problem. If JIRA provides emails in the
        # future, all iterations must be updated first and dumped afterwards.
        issues = insert_user_data(issues, __conf)
        # 5) dump result to disk
        print_to_disk(issues, __resdir)
        # # 6) export for Gephi
        # print_to_disk_gephi(issues, __resdir)
        # # 7) export for jira issue extraction to use them in dev-network-growth
        # print_to_disk_extr(issues, __resdir)
        # 8) dump bug issues to disk
        print_to_disk_bugs(issues, __resdir)

    log.info("Jira issue processing complete!")
    log.info("In total, " + str(jira_request_counter) +
             " requests have been sent to Jira.")
    if incorrect_files:
        log.info("Following files where malformed or not existing:: " +
                 str(incorrect_files))
def run():
    """
    Run the (single-file) JIRA issue extraction.

    Parses the command line, loads the Codeface/project configuration,
    reads the issue XML data and the person CSV from the proximity "conway"
    folder, optionally enriches the issues via the JIRA API, and dumps the
    results (plain, Gephi, extraction, and bug formats) to disk.
    """
    parser = argparse.ArgumentParser(prog='codeface',
                                     description='Codeface extraction')
    parser.add_argument('-c', '--config', help="Codeface configuration file",
                        default='codeface.conf')
    parser.add_argument('-p', '--project', help="Project configuration file",
                        required=True)
    parser.add_argument('resdir', help="Directory to store analysis results in")
    parser.add_argument(
        '-s', '--skip_history',
        # FIX: the two concatenated help fragments previously rendered as
        # "JIRAserver" (missing space between them)
        help="Skip methods that retrieve additional history information from the configured JIRA " +
             "server. This decreases the runtime and shuts off the external connection",
        action='store_true')

    # parse arguments (argv[0] is the command name)
    args = parser.parse_args(sys.argv[1:])
    __codeface_conf, __project_conf = map(os.path.abspath,
                                          (args.config, args.project))

    # create configuration
    __conf = Configuration.load(__codeface_conf, __project_conf)

    # get source and results folders
    __srcdir = os.path.abspath(
        os.path.join(args.resdir, __conf['repo'] + "_proximity",
                     "conway", "issues_xml"))
    __resdir = os.path.abspath(
        os.path.join(args.resdir, __conf['project'], __conf["tagging"]))
    __srcdir_csv = os.path.abspath(
        os.path.join(args.resdir, __conf['repo'] + "_proximity", "conway"))
    # get person folder
    # __psrcdir = os.path.abspath(os.path.join(args.resdir, __conf['repo'] + "_proximity", "conway"))

    # 1) load the list of issues
    issues = load_xml(__srcdir)
    # 1b) load the list of persons
    persons = load_csv(__srcdir_csv)
    # 2) re-format the issues
    issues = parse_xml(issues, persons)
    # 3) load issue information via api (skipped with --skip_history)
    if not args.skip_history:
        load_issue_via_api(issues, persons, __conf['issueTrackerURL'])
    # 4) update user data with Codeface database (maybe not necessary)
    issues = insert_user_data(issues, __conf)
    # 5) dump result to disk
    print_to_disk(issues, __resdir)
    # 6) export for Gephi
    print_to_disk_gephi(issues, __resdir)
    # 7) export for jira issue extraction to use them in dev-network-growth
    print_to_disk_extr(issues, __resdir)
    # 8) dump bug issues to disk
    print_to_disk_bugs(issues, __resdir, args.skip_history)

    log.info("Jira issue processing complete!")
def testDefaults(self):
    '''Check that the defaults are set correctly'''
    # A freshly constructed Configuration must carry the id-service defaults.
    config = Configuration()
    self.assertEqual(config['idServiceHostname'], '127.0.0.1')
    self.assertEqual(config['idServicePort'], 8080)
def run():
    """
    Run the per-file JIRA issue extraction (with Gephi and extraction exports).

    Parses the command line, loads the Codeface/project configuration, and
    processes every issue XML file found in the proximity "conway" folder.
    Malformed or missing XML files are collected and reported at the end.
    """
    parser = argparse.ArgumentParser(prog="codeface-extraction-issues-jira",
                                     description="Codeface extraction")
    parser.add_argument("-c", "--config", help="Codeface configuration file",
                        default="codeface.conf")
    parser.add_argument("-p", "--project", help="Project configuration file",
                        required=True)
    parser.add_argument("resdir", help="Directory to store analysis results in")
    parser.add_argument(
        "-s", "--skip_history",
        # FIX: the two concatenated help fragments previously rendered as
        # "JIRAserver" (missing space between them)
        help="Skip methods that retrieve additional history information from the configured JIRA " +
             "server. This decreases the runtime and shuts off the external connection",
        action="store_true")

    # parse arguments (argv[0] is the command name)
    args = parser.parse_args(sys.argv[1:])
    __codeface_conf, __project_conf = map(os.path.abspath,
                                          (args.config, args.project))

    # create configuration
    __conf = Configuration.load(__codeface_conf, __project_conf)

    # get source and results folders
    __srcdir = os.path.abspath(
        os.path.join(args.resdir, __conf["repo"] + "_proximity",
                     "conway", "issues_xml"))
    __resdir = os.path.abspath(
        os.path.join(args.resdir, __conf["project"], __conf["tagging"]))
    __srcdir_csv = os.path.abspath(
        os.path.join(args.resdir, __conf["repo"] + "_proximity", "conway"))
    # get person folder
    # __psrcdir = os.path.abspath(os.path.join(args.resdir, __conf["repo"] + "_proximity", "conway"))

    # load the list of persons
    persons = load_csv(__srcdir_csv)

    # load the xml-file list
    file_list = [f for f in os.listdir(__srcdir)
                 if os.path.isfile(os.path.join(__srcdir, f))]

    # creates empty result files
    clear_result_files(__resdir)

    # list for malformed or missing xml-files
    incorrect_files = []

    # processes every xml-file
    for current_file in file_list:
        # 1) load the list of issues
        issues = load_xml(__srcdir, current_file)
        # if an error occurred while loading the xml-file, remember it and go on
        if issues is None:
            incorrect_files.append(current_file)
            continue
        # 2) re-format the issues
        issues = parse_xml(issues, persons, args.skip_history)
        # 3) load issue information via api (skipped with --skip_history)
        if not args.skip_history:
            load_issue_via_api(issues, persons, __conf["issueTrackerURL"])
        # 4) update user data with Codeface database (maybe not necessary)
        issues = insert_user_data(issues, __conf)
        # 5) dump result to disk
        print_to_disk(issues, __resdir)
        # 6) export for Gephi
        print_to_disk_gephi(issues, __resdir)
        # 7) export for jira issue extraction to use them in dev-network-growth
        print_to_disk_extr(issues, __resdir)
        # 8) dump bug issues to disk
        print_to_disk_bugs(issues, __resdir)

    log.info("Jira issue processing complete!")
    if incorrect_files:
        log.info("Following files where malformed or not existing:: " +
                 str(incorrect_files))
def clear_tables(self):
    """Delete all rows from every result table of this test."""
    dbm = DBManager(Configuration.load(self.codeface_conf, self.project_conf))
    for tbl in self.result_tables:
        dbm.doExecCommit("DELETE FROM {}".format(tbl))