def collectRptScout(DIR, tool):
    """Collect accuracy statistics and resource usage for RepeatScout runs.

    For every organism in the module-level ``data_map`` and every f-value in
    ``f_list`` this:
      * scores the RepeatMasker output against the known repeats
        (tp/fp/fn/tn plus derived rates),
      * reloads the pickled PBS job objects for both the RepeatScout run and
        the RepeatMasker run and records their CPU/memory usage,
      * if the compressed blast output exists, runs the pra_analysis2
        pipeline and records query/consensus coverage,
    and stores the resulting hash in the module-level ``stats_map`` keyed by
    ``(tool, org, None, f)``.

    Args:
        DIR:  results directory; job logs live in ``DIR/../job_log``.
        tool: key into the module-level ``tool_prefix`` map.

    Returns:
        None.  All results are written into ``stats_map`` as a side effect.
    """
    # FIRST: Get any applicable RM output stats
    for org in data_map.keys():
        for f in f_list:
            RS_job_file = DIR + "/../job_log/{prefix}.{org}.s0.f{f}".format(
                prefix=tool_prefix[tool], org=org, f=f)
            RM_job_file = DIR + "/../job_log/rm.{prefix}.{org}.s0.f{f}".format(
                prefix=tool_prefix[tool], org=org, f=f)
            RS_dir = DIR + "/" + ("{org}.s0.f{f}".format(org=org, f=f)).upper()
            RM_file = RS_dir + "/" + "{org}.fa.out".format(org=org)
            blast_file = RS_dir + "/" + "{org}.s0.f{f}.RS.blast.6.txt.bz2".format(org=org, f=f)
            pra_output = "{DIR}/{org}.s0.f{f}.pra.txt".format(DIR=RS_dir, org=org, f=f)
            tool_output = RM_file
            real_repeats = data_map[org] + ".out"

            H = create_stats_hash(tool, org, None, int(f))

            # Get stats from RM run.  Best-effort: a missing or malformed
            # output file leaves the count/rate fields at their defaults.
            try:
                Counts, Stats, Sets = perform_stats.perform_stats(real_repeats, tool_output, None)
                H["tp"], H["fp"], H["fn"], H["tn"] = Counts
                H["tpr"], H["tnr"], H["ppv"], H["npv"], H["fpr"], H["fdr"] = Stats
            except Exception:
                pass

            # Get resource usage from RPT_SCOUT run.  Use context managers so
            # the pickle file handles are closed promptly (the original left
            # them open), and catch only Exception so Ctrl-C still works.
            if os.path.exists(RS_job_file):
                with open(RS_job_file, "rb") as fh:
                    p = redhawk.loadPBS(fh)[0]
                try:
                    if p.efile_exists():
                        H["ToolCpuTime"], H["ToolWallTime"], H["ToolMem"], H["ToolVMem"] = p.getResources()
                except Exception:
                    pass
                with open(RS_job_file, "wb") as fh:
                    redhawk.storePBS([p], fh)

            # Get resource usage from RM run (same best-effort pattern).
            if os.path.exists(RM_job_file):
                with open(RM_job_file, "rb") as fh:
                    p = redhawk.loadPBS(fh)[0]
                try:
                    if p.efile_exists():
                        H["RMCpuTime"], H["RMWallTime"], H["RMMem"], H["RMVMem"] = p.getResources()
                except Exception:
                    pass
                with open(RM_job_file, "wb") as fh:
                    redhawk.storePBS([p], fh)

            if os.path.exists(blast_file):
                # NOTE(review): shell pipeline built from file paths.  The
                # paths are generated locally above, but quote them if org/f
                # could ever contain shell metacharacters.
                cmd = "bzcat {blast_output} | ./pra_analysis2 {output}".format(
                    blast_output=blast_file, output=pra_output)
                subprocess.call(cmd, shell=True)
                # NOTE(review): query_cover is stored under "ConCoverage" and
                # target_cover under "QuCoverage" -- looks swapped relative to
                # the names; confirm against parse_pra_output's convention.
                query_cover, target_cover, Used = parse_pra_output.parse_pra_output(pra_output, "exclude.txt")
                H["ConCoverage"], H["QuCoverage"] = query_cover, target_cover

            stats_map[(tool, org, None, f)] = H
    return None
]) + "\n")

######
# Calculate statistics (not bothering with parallelization yet)
# Fixed-width row layout: tool name (12), seed index (5), four
# confusion-matrix counts, six derived rates, eight resource columns
# (all width 14) -- 20 fields total, matching the header below.
print_str = "{:<12}" + "{:<5}" + "".join("{:<14}" * 4) + "".join(
    "{:<14}" * 6) + "".join("{:<14}" * 8) + "\n"
with open(args.results_dir + "/" + args.stats_file, "w") as fp:
    fp.write(
        print_str.format("#tool", "seed", "tp", "fp", "fn", "tn", "tpr",
                         "tnr", "ppv", "npv", "fpr", "fdr", "ToolCpuTime",
                         "ToolWallTime", "ToolMem", "ToolVMem", "RMCpuTime",
                         "RMWallTime", "RMMem", "RMVMem"))
    for key in test_tools:
        for p in job_dic[key]:
            # Score this job's RepeatMasker output against the known
            # repeats (<seq_file>.out); Sets is unused here.
            Counts, Stats, Sets = perform_stats.perform_stats(
                p.seq_file + ".out", p.rm_output, None)
            Stats = [round(x, 5) for x in Stats]
            fp.write(
                print_str.format(*([key, p.seed_num] + list(Counts) +
                                   list(Stats) + list(p.tool_resources) +
                                   list(p.getResources()))))
# Legacy per-job reporting loop, kept for reference:
# for i in range(len(J)):
#    if RAIDER_JOBS:
#        for j in range(len(RAIDER_JOBS[i])):
#            p = RAIDER_JOBS[i][j]
#            Counts, Stats, Sets = perform_stats.perform_stats(J[i].sim_output + ".out", p.rm_output, None)
#            Stats = [round(x,5) for x in Stats]
#            fp.write(print_str.format(*(["raider", p.seed_num] + list(Counts) + list(Stats) + list(p.tool_resources) + list(p.getResources()))))
#    if SCOUT_JOBS:
#        p = SCOUT_JOBS[i]
def collectRaider(DIR, tool):
    """Collect accuracy statistics and resource usage for RAIDER runs.

    Like collectRptScout, but iterates over every seed in the module-level
    ``seed_map`` in addition to every organism in ``data_map`` and every
    f-value in ``f_list``.  For each (org, seed, f) combination this:
      * scores the RepeatMasker output against the known repeats,
      * records seed geometry (length, weight, density),
      * reloads the pickled PBS job objects for the RAIDER and RepeatMasker
        runs and records their CPU/memory usage,
      * if the compressed blast output exists, reads the pra_analysis2
        coverage results (the analysis command itself is currently disabled
        and only printed -- the pra output file must already exist),
    and stores the resulting hash in the module-level ``stats_map`` keyed by
    ``(tool, org, seed_num, f)``.

    Args:
        DIR:  results directory; job logs live in ``DIR/../job_log``.
        tool: key into the module-level ``tool_prefix`` map.

    Returns:
        None.  All results are written into ``stats_map`` as a side effect.
    """
    # FIRST: Get any applicable RM output stats
    for org in data_map.keys():
        for seed_num in seed_map.keys():
            for f in f_list:
                print("File: " + org + " " + str(seed_num) + " " + str(f) + "\n")
                RAIDER_job_file = DIR + "/../job_log/{prefix}.{org}.s{seed_num}.f{f}".format(
                    prefix=tool_prefix[tool], org=org, seed_num=seed_num, f=f)
                RM_job_file = DIR + "/../job_log/rm.{prefix}.{org}.s{seed_num}.f{f}".format(
                    prefix=tool_prefix[tool], org=org, seed_num=seed_num, f=f)
                RM_dir = DIR + "/" + ("{org}.s{seed}.f{f}".format(org=org, seed=seed_num, f=f)).upper()
                RM_file = RM_dir + "/" + "{org}.fa.out".format(org=org)
                blast_file = RM_dir + "/" + "{org}.s{seed}.f{f}.blast.6.txt.bz2".format(org=org, seed=seed_num, f=f)
                pra_output = RM_dir + "/" + "{org}.s{seed}.f{f}.pra.txt".format(org=org, seed=seed_num, f=f)
                tool_output = RM_file
                real_repeats = data_map[org] + ".out"

                H = create_stats_hash(tool, org, int(seed_num), int(f))

                # Seed geometry: length, weight (count of '1' positions),
                # and density (weight / length).
                seed = convert_seed(seed_map[seed_num])
                seed_len = len(seed)
                seed_weight = seed.count("1")
                H["l"] = seed_len
                H["w"] = seed_weight
                H["w/l"] = seed_weight / float(seed_len)

                # Get stats from RM run.  Best-effort: a missing or malformed
                # output file leaves the count/rate fields at their defaults.
                try:
                    Counts, Stats, Sets = perform_stats.perform_stats(real_repeats, tool_output, None)
                    H["tp"], H["fp"], H["fn"], H["tn"] = Counts
                    H["tpr"], H["tnr"], H["ppv"], H["npv"], H["fpr"], H["fdr"] = Stats
                except Exception:
                    pass

                # Get resource usage from RAIDER run.  Context managers close
                # the pickle file handles promptly (the original leaked them),
                # and catching only Exception keeps Ctrl-C working.
                if os.path.exists(RAIDER_job_file):
                    with open(RAIDER_job_file, "rb") as fh:
                        p = redhawk.loadPBS(fh)[0]
                    try:
                        if p.efile_exists():
                            H["ToolCpuTime"], H["ToolWallTime"], H["ToolMem"], H["ToolVMem"] = p.getResources()
                    except Exception:
                        pass
                    with open(RAIDER_job_file, "wb") as fh:
                        redhawk.storePBS([p], fh)

                # Get resource usage from RM run (same best-effort pattern).
                if os.path.exists(RM_job_file):
                    with open(RM_job_file, "rb") as fh:
                        p = redhawk.loadPBS(fh)[0]
                    try:
                        if p.efile_exists():
                            H["RMCpuTime"], H["RMWallTime"], H["RMMem"], H["RMVMem"] = p.getResources()
                    except Exception:
                        pass
                    with open(RM_job_file, "wb") as fh:
                        redhawk.storePBS([p], fh)

                if os.path.exists(blast_file):
                    if not os.path.exists(pra_output):
                        # NOTE(review): the analysis command is intentionally
                        # disabled here -- only printed.  If pra_output does
                        # not exist, the parse below will fail.
                        cmd = "bzcat {blast_output} | ./pra_analysis2 {output}".format(
                            blast_output=blast_file, output=pra_output)
                        print("cmd: " + cmd)
                        # subprocess.call(cmd, shell=True)
                    # NOTE(review): query_cover is stored under "ConCoverage"
                    # and target_cover under "QuCoverage" -- looks swapped
                    # relative to the names; confirm against parse_pra_output.
                    query_cover, target_cover, Used = parse_pra_output.parse_pra_output(pra_output, "exclude.txt")
                    H["ConCoverage"], H["QuCoverage"] = query_cover, target_cover

                stats_map[(tool, org, seed_num, f)] = H
    return None
# Record the seed list: one "index  seed" line per entry, index left-padded
# to width 5.
with open(args.results_dir + "/seed_file.txt", "w") as fp:
    fp.write("\n".join(["{index:<5}{seed}".format(index=i, seed=s)
                        for i, s in enumerate(seed_list)]) + "\n")

# NOTE(review): this rewrites the exact same file with the exact same
# content as the unconditional write above -- it is redundant (though
# harmless, since the content is identical).  Confirm whether the first
# write was meant to be guarded by a different tool's job_dic entry.
if job_dic['araider']:
    with open(args.results_dir + "/seed_file.txt", "w") as fp:
        fp.write("\n".join(["{index:<5}{seed}".format(index=i, seed=s)
                            for i, s in enumerate(seed_list)]) + "\n")

######
# Calculate statistics (not bothering with parallelization yet)
# Fixed-width row layout: tool name (12), seed index (5), four
# confusion-matrix counts, six derived rates, eight resource columns
# (all width 14) -- 20 fields total, matching the header below.
print_str = "{:<12}" + "{:<5}" + "".join("{:<14}" * 4) + "".join("{:<14}" * 6) + "".join("{:<14}" * 8) + "\n"
with open(args.results_dir + "/" + args.stats_file, "w") as fp:
    fp.write(print_str.format("#tool", "seed", "tp", "fp", "fn", "tn",
                              "tpr", "tnr", "ppv", "npv", "fpr", "fdr",
                              "ToolCpuTime", "ToolWallTime", "ToolMem",
                              "ToolVMem", "RMCpuTime", "RMWallTime",
                              "RMMem", "RMVMem"))
    for key in test_tools:
        for p in job_dic[key]:
            # Score this job's RepeatMasker output against the known
            # repeats (<seq_file>.out); Sets is unused here.
            Counts, Stats, Sets = perform_stats.perform_stats(p.seq_file + ".out", p.rm_output, None)
            Stats = [round(x, 5) for x in Stats]
            fp.write(print_str.format(*([key, p.seed_num] + list(Counts) + list(Stats) + list(p.tool_resources) + list(p.getResources()))))
# Legacy per-job reporting loop, kept for reference:
# for i in range(len(J)):
#    if RAIDER_JOBS:
#        for j in range(len(RAIDER_JOBS[i])):
#            p = RAIDER_JOBS[i][j]
#            Counts, Stats, Sets = perform_stats.perform_stats(J[i].sim_output + ".out", p.rm_output, None)
#            Stats = [round(x,5) for x in Stats]
#            fp.write(print_str.format(*(["raider", p.seed_num] + list(Counts) + list(Stats) + list(p.tool_resources) + list(p.getResources()))))
#    if SCOUT_JOBS:
#        p = SCOUT_JOBS[i]
#        CountSJ, StatsSJ, SetsSJ = perform_stats.perform_stats(J[i].sim_output + ".out", p.rm_output, None)
#        StatsSJ = [round(x,5) for x in StatsSJ]
#        fp.write(print_str.format(*(["repscout", "NA"] + list(CountSJ) + list(StatsSJ) + list(p.tool_resources) + list(p.getResources()))))