Exemplo n.º 1
0
def collectRptScout(DIR, tool):
    """Collect accuracy, resource-usage and coverage stats for RPT_SCOUT runs.

    For every organism in ``data_map`` and every ``f`` in ``f_list`` a stats
    record is created with ``create_stats_hash`` and stored in the
    module-level ``stats_map`` under the key ``(tool, org, None, f)``.
    Each step is best-effort: a record is stored even when some of its inputs
    are missing or unreadable.

    Side effects:
        * Each existing job file is loaded with ``redhawk.loadPBS`` and
          written back with ``redhawk.storePBS``.
        * When the blast file exists, a ``bzcat | ./pra_analysis2`` shell
          pipeline is run to (re)generate the pra output file.

    Args:
        DIR: Directory containing the per-run result directories; job logs
            are looked up in ``DIR/../job_log``.
        tool: Tool identifier; used as a key into ``tool_prefix`` and as the
            first component of the ``stats_map`` key.

    Returns:
        None. Results are delivered through ``stats_map``.
    """
    # FIRST: Get any applicable RM output stats
    for org in data_map:
        for f in f_list:
            RS_job_file = DIR + "/../job_log/{prefix}.{org}.s0.f{f}".format(prefix=tool_prefix[tool], org=org, f=f)
            RM_job_file = DIR + "/../job_log/rm.{prefix}.{org}.s0.f{f}".format(prefix=tool_prefix[tool], org=org, f=f)
            RS_dir = DIR + "/" + ("{org}.s0.f{f}".format(org=org, f=f)).upper()
            blast_file = RS_dir + "/" + "{org}.s0.f{f}.RS.blast.6.txt.bz2".format(org=org, f=f)
            pra_output = "{DIR}/{org}.s0.f{f}.pra.txt".format(DIR=RS_dir, org=org, f=f)

            # The RM output for the tool's run is what gets scored against
            # the reference ".out" file for the organism.
            tool_output = RS_dir + "/" + "{org}.fa.out".format(org=org)
            real_repeats = data_map[org] + ".out"

            H = create_stats_hash(tool, org, None, int(f))

            # Get stats from RM run.  Deliberately best-effort: if the
            # comparison fails for any reason, H keeps whatever
            # create_stats_hash put in these fields.
            try:
                Counts, Stats, _ = perform_stats.perform_stats(real_repeats, tool_output, None)
                H["tp"], H["fp"], H["fn"], H["tn"] = Counts
                H["tpr"], H["tnr"], H["ppv"], H["npv"], H["fpr"], H["fdr"] = Stats
            except Exception:
                pass

            # Get resource usage from RPT_SCOUT run
            if os.path.exists(RS_job_file):
                # Context managers make sure the read handle is closed before
                # the same path is reopened for writing.
                with open(RS_job_file, "rb") as job_fp:
                    p = redhawk.loadPBS(job_fp)[0]
                try:
                    if p.efile_exists():
                        H["ToolCpuTime"], H["ToolWallTime"], H["ToolMem"], H["ToolVMem"] = p.getResources()
                except Exception:
                    # Best-effort; unlike a bare ``except`` this still lets
                    # KeyboardInterrupt / SystemExit propagate.
                    pass
                with open(RS_job_file, "wb") as job_fp:
                    redhawk.storePBS([p], job_fp)

            # Get resource usage from RM run
            if os.path.exists(RM_job_file):
                with open(RM_job_file, "rb") as job_fp:
                    p = redhawk.loadPBS(job_fp)[0]
                try:
                    if p.efile_exists():
                        H["RMCpuTime"], H["RMWallTime"], H["RMMem"], H["RMVMem"] = p.getResources()
                except Exception:
                    pass
                with open(RM_job_file, "wb") as job_fp:
                    redhawk.storePBS([p], job_fp)

            if os.path.exists(blast_file):
                # The pipeline needs a shell for the "|"; the paths are
                # interpolated unquoted, so they must not contain shell
                # metacharacters or whitespace.
                cmd = "bzcat {blast_output} | ./pra_analysis2 {output}".format(
                    blast_output=blast_file, output=pra_output
                )
                subprocess.call(cmd, shell=True)
                query_cover, target_cover, _ = parse_pra_output.parse_pra_output(pra_output, "exclude.txt")
                # NOTE(review): query_cover is stored as "ConCoverage" and
                # target_cover as "QuCoverage" -- confirm this mapping is the
                # intended one.
                H["ConCoverage"], H["QuCoverage"] = query_cover, target_cover

            stats_map[(tool, org, None, f)] = H
    return None
Exemplo n.º 2
0
            ]) + "\n")

    ######
    # Calculate statistics (not bothering with parallelization yet)
    print_str = "{:<12}" + "{:<5}" + "".join("{:<14}" * 4) + "".join(
        "{:<14}" * 6) + "".join("{:<14}" * 8) + "\n"
    with open(args.results_dir + "/" + args.stats_file, "w") as fp:
        fp.write(
            print_str.format("#tool", "seed", "tp", "fp", "fn", "tn", "tpr",
                             "tnr", "ppv", "npv", "fpr", "fdr", "ToolCpuTime",
                             "ToolWallTime", "ToolMem", "ToolVMem",
                             "RMCpuTime", "RMWallTime", "RMMem", "RMVMem"))

        for key in test_tools:
            for p in job_dic[key]:
                Counts, Stats, Sets = perform_stats.perform_stats(
                    p.seq_file + ".out", p.rm_output, None)
                Stats = [round(x, 5) for x in Stats]
                fp.write(
                    print_str.format(*([key, p.seed_num] + list(Counts) +
                                       list(Stats) + list(p.tool_resources) +
                                       list(p.getResources()))))

        # for i in range(len(J)):
        #     if RAIDER_JOBS:
        #         for j in range(len(RAIDER_JOBS[i])):
        #             p = RAIDER_JOBS[i][j]
        #             Counts, Stats, Sets = perform_stats.perform_stats(J[i].sim_output + ".out", p.rm_output, None)
        #             Stats = [round(x,5) for x in Stats]
        #             fp.write(print_str.format(*(["raider", p.seed_num] + list(Counts) + list(Stats) + list(p.tool_resources) + list(p.getResources()))))
        #     if SCOUT_JOBS:
        #         p = SCOUT_JOBS[i]
Exemplo n.º 3
0
def collectRaider(DIR, tool):
    """Collect seed, accuracy, resource-usage and coverage stats for RAIDER runs.

    For every organism in ``data_map``, every seed number in ``seed_map`` and
    every ``f`` in ``f_list`` a stats record is created with
    ``create_stats_hash`` and stored in the module-level ``stats_map`` under
    the key ``(tool, org, seed_num, f)``.  A progress line is printed for
    each combination.  Each step is best-effort: a record is stored even when
    some of its inputs are missing or unreadable.

    Side effects:
        * Each existing job file is loaded with ``redhawk.loadPBS`` and
          written back with ``redhawk.storePBS``.
        * Prints progress, and the pra command when the pra output is missing.

    Args:
        DIR: Directory containing the per-run result directories; job logs
            are looked up in ``DIR/../job_log``.
        tool: Tool identifier; used as a key into ``tool_prefix`` and as the
            first component of the ``stats_map`` key.

    Returns:
        None. Results are delivered through ``stats_map``.
    """
    # FIRST: Get any applicable RM output stats
    for org in data_map:
        for seed_num in seed_map:
            for f in f_list:
                print("File: " + org + " " + str(seed_num) + " " + str(f) + "\n")
                RAIDER_job_file = DIR + "/../job_log/{prefix}.{org}.s{seed_num}.f{f}".format(
                    prefix=tool_prefix[tool], org=org, seed_num=seed_num, f=f
                )
                RM_job_file = DIR + "/../job_log/rm.{prefix}.{org}.s{seed_num}.f{f}".format(
                    prefix=tool_prefix[tool], org=org, seed_num=seed_num, f=f
                )
                RM_dir = DIR + "/" + ("{org}.s{seed}.f{f}".format(org=org, seed=seed_num, f=f)).upper()
                blast_file = RM_dir + "/" + "{org}.s{seed}.f{f}.blast.6.txt.bz2".format(org=org, seed=seed_num, f=f)
                pra_output = RM_dir + "/" + "{org}.s{seed}.f{f}.pra.txt".format(org=org, seed=seed_num, f=f)

                # The RM output for the tool's run is what gets scored
                # against the reference ".out" file for the organism.
                tool_output = RM_dir + "/" + "{org}.fa.out".format(org=org)
                real_repeats = data_map[org] + ".out"

                H = create_stats_hash(tool, org, int(seed_num), int(f))

                # Seed shape: length, weight (number of "1" characters in the
                # converted seed) and their ratio.
                seed = convert_seed(seed_map[seed_num])
                seed_len = len(seed)
                seed_weight = seed.count("1")

                H["l"] = seed_len
                H["w"] = seed_weight
                # float() keeps this a true division on Python 2 as well.
                H["w/l"] = seed_weight / (float(seed_len))

                # Get stats from RM run.  Deliberately best-effort: if the
                # comparison fails for any reason, H keeps whatever
                # create_stats_hash put in these fields.
                try:
                    Counts, Stats, _ = perform_stats.perform_stats(real_repeats, tool_output, None)
                    H["tp"], H["fp"], H["fn"], H["tn"] = Counts
                    H["tpr"], H["tnr"], H["ppv"], H["npv"], H["fpr"], H["fdr"] = Stats
                except Exception:
                    pass

                # Get resource usage from RAIDER run
                if os.path.exists(RAIDER_job_file):
                    # Context managers make sure the read handle is closed
                    # before the same path is reopened for writing.
                    with open(RAIDER_job_file, "rb") as job_fp:
                        p = redhawk.loadPBS(job_fp)[0]
                    try:
                        if p.efile_exists():
                            H["ToolCpuTime"], H["ToolWallTime"], H["ToolMem"], H["ToolVMem"] = p.getResources()
                    except Exception:
                        # Best-effort; unlike a bare ``except`` this still
                        # lets KeyboardInterrupt / SystemExit propagate.
                        pass
                    with open(RAIDER_job_file, "wb") as job_fp:
                        redhawk.storePBS([p], job_fp)

                # Get resource usage from RM run
                if os.path.exists(RM_job_file):
                    with open(RM_job_file, "rb") as job_fp:
                        p = redhawk.loadPBS(job_fp)[0]
                    try:
                        if p.efile_exists():
                            H["RMCpuTime"], H["RMWallTime"], H["RMMem"], H["RMVMem"] = p.getResources()
                    except Exception:
                        pass
                    with open(RM_job_file, "wb") as job_fp:
                        redhawk.storePBS([p], job_fp)

                if os.path.exists(blast_file):
                    if not os.path.exists(pra_output):
                        cmd = "bzcat {blast_output} | ./pra_analysis2 {output}".format(
                            blast_output=blast_file, output=pra_output
                        )
                        print("cmd: " + cmd)
                        # NOTE(review): the pipeline is only printed, not
                        # executed, so pra_output is still missing when it is
                        # parsed below -- confirm whether the call should be
                        # re-enabled or the parse skipped.
                        # subprocess.call(cmd, shell=True)
                    query_cover, target_cover, _ = parse_pra_output.parse_pra_output(pra_output, "exclude.txt")
                    # NOTE(review): query_cover is stored as "ConCoverage"
                    # and target_cover as "QuCoverage" -- confirm this
                    # mapping is the intended one.
                    H["ConCoverage"], H["QuCoverage"] = query_cover, target_cover

                stats_map[(tool, org, seed_num, f)] = H
    return None
Exemplo n.º 4
0
        with open(args.results_dir + "/seed_file.txt", "w") as fp:
            fp.write("\n".join(["{index:<5}{seed}".format(index=i,seed=s) for i,s in enumerate(seed_list)]) + "\n")
            
    if job_dic['araider']:
        with open(args.results_dir + "/seed_file.txt", "w") as fp:
            fp.write("\n".join(["{index:<5}{seed}".format(index=i,seed=s) for i,s in enumerate(seed_list)]) + "\n")

    ######
    # Calculate statistics (not bothering with parallelization yet)
    print_str = "{:<12}" + "{:<5}" + "".join("{:<14}"*4) + "".join("{:<14}"*6) + "".join("{:<14}"*8) + "\n"
    with open(args.results_dir + "/" + args.stats_file, "w") as fp:
        fp.write(print_str.format("#tool", "seed", "tp", "fp", "fn", "tn", "tpr", "tnr", "ppv", "npv", "fpr", "fdr","ToolCpuTime", "ToolWallTime", "ToolMem", "ToolVMem", "RMCpuTime", "RMWallTime", "RMMem", "RMVMem"))

        for key in test_tools:
            for p in job_dic[key]:
                Counts, Stats, Sets = perform_stats.perform_stats(p.seq_file + ".out", p.rm_output, None)
                Stats = [round(x,5) for x in Stats]
                fp.write(print_str.format(*([key, p.seed_num] + list(Counts) + list(Stats) + list(p.tool_resources) + list(p.getResources()))))

        # for i in range(len(J)):
        #     if RAIDER_JOBS:
        #         for j in range(len(RAIDER_JOBS[i])):
        #             p = RAIDER_JOBS[i][j]
        #             Counts, Stats, Sets = perform_stats.perform_stats(J[i].sim_output + ".out", p.rm_output, None)
        #             Stats = [round(x,5) for x in Stats]
        #             fp.write(print_str.format(*(["raider", p.seed_num] + list(Counts) + list(Stats) + list(p.tool_resources) + list(p.getResources()))))
        #     if SCOUT_JOBS:
        #         p = SCOUT_JOBS[i]
        #         CountSJ, StatsSJ, SetsSJ = perform_stats.perform_stats(J[i].sim_output + ".out", p.rm_output, None)
        #         StatsSJ = [round(x,5) for x in StatsSJ]
        #         fp.write(print_str.format(*(["repscout", "NA"] + list(CountSJ) + list(StatsSJ) + list(p.tool_resources) + list(p.getResources()))))