def init_param_truth_file(truth_file, out_dir, hg_file):
    """Build a total-copy-number truth BED file completed over the genome.

    If the truth file has five tab-separated columns
    (chromosome, start, end, cnA, cnB), it is first rewritten into a temporary
    four-column file ``cntot_<basename>`` in ``out_dir`` whose last column is
    ``cnA + cnB``.  The (possibly rewritten) file is then handed to
    ``add_BED_complement`` with ``sort=True`` and ``hap_split=False``.

    Args:
        truth_file: Path to the tab-separated truth BED file.
        out_dir: Directory for the temporary and the output files.
        hg_file: Genome file forwarded unchanged to ``add_BED_complement``.

    Returns:
        The path returned by ``add_BED_complement``.

    Raises:
        ValueError: If the file is detected as five-column but a row does not
            have exactly five fields or its copy numbers are not integers.
    """
    new_truth_file = os.path.join(out_dir,
                                  "cntot_" + os.path.basename(truth_file))

    # Blank lines carry no interval and would break the 5-way unpack below.
    with open(truth_file, "r") as tf:
        rows = [line.strip().split("\t") for line in tf if line.strip()]

    # The layout is probed on the second row, as before; fall back to the
    # first row (or nothing) so short or empty files no longer raise IndexError.
    if len(rows) > 1:
        probe = rows[1]
    elif rows:
        probe = rows[0]
    else:
        probe = []

    if len(probe) == 5:
        # Collapse the two allele-specific copy numbers into a total one.
        with open(new_truth_file, "w") as new_tf:
            for chr_id, start, end, cn_a, cn_b in rows:
                total_cn = str(int(cn_a) + int(cn_b))
                # Newline-terminate every record, including the last one.
                new_tf.write("\t".join([chr_id, start, end, total_cn]) + "\n")
        truth_file = new_truth_file

    # Per the original comment: complement the truth file with diploid regions.
    cnv_file = add_BED_complement(truth_file, hg_file, sort=True,
                                  out_dir=out_dir, hap_split=False)

    # Remove intermediates synchronously with os.remove instead of spawning an
    # un-waited "rm" through the shell (racy, unsafe for unusual paths).
    leftovers = [new_truth_file]
    # Presumably add_BED_complement leaves a "_compl.bed" next to the sorted
    # output -- TODO confirm.  Literal replacement avoids the regex wildcard.
    compl_file = cnv_file.replace("_sorted.bed", "_compl.bed")
    if compl_file != cnv_file:
        # When the name has no "_sorted.bed" the replacement is a no-op;
        # never delete the file we are about to return.
        leftovers.append(compl_file)
    for path in leftovers:
        if os.path.isfile(path):
            os.remove(path)

    return cnv_file
def init_param_truth_file(truth_file, out_dir, hg_file):
    """Build a total-copy-number truth BED file completed over the genome.

    If the truth file has five tab-separated columns
    (chromosome, start, end, cnA, cnB), it is first rewritten into a temporary
    four-column file ``cntot_<basename>`` in ``out_dir`` whose last column is
    ``cnA + cnB``.  The (possibly rewritten) file is then handed to
    ``add_BED_complement`` with ``sort=True`` and ``hap_split=False``.

    Args:
        truth_file: Path to the tab-separated truth BED file.
        out_dir: Directory for the temporary and the output files.
        hg_file: Genome file forwarded unchanged to ``add_BED_complement``.

    Returns:
        The path returned by ``add_BED_complement``.

    Raises:
        ValueError: If the file is detected as five-column but a row does not
            have exactly five fields or its copy numbers are not integers.
    """
    new_truth_file = os.path.join(out_dir,
                                  "cntot_" + os.path.basename(truth_file))

    # Blank lines carry no interval and would break the 5-way unpack below.
    with open(truth_file, "r") as tf:
        rows = [line.strip().split("\t") for line in tf if line.strip()]

    # The layout is probed on the second row, as before; fall back to the
    # first row (or nothing) so short or empty files no longer raise IndexError.
    if len(rows) > 1:
        probe = rows[1]
    elif rows:
        probe = rows[0]
    else:
        probe = []

    if len(probe) == 5:
        # Collapse the two allele-specific copy numbers into a total one.
        with open(new_truth_file, "w") as new_tf:
            for chr_id, start, end, cn_a, cn_b in rows:
                total_cn = str(int(cn_a) + int(cn_b))
                # Newline-terminate every record, including the last one.
                new_tf.write("\t".join([chr_id, start, end, total_cn]) + "\n")
        truth_file = new_truth_file

    # Per the original comment: complement the truth file with diploid regions.
    cnv_file = add_BED_complement(truth_file, hg_file, sort=True,
                                  out_dir=out_dir, hap_split=False)

    # Remove intermediates synchronously with os.remove instead of spawning an
    # un-waited "rm" through the shell (racy, unsafe for unusual paths).
    leftovers = [new_truth_file]
    # Presumably add_BED_complement leaves a "_compl.bed" next to the sorted
    # output -- TODO confirm.  Literal replacement avoids the regex wildcard.
    compl_file = cnv_file.replace("_sorted.bed", "_compl.bed")
    if compl_file != cnv_file:
        # When the name has no "_sorted.bed" the replacement is a no-op;
        # never delete the file we are about to return.
        leftovers.append(compl_file)
    for path in leftovers:
        if os.path.isfile(path):
            os.remove(path)

    return cnv_file
def main():
    """Evaluate Canvas results on simulated data.

    Command-line options (exactly one is required):
        -s/--canvas_sim_id: evaluate every directory under the hard-coded
            Canvas results root whose name starts with this id.
        -d/--canvas_sim_dir: evaluate this single Canvas results directory.

    For each results directory the matching simulation directory is derived by
    replacing "/Canvas/" with "/simulation/" in its path.  Truth and
    variant-percentage files are created in a per-run output directory, the
    simulation parameters are written once, and ``evaluate_CNV`` is run twice:
    with the shared exclusion filter, and with a filter extended by the
    autosomal variants whose percentage is >= 0.8 ("only heterogeneous"
    evaluation).  Outputs that already exist are not recomputed.
    """
    parser = OptionParser()
    parser.add_option("-s", "--canvas_sim_id", dest="canvas_sim_id",
                      type="string",
                      help="simulation id (find Canvas results directories strating with sim_id")
    parser.add_option("-d", "--canvas_sim_dir", dest="canvas_sim_dir",
                      type="string", help="Canvas results directory")
    options, _ = parser.parse_args()

    if options.canvas_sim_dir is None and options.canvas_sim_id is None:
        parser.error("Specify Canvas results simulation id or directory")
    if options.canvas_sim_dir is not None and options.canvas_sim_id is not None:
        parser.error("Canvas results simulation id and directory are mutually exclusive options")

    # parser.error() exits, so exactly one of the two options is set here.
    if options.canvas_sim_dir is not None:
        res_dirs = [options.canvas_sim_dir]
    else:
        canvas_dir = "/illumina/scratch/tmp/users/ccolombo/Canvas/"
        res_dirs = [canvas_dir + cdir for cdir in os.listdir(canvas_dir)
                    if os.path.isdir(canvas_dir + cdir)
                    and cdir.startswith(options.canvas_sim_id)]
    if not res_dirs:
        print("No Canvas results directories found")

    hg_file = "/home/ccolombo/filtered_human.hg19.genome"
    excluded_chroms = ("chrX", "chrY", "chrM")

    for res_dir in res_dirs:
        print(res_dir)
        sim_dir = re.sub("/Canvas/", "/simulation/", res_dir)
        # Strip a trailing "/" (common with "-d path/") so the directory name
        # is not empty, which would yield an empty res_id and defeat the
        # name-based filters below.
        res_name = os.path.basename(res_dir.rstrip("/"))
        res_id = re.sub("sim", "ev", res_name)

        if not os.path.exists(res_dir):
            print("\nCanvas results directory %s does not exist\n\n" % res_dir)
            continue
        # Fixed: this used to test res_dir a second time, so a missing
        # simulation directory was never detected.
        if not os.path.exists(sim_dir):
            print("\nSimulation directory %s does not exist\n\n" % sim_dir)
            continue
        if res_name.startswith("simNorm") or res_name.endswith("_purity100"):
            continue

        out_dir = os.path.join(
            "/illumina/scratch/tmp/users/ccolombo/evaluation/EvaluateCNV",
            res_id)
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)

        truth_file = create_het_cn_file(
            sim_dir, os.path.join(out_dir, "het_truth_file.bed"),
            no_dipl=True, hap_split=True, round=True)
        truth_file = add_BED_complement(truth_file, hg_file, sort=False,
                                        out_dir=out_dir, hap_split=True)
        perc_file = create_var_perc_file(
            sim_dir, os.path.join(out_dir, "var_perc.bed"), no_dipl=True)

        # Write simulation parameters to file
        out_file = os.path.join(out_dir, res_id + "_par.txt")
        if not os.path.exists(out_file):
            get_sim_params(sim_dir, truth_file, perc_file, out_file, hg_file)

        # Run EvaluateCNV
        out_file = os.path.join(out_dir, res_id + ".txt")
        excl_file = "/illumina/scratch/tmp/users/ccolombo/evaluation/sim_filter.bed"
        if not os.path.exists(out_file):
            evaluate_CNV(res_dir, truth_file, excl_file, out_file)

        # Run EvaluateCNV only on heterogeneous variants
        out_file = os.path.join(out_dir, res_id + "_onlyhet.txt")
        if not os.path.exists(out_file):
            with open(excl_file, "r") as ef:
                excl_vars = ef.readlines()
            with open(perc_file, "r") as pf:
                for line in pf:
                    if not line.strip():
                        # A blank line would break the 6-way unpack.
                        continue
                    chr_id, start, end, _, _, perc = line.strip().split("\t")
                    # Exclude autosomal variants at >= 0.8 so that only the
                    # remaining (heterogeneous) ones are evaluated.
                    if float(perc) >= 0.8 and chr_id not in excluded_chroms:
                        excl_vars.append("\t".join([chr_id, start, end]) + "\n")
            # Delete directly instead of shelling out to "rm".
            os.remove(perc_file)
            excl_file = os.path.join(out_dir, "filter_onlyhet.bed")
            with open(excl_file, "w") as wf:
                wf.writelines(excl_vars)
            evaluate_CNV(res_dir, truth_file, excl_file, out_file)
def main():
    """Evaluate Canvas results on simulated data.

    Command-line options (exactly one is required):
        -s/--canvas_sim_id: evaluate every directory under the hard-coded
            Canvas results root whose name starts with this id.
        -d/--canvas_sim_dir: evaluate this single Canvas results directory.

    For each results directory the matching simulation directory is derived by
    replacing "/Canvas/" with "/simulation/" in its path.  Truth and
    variant-percentage files are created in a per-run output directory, the
    simulation parameters are written once, and ``evaluate_CNV`` is run twice:
    with the shared exclusion filter, and with a filter extended by the
    autosomal variants whose percentage is >= 0.8 ("only heterogeneous"
    evaluation).  Outputs that already exist are not recomputed.
    """
    parser = OptionParser()
    parser.add_option("-s", "--canvas_sim_id", dest="canvas_sim_id",
                      type="string",
                      help="simulation id (find Canvas results directories strating with sim_id")
    parser.add_option("-d", "--canvas_sim_dir", dest="canvas_sim_dir",
                      type="string", help="Canvas results directory")
    options, _ = parser.parse_args()

    if options.canvas_sim_dir is None and options.canvas_sim_id is None:
        parser.error("Specify Canvas results simulation id or directory")
    if options.canvas_sim_dir is not None and options.canvas_sim_id is not None:
        parser.error("Canvas results simulation id and directory are mutually exclusive options")

    # parser.error() exits, so exactly one of the two options is set here.
    if options.canvas_sim_dir is not None:
        res_dirs = [options.canvas_sim_dir]
    else:
        canvas_dir = "/illumina/scratch/tmp/users/ccolombo/Canvas/"
        res_dirs = [canvas_dir + cdir for cdir in os.listdir(canvas_dir)
                    if os.path.isdir(canvas_dir + cdir)
                    and cdir.startswith(options.canvas_sim_id)]
    if not res_dirs:
        print("No Canvas results directories found")

    hg_file = "/home/ccolombo/filtered_human.hg19.genome"
    excluded_chroms = ("chrX", "chrY", "chrM")

    for res_dir in res_dirs:
        print(res_dir)
        sim_dir = re.sub("/Canvas/", "/simulation/", res_dir)
        # Strip a trailing "/" (common with "-d path/") so the directory name
        # is not empty, which would yield an empty res_id and defeat the
        # name-based filters below.
        res_name = os.path.basename(res_dir.rstrip("/"))
        res_id = re.sub("sim", "ev", res_name)

        if not os.path.exists(res_dir):
            print("\nCanvas results directory %s does not exist\n\n" % res_dir)
            continue
        # Fixed: this used to test res_dir a second time, so a missing
        # simulation directory was never detected.
        if not os.path.exists(sim_dir):
            print("\nSimulation directory %s does not exist\n\n" % sim_dir)
            continue
        if res_name.startswith("simNorm") or res_name.endswith("_purity100"):
            continue

        out_dir = os.path.join(
            "/illumina/scratch/tmp/users/ccolombo/evaluation/EvaluateCNV",
            res_id)
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)

        truth_file = create_het_cn_file(
            sim_dir, os.path.join(out_dir, "het_truth_file.bed"),
            no_dipl=True, hap_split=True, round=True)
        truth_file = add_BED_complement(truth_file, hg_file, sort=False,
                                        out_dir=out_dir, hap_split=True)
        perc_file = create_var_perc_file(
            sim_dir, os.path.join(out_dir, "var_perc.bed"), no_dipl=True)

        # Write simulation parameters to file
        out_file = os.path.join(out_dir, res_id + "_par.txt")
        if not os.path.exists(out_file):
            get_sim_params(sim_dir, truth_file, perc_file, out_file, hg_file)

        # Run EvaluateCNV
        out_file = os.path.join(out_dir, res_id + ".txt")
        excl_file = "/illumina/scratch/tmp/users/ccolombo/evaluation/sim_filter.bed"
        if not os.path.exists(out_file):
            evaluate_CNV(res_dir, truth_file, excl_file, out_file)

        # Run EvaluateCNV only on heterogeneous variants
        out_file = os.path.join(out_dir, res_id + "_onlyhet.txt")
        if not os.path.exists(out_file):
            with open(excl_file, "r") as ef:
                excl_vars = ef.readlines()
            with open(perc_file, "r") as pf:
                for line in pf:
                    if not line.strip():
                        # A blank line would break the 6-way unpack.
                        continue
                    chr_id, start, end, _, _, perc = line.strip().split("\t")
                    # Exclude autosomal variants at >= 0.8 so that only the
                    # remaining (heterogeneous) ones are evaluated.
                    if float(perc) >= 0.8 and chr_id not in excluded_chroms:
                        excl_vars.append("\t".join([chr_id, start, end]) + "\n")
            # Delete directly instead of shelling out to "rm".
            os.remove(perc_file)
            excl_file = os.path.join(out_dir, "filter_onlyhet.bed")
            with open(excl_file, "w") as wf:
                wf.writelines(excl_vars)
            evaluate_CNV(res_dir, truth_file, excl_file, out_file)