import argparse
import os

import bed12  # project-local module providing loadbed()
# Performance is a project-local helper class; its import is not shown in this excerpt.


def main():
    parser = argparse.ArgumentParser(
        "Script to create the Venn Plots from BED files")
    parser.add_argument(
        "input", nargs='+',
        help="The directory containing BED files from pipeline")
    parser.add_argument("-r", "--reference", required=True,
                        help="The reference BED file to compare against")
    parser.add_argument("-o", "--output", required=True,
                        help="The output prefix")
    parser.add_argument("-f", "--filter")  # declared but not used below
    args = parser.parse_args()

    ref_bed = bed12.loadbed(args.reference, False, False)
    print("Loaded Reference BED file. # junctions: " + str(len(ref_bed)))

    # Load all bed files, keyed by "<junction analyser>-<aligner>".
    # File names are expected to follow the pattern <aligner>-<reads>-<analyser>.bed;
    # Trinity-derived files do not fit this pattern and are skipped.
    bed_data = {}
    aligners = set()
    reads = set()
    junc_analysers = set()
    for bed_file in args.input:
        bed_base = os.path.splitext(os.path.basename(bed_file))[0]
        parts = bed_base.split('-')
        if parts[0] != "trinity":
            aligners.add(parts[0])
            reads.add(parts[1])
            junc_analysers.add(parts[2])
            bed_data[parts[2] + "-" + parts[0]] = bed12.loadbed(
                bed_file, False, False)
            print("Loaded: " + bed_file + "; # junctions: " +
                  str(len(bed_data[parts[2] + "-" + parts[0]])))

    print("Found these aligners: " + ', '.join(aligners))
    print("Found these reads: " + ', '.join(reads))
    print("Found these junction analysis tools: " + ', '.join(junc_analysers))

    # Build table of performance metrics for each aligner / junction-analyser pair
    tab = []
    for a in aligners:
        for j in junc_analysers:
            p = Performance()
            p.tp = len(bed_data[j + "-" + a] & ref_bed)
            p.fp = len(bed_data[j + "-" + a] - ref_bed)
            p.fn = len(ref_bed - bed_data[j + "-" + a])
            tab.append(a + "\t" + j + "\t" + str(p))

    # Output table to disk
    with open(args.output + "-junc_analysis.tab", "w") as tab_out:
        print("Aligner\tFilter\t" + Performance.shortHeader(), file=tab_out)
        for p in tab:
            print(p, file=tab_out)
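# --- A minimal sketch of the Performance helper these scripts assume ---
# The real Performance class is project-local and not shown in this excerpt.  Only the
# attribute and method names (tp, fp, fn, aligner, input, recall, precision, F1,
# shortHeader, __str__) are taken from how the scripts use it; the formulas and the
# exact column layout below are the standard definitions, assumed for illustration.
class Performance:

    def __init__(self):
        self.aligner = ""
        self.input = ""
        self.tp = 0  # junctions found in both the query and the reference
        self.fp = 0  # junctions found only in the query
        self.fn = 0  # junctions found only in the reference

    def recall(self):
        # Proportion of reference junctions recovered (as a percentage)
        return 100.0 * self.tp / (self.tp + self.fn) if (self.tp + self.fn) > 0 else 0.0

    def precision(self):
        # Proportion of predicted junctions present in the reference (as a percentage)
        return 100.0 * self.tp / (self.tp + self.fp) if (self.tp + self.fp) > 0 else 0.0

    def F1(self):
        # Harmonic mean of precision and recall
        p, r = self.precision(), self.recall()
        return 2.0 * p * r / (p + r) if (p + r) > 0 else 0.0

    @staticmethod
    def shortHeader():
        return "TP\tFP\tFN\tREC\tPRC\tF1"

    def __str__(self):
        return "\t".join([str(self.tp), str(self.fp), str(self.fn),
                          "{0:.2f}".format(self.recall()),
                          "{0:.2f}".format(self.precision()),
                          "{0:.2f}".format(self.F1())])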
import argparse

from matplotlib.pyplot import figure
from matplotlib_venn import venn2

import bed12  # project-local module providing loadbed()
# Performance is a project-local helper class; its import is not shown in this excerpt.


def main():
    parser = argparse.ArgumentParser(
        "Script to compare bed file against reference bed")
    parser.add_argument("input", nargs="+",
                        help="The BED file to analyse")
    parser.add_argument("-r", "--reference", required=True,
                        help="The reference BED file to compare against")
    parser.add_argument("-o", "--output",
                        help="The output venn plot")
    args = parser.parse_args()

    ref_bed = bed12.loadbed(args.reference, False, False)
    print("Loaded Reference BED file. # junctions: ", len(ref_bed))

    # Load all bed files and score each against the reference
    print("Results:")
    print("File\t#junc\t", Performance.shortHeader())
    recall = 0
    precision = 0
    f1 = 0
    for bf in args.input:
        bed_data = bed12.loadbed(bf, False, False)

        p = Performance()
        p.tp = len(ref_bed & bed_data)
        p.fp = len(bed_data - ref_bed)
        p.fn = len(ref_bed - bed_data)
        print(bf, "\t", len(bed_data), "\t", p)
        recall += p.recall()
        precision += p.precision()
        f1 += p.F1()

    if len(args.input) > 1:
        print("Mean recall: ", recall / len(args.input))
        print("Mean precision: ", precision / len(args.input))
        print("Mean f1: ", f1 / len(args.input))

    if args.output is not None and len(args.input) == 1:
        # Create Venn plot for the single input against the reference
        plt = figure(1, figsize=(6, 6))
        venn2(subsets=(p.fn, p.fp, p.tp),
              set_labels=(args.reference, args.input[0]))
        plt.show()
        plt.savefig(args.output)
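# --- A small standalone check of the venn2 subset ordering used above ---
# matplotlib_venn's venn2 accepts subsets=(Ab, aB, AB): the count of elements only in
# the left set, only in the right set, and in both.  With the reference as the left set
# and the query BED as the right set that is (fn, fp, tp), which is why main() passes
# (p.fn, p.fp, p.tp).  The junction tuples below are made up purely for illustration.
from matplotlib import pyplot
from matplotlib_venn import venn2


def venn2_example():
    ref = {("chr1", 100, 200), ("chr1", 300, 400), ("chr2", 50, 80)}
    query = {("chr1", 100, 200), ("chr2", 900, 950)}

    fn = len(ref - query)    # only in the reference -> left-only region
    fp = len(query - ref)    # only in the query     -> right-only region
    tp = len(ref & query)    # in both               -> overlap region

    fig = pyplot.figure(figsize=(6, 6))
    venn2(subsets=(fn, fp, tp), set_labels=("reference", "query"))
    fig.savefig("example.venn.png")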
import argparse
import itertools
import os

import rpy2.robjects
from rpy2.robjects.packages import importr

import bed12  # project-local module providing loadbed()
# Performance is a project-local helper class; its import is not shown in this excerpt.


def main():
    parser = argparse.ArgumentParser(
        "Script to create the Venn Plots from BED files")
    parser.add_argument("input", nargs="+",
                        help="The BED files to analyse")
    parser.add_argument("-r", "--reference", required=True,
                        help="The reference BED file to compare against")
    parser.add_argument("-o", "--output", required=True,
                        help="The output prefix")
    args = parser.parse_args()

    ref_bed = bed12.loadbed(args.reference, False, False)
    print("Loaded Reference BED file. # junctions: " + str(len(ref_bed)))

    # Load all bed files, keyed by basename.  File names are expected to follow
    # the pattern <aligner>-<reads>.bed.
    bed_data = {}
    aligners = set()
    reads = set()
    # junc_analysers = set()
    for bed_path in args.input:
        bed_file = os.path.split(bed_path)[1]
        bed_base = os.path.splitext(bed_file)[0]
        bed_data[bed_base] = bed12.loadbed(bed_path, False, False)
        parts = bed_base.split('-')
        aligners.add(parts[0])
        reads.add(parts[1])
        # junc_analysers.add(parts[2])
        print("Loaded: " + bed_file + "; # junctions: " +
              str(len(bed_data[bed_base])))

    print("Found these aligners: " + ', '.join(aligners))
    print("Found these reads: " + ', '.join(reads))
    # print("Found these junction analysis tools: " + ', '.join(junc_analysers))

    # Build table of performance metrics for each aligner / read set pair
    tab = list()
    for a in aligners:
        for r in reads:
            p = Performance()
            p.aligner = a
            p.input = r
            p.tp = len(ref_bed & bed_data[a + "-" + r])
            p.fp = len(bed_data[a + "-" + r] - ref_bed)
            p.fn = len(ref_bed - bed_data[a + "-" + r])
            tab.append(r + "\t" + a + "\t" + str(p))

    # Output table to disk
    with open(args.output + "-align_reads.tab", "w") as tab_out:
        print("Dataset\tAligner\t" + Performance.shortHeader(), file=tab_out)
        for p in tab:
            print(p, file=tab_out)

    # Create Venns via R's VennDiagram package (one five-way plot per read set:
    # the reference plus up to four aligners)
    cols = rpy2.robjects.vectors.StrVector(
        ["lightblue", "purple", "green", "orange", "red"])
    r_session = rpy2.robjects.r  # Start the embedded R session
    base = importr("base")
    venn = importr("VennDiagram")
    grdevices = importr("grDevices")

    for r in reads:
        # Categories will be in alphabetical order of aligner name,
        # with the reference first
        categories = list()
        categories.append("Reference")
        sets = list()
        sets.append(ref_bed)
        nums = dict()
        nums["area1"] = len(ref_bed)
        i = 2
        for a in sorted(aligners):
            s = bed_data[a + "-" + r]
            sets.append(s)
            categories.append(a)
            nums["area{0}".format(i)] = len(s)
            i += 1

        # draw.quintuple.venn needs every pairwise and higher-order intersection
        # size (n12, n13, ..., n12345)
        for num_combs in range(2, 6):
            for comb in itertools.combinations(range(1, 6), num_combs):
                index = "".join([str(x) for x in comb])
                curr_sets = [sets[num - 1] for num in comb]
                nums["n{0}".format(index)] = len(set.intersection(*curr_sets))

        grdevices.tiff(args.output + "-" + r + ".venn.tiff",
                       width=960, height=960)
        venn.draw_quintuple_venn(
            height=5000, width=5000,
            fill=cols,
            category=rpy2.robjects.vectors.StrVector(categories),
            margin=0.2,
            cat_dist=rpy2.robjects.vectors.FloatVector(
                [0.25, 0.3, 0.25, 0.25, 0.25]),
            cat_cex=3,
            cat_col=rpy2.robjects.vectors.StrVector(
                ["darkblue", "purple", "darkgreen", "darkorange", "darkred"]),
            cex=2,
            main="Comparison on junctions found by alignment tools",
            main_col="black", main_cex=8,
            sub=r + " dataset",
            sub_col="black", sub_cex=5,
            **nums)
        grdevices.dev_off()
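# --- A rough sketch of what bed12.loadbed is assumed to return ---
# All three scripts apply set operations ('&', '-') to its return value, so loadbed
# must yield a set of hashable junction keys.  The real loader lives in the project's
# bed12 module and takes two extra boolean flags whose meaning is not shown in this
# excerpt; the key layout below (chrom, start, end, strand) is an assumption made only
# to illustrate why the set algebra in the scripts works.
def loadbed_sketch(filepath):
    junctions = set()
    with open(filepath) as handle:
        for line in handle:
            if not line.strip() or line.startswith(("track", "#")):
                continue
            parts = line.rstrip("\n").split("\t")
            chrom, start, end = parts[0], int(parts[1]), int(parts[2])
            strand = parts[5] if len(parts) > 5 else "."
            junctions.add((chrom, start, end, strand))
    return junctions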