Python SeparateByChrom.combineAllGraphFiles示例

示例#1

0

显示文件

def main(argv):
    """Run the per-chromosome filtering pipeline driven by command-line options.

    Options read from ``argv``: ``-s`` species, ``-b`` raw BED file,
    ``-t`` integer copy-number threshold, ``-o`` output file name.

    Steps, in order:
      1. ``SeparateByChrom.separateByChrom`` is called with the raw BED file
         and the ``.bed1`` extension.
      2. ``strand_broken_remove(chrom, threshold)`` runs for every chromosome
         whose ``<chrom>.bed1`` file exists.
      3. ``SeparateByChrom.combineAllGraphFiles`` merges the ``.bed2`` files
         into the output file (presumably those files are produced by
         ``strand_broken_remove`` -- confirm against that function).
      4. ``SeparateByChrom.cleanup`` is called for ``.bed1`` and ``.bed2``.

    Exits with status 1 after printing the usage text when ``argv`` has fewer
    than 8 entries, or after printing a message when the species is not a key
    of ``GenomeData.species_chroms``.
    """
    option_parser = OptionParser()
    option_parser.add_option(
        "-s", "--species", action="store", type="string",
        dest="species", help="species under consideration", metavar="<str>")
    option_parser.add_option(
        "-b", "--raw_bed_file", action="store", type="string",
        dest="bed_file", help="raw bed file", metavar="<file>")
    option_parser.add_option(
        "-t", "--threshold", action="store", type="int",
        dest="threshold", help="threshold for copy number", metavar="<int>")
    option_parser.add_option(
        "-o", "--output_file_name", action="store", type="string",
        dest="out_file", help="output file name", metavar="<file>")

    options, _positional = option_parser.parse_args(argv)
    # Fewer than 8 entries cannot hold all four flag/value pairs.
    if len(argv) < 8:
        option_parser.print_help()
        sys.exit(1)

    known_species = GenomeData.species_chroms
    if options.species not in known_species.keys():
        print("This species is not recognized, exiting")
        sys.exit(1)
    chroms = known_species[options.species]

    SeparateByChrom.separateByChrom(chroms, options.bed_file, '.bed1')

    for chrom in chroms:
        # Chromosomes without a per-chromosome input file are skipped.
        if not Utility.fileExists(chrom + ".bed1"):
            continue
        strand_broken_remove(chrom, options.threshold)

    SeparateByChrom.combineAllGraphFiles(chroms, '.bed2', options.out_file)
    for temp_extension in ('.bed1', '.bed2'):
        SeparateByChrom.cleanup(chroms, temp_extension)

示例#2

0

显示文件

文件： filter_raw_tags_by_islands_dev.py 项目： PrinnyJungle/Bioinformatics

def main(argv):
    """Filter raw tags by islands chromosome by chromosome, then merge the results.

    Options read from ``argv``: ``-s`` species, ``-a`` raw BED file,
    ``-i`` integer shift, ``-b`` island file, ``-o`` output file name.

    The raw BED file is handed to ``SeparateByChrom.separateByChrom`` with an
    extension derived from the input file name (``-<libName>.bed1``).  For
    every chromosome present in the island annotation,
    ``filter_tags_by_islands`` is called with the per-chromosome file and an
    ``<chrom>-filtered<extension>`` output name, and its return value is passed
    to ``output_island_with_rc`` together with ``<chrom>-islands<extension>``.
    The ``-islands`` files are combined into ``islands<extension>`` in the
    current directory and the ``-filtered`` files into the ``-o`` file; all
    three families of temporary files are then cleaned up.

    Exits with status 1 after printing the usage text when ``argv`` has fewer
    than 10 entries, or after printing a message when the species is unknown.
    """
    parser = OptionParser()
    parser.add_option("-s", "--species", action="store",
                      type="string", dest="species",
                      help="species, mm8, hg18, etc", metavar="<str>")
    parser.add_option("-a", "--rawbedfile", action="store",
                      type="string", dest="bedfile",
                      metavar="<file>",
                      help="raw data file in bed format")
    parser.add_option("-i", "--shift", action="store",
                      type="int", dest="shift",
                      metavar="<int>",
                      help="shift for finding the center of DNA fragment represented by the read")
    parser.add_option("-b", "--islandfile", action="store", type="string",
                      dest="islandbedfile", metavar="<file>",
                      help="island file")
    parser.add_option("-o", "--outfile", action="store", type="string",
                      dest="out_file", metavar="<file>",
                      help="filtered raw bed file")

    (opt, args) = parser.parse_args(argv)
    # Fewer than 10 entries cannot hold all five flag/value pairs.
    if len(argv) < 10:
        parser.print_help()
        sys.exit(1)

    if opt.species not in species_chroms:
        print("This species is not recognized, exiting")
        sys.exit(1)
    chroms = species_chroms[opt.species]

    islands = BED_revised.BED_annotated(opt.species, opt.islandbedfile, "BED3", 0)

    # Library name: input file name without directories and without
    # everything from the first '.' onwards.
    libName = opt.bedfile.split('/')[-1].split('.')[0]
    extension = "-" + libName + '.bed1'
    SeparateByChrom.separateByChrom(chroms, opt.bedfile, extension)

    for chrom in chroms:
        if chrom in islands.keys():
            outfile = chrom + "-filtered" + extension
            # Original author's note on the return value: [(island, rc)]
            island_rc = filter_tags_by_islands(chrom + extension, islands[chrom],
                                               opt.shift, outfile,
                                               boundary_extension=0)
            output_island_with_rc(island_rc, chrom + "-islands" + extension)

    SeparateByChrom.combineAllGraphFiles(chroms, "-islands" + extension, "islands" + extension)
    SeparateByChrom.combineAllGraphFiles(chroms, "-filtered" + extension, opt.out_file)

    SeparateByChrom.cleanup(chroms, extension)
    SeparateByChrom.cleanup(chroms, "-filtered" + extension)
    SeparateByChrom.cleanup(chroms, "-islands" + extension)

示例#3

0

显示文件

文件： run-make-graph-file-by-chrom.py 项目： lux563624348/Bioinformatics

def main(argv):
    """Build a summary graph file from a BED file, one chromosome at a time.

    Options read from ``argv``: ``-s`` species, ``-b`` BED file,
    ``-w`` window size, ``-i`` fragment size, ``-o`` output file name.
    ``window_size`` and ``fragment_size`` are parsed with ``type="int"`` and
    are forwarded to ``makeGraphFile`` as integers.

    For a known species the BED file is handed to
    ``SeparateByChrom.separateByChrom`` (``.bed`` extension), ``makeGraphFile``
    is called with the chromosome list and chromosome lengths, the ``.graph``
    files are combined into the output file, and the ``.bed`` and ``.graph``
    temporary files are cleaned up.

    Exits with status 1 after printing the usage text when ``argv`` has fewer
    than 10 entries.  An unknown species only prints a message; the function
    then returns normally.
    """
    parser = OptionParser()
    parser.add_option("-s", "--species", action="store", type="string",
                      dest="species", help="mm8,hg18,dm2,etc",
                      metavar="<str>")
    parser.add_option("-b", "--bed_file", action="store", type="string",
                      dest="bedfile", help="bed file to make graph file of",
                      metavar="<file>")
    parser.add_option("-w", "--window_size", action="store", type="int",
                      dest="window_size", help="window size",
                      metavar="<int>")
    parser.add_option("-i", "--fragment_size", action="store", type="int",
                      dest="fragment_size",
                      help="size of fragments after CHIP experiment",
                      metavar="<int>")
    parser.add_option("-o", "--outfile", action="store", type="string",
                      dest="outfile", help="output bed summary file name",
                      metavar="<file>")
    (opt, args) = parser.parse_args(argv)
    # Fewer than 10 entries cannot hold all five flag/value pairs.
    if len(argv) < 10:
        parser.print_help()
        sys.exit(1)

    if opt.species not in GenomeData.species_chroms:
        print(opt.species + " is not in the species list ")
        return

    chroms = GenomeData.species_chroms[opt.species]
    chrom_lengths = GenomeData.species_chrom_lengths[opt.species]
    SeparateByChrom.separateByChrom(chroms, opt.bedfile, ".bed")
    makeGraphFile(chroms, chrom_lengths, opt.window_size, opt.fragment_size)
    # The return value of combineAllGraphFiles was stored but never read.
    SeparateByChrom.combineAllGraphFiles(chroms, ".graph", opt.outfile)
    SeparateByChrom.cleanup(chroms, ".bed")
    SeparateByChrom.cleanup(chroms, ".graph")

示例#4

0

显示文件

文件： find_union_islands.py 项目： NYU-BFX/hic-bench

def main(argv):
    """Write the union of the islands from one or two island files.

    Options read from ``argv``: ``-a`` island file 1, ``-b`` island file 2,
    ``-s`` species, ``-o`` output file name.

    Island file 1 is always split per chromosome (``.island1``).  Island
    file 2 is split (``.island2``) and merged in only when
    ``Utility.fileExists`` reports that it exists; per the option help, any
    placeholder word may be given otherwise.  For each chromosome a
    ``<chrom>.output`` file is created and, when the chromosome has at least
    one island, ``union_islands_to_file`` is called with the island list and
    the open file.  The ``.output`` files are combined into the ``-o`` file and
    the temporary files are cleaned up.

    Exits with status 1 after printing the usage text when ``argv`` has fewer
    than 8 entries, or after printing a message when the species is unknown.
    """
    parser = OptionParser()
    parser.add_option("-a", "--islandfile1", action="store", type="string", dest="islandfile1", metavar="<file>", help="file 1 with islands info to be unioned")
    parser.add_option("-b", "--islandfile2", action="store", type="string", dest="islandfile2", metavar="<file>", help="file 2 with islands info to be unioned; if no, type in any word")
    parser.add_option("-s", "--species", action="store", type="string", dest="species", help="species, mm8 or hg18", metavar="<str>")
    parser.add_option("-o", "--outputfile", action="store", type="string", dest="outfile", metavar="<file>", help="output file name")

    (opt, args) = parser.parse_args(argv)
    # Fewer than 8 entries cannot hold all four flag/value pairs.
    if len(argv) < 8:
        parser.print_help()
        sys.exit(1)

    if opt.species not in species_chroms:
        print("This species is not recognized, exiting")
        sys.exit(1)
    chroms = species_chroms[opt.species]

    SeparateByChrom.separateByChrom(chroms, opt.islandfile1, '.island1')

    have_second_file = Utility.fileExists(opt.islandfile2)
    if have_second_file:
        SeparateByChrom.separateByChrom(chroms, opt.islandfile2, '.island2')

    for chrom in chroms:
        # The output file is created even when it stays empty; 'with' closes
        # it even if reading the islands or writing the union raises.
        with open(chrom + '.output', 'w') as f:
            bed_vals_1 = BED.BED(opt.species, chrom + '.island1', "BED3", 0)
            bed_vals_2 = None
            if have_second_file:
                bed_vals_2 = BED.BED(opt.species, chrom + '.island2', "BED3", 0)
            islandlist = bed_vals_1[chrom]
            if bed_vals_2 is not None:
                islandlist = islandlist + bed_vals_2[chrom]
            if len(islandlist) > 0:
                union_islands_to_file(islandlist, f)

    if have_second_file:
        SeparateByChrom.cleanup(chroms, '.island2')

    SeparateByChrom.combineAllGraphFiles(chroms, '.output', opt.outfile)

    SeparateByChrom.cleanup(chroms, '.output')
    SeparateByChrom.cleanup(chroms, '.island1')

示例#5

0

显示文件

文件： run-make-graph-file-by-chrom_bam.py 项目： dariober/SICERpy

def main(argv):
    """Build a summary graph file from a BAM file, one chromosome at a time.

    Options read from ``argv``: ``-b`` input BAM file (the long name
    ``--bed_file`` is kept for command-line compatibility), ``-w`` window
    size, ``-i`` fragment size, ``-o`` output file name.  ``-s`` is still
    accepted but its value is not used here: the chromosome names come from
    ``SeparateByChrom.getChromsFromBam``.  ``window_size`` and
    ``fragment_size`` are parsed with ``type="int"``.

    The dictionary returned by ``getChromsFromBam`` supplies both the
    chromosome list (its keys) and the second argument of ``makeGraphFile``
    (presumably chromosome lengths -- confirm against ``getChromsFromBam``).

    Exits through ``parser.error`` (status 2) when ``-b`` or ``-o`` is missing,
    instead of failing later inside the helpers.
    """
    parser = OptionParser()
    parser.add_option("-s", "--species", action="store", type="string",
                      dest="species", help="mm8,hg18,dm2,etc", metavar="<str>")
    # The help text used to say "bed file", but the value is read as BAM.
    parser.add_option("-b", "--bed_file", action="store", type="string",
                      dest="bamfile", help="bam file to make graph file of",
                      metavar="<file>")
    parser.add_option("-w", "--window_size", action="store", type="int",
                      dest="window_size", help="window size", metavar="<int>")
    parser.add_option("-i", "--fragment_size", action="store", type="int",
                      dest="fragment_size",
                      help="size of fragments after CHIP experiment",
                      metavar="<int>")
    parser.add_option("-o", "--outfile", action="store", type="string",
                      dest="outfile", help="output bed summary file name",
                      metavar="<file>")

    (opt, args) = parser.parse_args(argv)
    # Fail early: without these two values the run can only crash, possibly
    # after all per-chromosome work has already been done.
    if opt.bamfile is None or opt.outfile is None:
        parser.error("an input BAM file (-b) and an output file (-o) are required")
    # NOTE(review): -w and -i are not validated here; confirm whether
    # makeGraphFile tolerates them being None.

    chromsDict = SeparateByChrom.getChromsFromBam(opt.bamfile)
    chroms = list(chromsDict.keys())

    SeparateByChrom.separateByChromBamToBed(chroms, opt.bamfile, '.bed')

    makeGraphFile(chroms, chromsDict, opt.window_size, opt.fragment_size)
    SeparateByChrom.combineAllGraphFiles(chroms, ".graph", opt.outfile)
    SeparateByChrom.cleanup(chroms, ".bed")
    SeparateByChrom.cleanup(chroms, ".graph")

示例#6

0

显示文件

文件： filter_raw_tags_by_islands.py 项目： jrtejedor/SICERpy

def main(argv):
    """Filter a raw BED file by islands and write the filtered reads.

    Options read from ``argv``: ``-s`` species, ``-a`` raw BED file,
    ``-i`` integer fragment size, ``-b`` island file, ``-o`` output file name.

    The islands are loaded with ``BED.BED``, the raw BED file is handed to
    ``SeparateByChrom.separateByChrom`` (``.bed1`` extension), and
    ``filter_tags_by_islands`` is called once with the full chromosome list.
    The ``_filtered.bed1`` files are then combined into the ``-o`` file
    (presumably they are written by ``filter_tags_by_islands`` -- confirm),
    and the ``.bed1`` and ``_filtered.bed1`` files are cleaned up.

    Exits with status 1 after printing the usage text when ``argv`` has fewer
    than 10 entries, or after printing a message when the species is unknown.
    """
    parser = OptionParser()
    parser.add_option("-s", "--species", action="store",
                      type="string", dest="species",
                      help="species, mm8, hg18", metavar="<str>")
    parser.add_option("-a", "--rawbedfile", action="store",
                      type="string", dest="bedfile",
                      metavar="<file>",
                      help="raw data file in bed format")
    parser.add_option("-i", "--fragment_size", action="store",
                      type="int", dest="fragment_size",
                      metavar="<int>",
                      help="average size of a fragment after CHIP experiment")
    parser.add_option("-b", "--islandfile", action="store", type="string",
                      dest="islandbedfile", metavar="<file>",
                      help="island file")
    parser.add_option("-o", "--outfile", action="store", type="string",
                      dest="out_file", metavar="<file>",
                      help="filtered raw bed file")

    (opt, args) = parser.parse_args(argv)
    # Fewer than 10 entries cannot hold all five flag/value pairs.
    if len(argv) < 10:
        parser.print_help()
        sys.exit(1)

    if opt.species not in species_chroms:
        print("This species is not recognized, exiting")
        sys.exit(1)
    chroms = species_chroms[opt.species]

    islands = BED.BED(opt.species, opt.islandbedfile, "BED3", 0)
    SeparateByChrom.separateByChrom(chroms, opt.bedfile, '.bed1')
    filter_tags_by_islands(chroms, islands, opt.fragment_size)
    # The return value of combineAllGraphFiles was stored but never read.
    SeparateByChrom.combineAllGraphFiles(chroms, '_filtered.bed1', opt.out_file)
    SeparateByChrom.cleanup(chroms, '.bed1')
    SeparateByChrom.cleanup(chroms, '_filtered.bed1')

示例#7

0

显示文件

文件： run-make-graph-file-by-chrom.py 项目： NYU-BFX/hic-bench

def main(argv):
    """Build a summary graph file from a BED file, one chromosome at a time.

    Options read from ``argv``: ``-s`` species, ``-b`` BED file,
    ``-w`` window size, ``-i`` fragment size, ``-o`` output file name.
    Both sizes are declared ``type="int"``, so ``makeGraphFile`` receives
    integers.

    When the species is a key of ``GenomeData.species_chroms`` the function
    calls, in order: ``SeparateByChrom.separateByChrom`` (``.bed``),
    ``makeGraphFile`` with the chromosome list and lengths,
    ``SeparateByChrom.combineAllGraphFiles`` (``.graph`` into the output
    file), and ``SeparateByChrom.cleanup`` for ``.bed`` and ``.graph``.

    Exits with status 1 after printing the usage text when ``argv`` has fewer
    than 10 entries.  An unknown species only prints a message; the function
    then returns normally.
    """
    parser = OptionParser()
    parser.add_option("-s", "--species", action="store", type="string",
                      dest="species", help="mm8,hg18,dm2,etc", metavar="<str>")
    parser.add_option("-b", "--bed_file", action="store", type="string",
                      dest="bedfile", help="bed file to make graph file of",
                      metavar="<file>")
    parser.add_option("-w", "--window_size", action="store", type="int",
                      dest="window_size", help="window size", metavar="<int>")
    parser.add_option("-i", "--fragment_size", action="store", type="int",
                      dest="fragment_size",
                      help="size of fragments after CHIP experiment",
                      metavar="<int>")
    parser.add_option("-o", "--outfile", action="store", type="string",
                      dest="outfile", help="output bed summary file name",
                      metavar="<file>")
    (opt, args) = parser.parse_args(argv)
    # Fewer than 10 entries cannot hold all five flag/value pairs.
    if len(argv) < 10:
        parser.print_help()
        sys.exit(1)

    # The body below previously alternated tab- and space-indented lines;
    # it is now indented with spaces only.
    if opt.species in GenomeData.species_chroms:
        chroms = GenomeData.species_chroms[opt.species]
        chrom_lengths = GenomeData.species_chrom_lengths[opt.species]
        SeparateByChrom.separateByChrom(chroms, opt.bedfile, ".bed")
        makeGraphFile(chroms, chrom_lengths, opt.window_size, opt.fragment_size)
        SeparateByChrom.combineAllGraphFiles(chroms, ".graph", opt.outfile)
        SeparateByChrom.cleanup(chroms, ".bed")
        SeparateByChrom.cleanup(chroms, ".graph")
    else:
        print(opt.species + " is not in the species list ")

示例#8

0

显示文件

def main(argv):
    """Union the islands of one or two island files into a single output file.

    Options read from ``argv``: ``-a`` island file 1, ``-b`` island file 2,
    ``-s`` species, ``-o`` output file name.

    Island file 1 is always split per chromosome (``.island1``).  When
    ``Utility.fileExists`` says island file 2 exists, it is split too
    (``.island2``) and its islands are appended to those of file 1 for each
    chromosome; otherwise only file 1 is used (the option help tells users to
    pass any word in that case).  Every chromosome gets a ``<chrom>.output``
    file, which receives the result of ``union_islands_to_file`` when the
    chromosome has at least one island.  Finally the ``.output`` files are
    combined into the ``-o`` file and the temporary files are cleaned up.

    Exits with status 1 after printing the usage text when ``argv`` has fewer
    than 8 entries, or after printing a message when the species is unknown.
    """
    parser = OptionParser()
    parser.add_option("-a", "--islandfile1", action="store", type="string",
                      dest="islandfile1", metavar="<file>",
                      help="file 1 with islands info to be unioned")
    parser.add_option(
        "-b", "--islandfile2", action="store", type="string",
        dest="islandfile2", metavar="<file>",
        help="file 2 with islands info to be unioned; if no, type in any word")
    parser.add_option("-s", "--species", action="store", type="string",
                      dest="species", help="species, mm8 or hg18",
                      metavar="<str>")
    parser.add_option("-o", "--outputfile", action="store", type="string",
                      dest="outfile", metavar="<file>",
                      help="output file name")

    (opt, args) = parser.parse_args(argv)
    # Fewer than 8 entries cannot hold all four flag/value pairs.
    if len(argv) < 8:
        parser.print_help()
        sys.exit(1)

    if opt.species not in species_chroms:
        print("This species is not recognized, exiting")
        sys.exit(1)
    chroms = species_chroms[opt.species]

    SeparateByChrom.separateByChrom(chroms, opt.islandfile1, '.island1')

    use_file2 = Utility.fileExists(opt.islandfile2)
    if use_file2:
        SeparateByChrom.separateByChrom(chroms, opt.islandfile2, '.island2')

    # One loop serves both cases; the second island source is simply skipped
    # when file 2 is absent.
    for chrom in chroms:
        # 'with' closes the per-chromosome output even if a helper raises.
        with open(chrom + '.output', 'w') as f:
            bed_vals_1 = BED.BED(opt.species, chrom + '.island1', "BED3", 0)
            if use_file2:
                bed_vals_2 = BED.BED(opt.species, chrom + '.island2', "BED3", 0)
                islandlist = bed_vals_1[chrom] + bed_vals_2[chrom]
            else:
                islandlist = bed_vals_1[chrom]
            if len(islandlist) > 0:
                union_islands_to_file(islandlist, f)

    if use_file2:
        SeparateByChrom.cleanup(chroms, '.island2')

    SeparateByChrom.combineAllGraphFiles(chroms, '.output', opt.outfile)

    SeparateByChrom.cleanup(chroms, '.output')
    SeparateByChrom.cleanup(chroms, '.island1')

示例#9

0

显示文件

def main(argv):
    """Build a summary graph file from a BAM file, one chromosome at a time.

    Options read from ``argv``: ``-b`` input BAM file (long name
    ``--bed_file`` retained so existing command lines keep working),
    ``-w`` window size and ``-i`` fragment size (both ``type="int"``),
    ``-o`` output file name.  ``-s`` is accepted but its value is unused in
    this function.

    ``SeparateByChrom.getChromsFromBam`` returns a dictionary whose keys are
    used as the chromosome list and which is itself passed as the second
    argument of ``makeGraphFile`` (presumably a name -> length mapping --
    confirm against ``getChromsFromBam``).

    Exits through ``parser.error`` (status 2) when ``-b`` or ``-o`` is missing.
    """
    parser = OptionParser()
    parser.add_option("-s",
                      "--species",
                      action="store",
                      type="string",
                      dest="species",
                      help="mm8,hg18,dm2,etc",
                      metavar="<str>")
    # Help text corrected: the file given here is opened as BAM, not BED.
    parser.add_option("-b",
                      "--bed_file",
                      action="store",
                      type="string",
                      dest="bamfile",
                      help="bam file to make graph file of",
                      metavar="<file>")
    parser.add_option("-w",
                      "--window_size",
                      action="store",
                      type="int",
                      dest="window_size",
                      help="window size",
                      metavar="<int>")
    parser.add_option("-i",
                      "--fragment_size",
                      action="store",
                      type="int",
                      dest="fragment_size",
                      help="size of fragments after CHIP experiment",
                      metavar="<int>")
    parser.add_option("-o",
                      "--outfile",
                      action="store",
                      type="string",
                      dest="outfile",
                      help="output bed summary file name",
                      metavar="<file>")

    (opt, args) = parser.parse_args(argv)
    # Report missing mandatory values up front rather than crashing inside a
    # helper (a missing -o would only fail after all the work was done).
    if opt.bamfile is None or opt.outfile is None:
        parser.error("an input BAM file (-b) and an output file (-o) are required")
    # NOTE(review): -w and -i are not validated here; confirm whether
    # makeGraphFile tolerates them being None.

    chromsDict = SeparateByChrom.getChromsFromBam(opt.bamfile)
    chrom_names = list(chromsDict.keys())

    SeparateByChrom.separateByChromBamToBed(chrom_names, opt.bamfile, '.bed')

    makeGraphFile(chrom_names, chromsDict, opt.window_size,
                  opt.fragment_size)
    SeparateByChrom.combineAllGraphFiles(chrom_names, ".graph", opt.outfile)
    SeparateByChrom.cleanup(chrom_names, ".bed")
    SeparateByChrom.cleanup(chrom_names, ".graph")

示例#10

0

显示文件

文件： coarsegraining-v9.py 项目： zanglab/recognicer

def main(argv):
    """Identify enriched domains in a summary graph by coarse graining.

    Options read from ``argv``: ``-s`` species, ``-b`` summary graph file,
    ``-w`` window size in bp, ``-g`` graining unit size, ``-e`` graining
    score, ``-t`` mappable fraction of the genome, ``-f`` output file name.

    A window qualifies when its read count is at least
    ``int(average) + 1``, where ``average`` is
    ``total_read_count * window_size / effective_genome_length``.  For every
    chromosome with data, the start positions of qualifying windows are passed
    to ``coarsegraining``; its island list goes through ``traceback`` (a
    function of this file, not the standard-library module) and the resulting
    islands are written to ``<chrom>.islandstemp`` as
    ``chrom<TAB>start<TAB>end<TAB>1`` lines.  The temporary files are then
    combined into the output file and cleaned up.

    NOTE(review): the original docstring said "test chr1, input must only have
    chr1", yet the loop handles every chromosome of the species that has
    data -- confirm whether that restriction still applies.

    Exits with status 1 after printing the usage text when ``argv`` has fewer
    than 14 entries.  An unknown species only prints a message.
    """
    parser = OptionParser()
    parser.add_option("-s", "--species", action="store", type="string", dest="species", help="mm8, hg18, background, etc", metavar="<str>")
    parser.add_option("-b", "--summarygraph", action="store", type="string", dest="summarygraph", help="summarygraph", metavar="<file>")
    parser.add_option("-w", "--window_size(bp)", action="store", type="int", dest="window_size", help="window_size(in bps)", metavar="<int>")
    parser.add_option("-g", "--graining_size", action="store", type="int", dest="step", help="graining unit size (>0)", metavar="<int>")
    parser.add_option("-e", "--score", action="store", type="int", dest="score", help="graining criterion, 0<score<=graining_size", metavar="<int>")
    parser.add_option("-t", "--mappable_faction_of_genome_size", action="store", type="float", dest="fraction", help="mapable fraction of genome size", metavar="<float>")
    parser.add_option("-f", "--output_file", action="store", type="string", dest="out_file", help="output file name", metavar="<file>")

    (opt, args) = parser.parse_args(argv)
    # Fewer than 14 entries cannot hold all seven flag/value pairs.
    if len(argv) < 14:
        parser.print_help()
        sys.exit(1)

    # Each message below reproduces the text the former multi-item print
    # statements emitted, including the single space print put between items.
    print("Coarse-graining approach to identify ChIP-Seq enriched domains:")
    if opt.species not in GenomeData.species_chroms:
        print("This species is not in my list!")
        return

    print("Species:  %s" % opt.species)
    print("Window_size:  %s" % opt.window_size)
    print("Coarse graining step:  %s" % opt.step)
    print("Coarse graining score: %s" % opt.score)
    chroms = GenomeData.species_chroms[opt.species]
    total_read_count = get_total_tag_counts.get_total_tag_counts_bed_graph(opt.summarygraph)
    print("Total read count: %s" % total_read_count)
    genome_length = sum(GenomeData.species_chrom_lengths[opt.species].values())
    # Effective genome length: only the mappable fraction counts.
    genome_length = int(opt.fraction * genome_length)

    average = float(total_read_count) * opt.window_size / genome_length
    print("Effective genome length:  %s" % genome_length)
    print("window average: %s" % average)

    min_tags_in_window = int(average) + 1
    print("Minimum read count in a qualified window:  %s" % min_tags_in_window)

    print("Generate preprocessed data list")
    # Read in the summary graph file.
    bed_val = BED.BED(opt.species, opt.summarygraph, "BED_GRAPH")
    # Only enrichment matters: keep windows at or above the threshold.
    for chrom in chroms:
        if not (chrom in bed_val.keys() and len(bed_val[chrom]) > 0):
            continue
        chrom_length = GenomeData.species_chrom_lengths[opt.species][chrom]
        eligible_start_list = [window.start for window in bed_val[chrom]
                               if window.value >= min_tags_in_window]
        print("Coarse graining:")
        # The first element of the returned pair is not used here.
        (_result_list, island_list) = coarsegraining(eligible_start_list, opt.window_size, opt.step, opt.score, chrom_length)
        print("Trace back... %s" % len(island_list))
        islands = traceback(island_list, opt.window_size, opt.step, 0, chrom_length, chrom)
        print("%s islands found in %s" % (len(islands), chrom))
        with open(chrom + ".islandstemp", 'w') as f:
            for island in islands:
                f.write(chrom + '\t' + str(int(island.start)) + '\t' + str(int(island.end)) + '\t1\n')

    with open(opt.out_file, 'w') as o:
        o.write('track type=bedGraph name=' + opt.out_file + '\n')
    # NOTE(review): the track header written above survives only if
    # combineAllGraphFiles appends to an existing file -- confirm.
    SeparateByChrom.combineAllGraphFiles(chroms, ".islandstemp", opt.out_file)
    SeparateByChrom.cleanup(chroms, ".islandstemp")

示例#11

0

显示文件

文件： find_overlapped_islands.py 项目： NYU-BFX/hic-bench

def main(argv):
    """Split the islands of file 1 by whether they overlap an island of file 2.

    Options read from ``argv``: ``-a`` island file 1, ``-b`` island file 2,
    ``-s`` species, ``-p`` output for overlapping islands, ``-q`` output for
    non-overlapping islands.

    Both island files are split per chromosome (``.island1`` / ``.island2``).
    For each chromosome the islands of file 2 are sorted by start if needed
    and merged with ``union_islands``; every non-comment line of
    ``<chrom>.island1`` is then written to ``<chrom>.1in2`` when
    ``region_overlap`` returns 1 and to ``<chrom>.1notin2`` otherwise.  When
    file 2 has no islands on a chromosome, every line goes to
    ``<chrom>.1notin2``.  The per-chromosome results are combined into the
    ``-p`` and ``-q`` files and all temporary files are cleaned up.

    Exits with status 1 after printing the usage text when ``argv`` has fewer
    than 10 entries, or after printing a message when the species is unknown.
    """
    parser = OptionParser()
    parser.add_option("-a", "--islandfile1", action="store", type="string", dest="islandfile1", metavar="<file>", help="file 1 with islands info to be compared")
    parser.add_option("-b", "--islandfile2", action="store", type="string", dest="islandfile2", metavar="<file>", help="file 2 with islands info to be compared")
    parser.add_option("-s", "--species", action="store", type="string", dest="species", help="species, mm8 or hg18", metavar="<str>")
    parser.add_option("-p", "--overlapin1", action="store", type="string", dest="overlapin1", metavar="<file>", help="file for islands in 1 overlapping with islands in 2")
    parser.add_option("-q", "--nonoverlapin1", action="store", type="string", dest="nonoverlapin1", help="file for islands in 1 not overlapping with islands in 2 ", metavar="<file>")

    (opt, args) = parser.parse_args(argv)
    # Fewer than 10 entries cannot hold all five flag/value pairs.
    if len(argv) < 10:
        parser.print_help()
        sys.exit(1)

    if opt.species not in species_chroms:
        print("This species is not recognized, exiting")
        sys.exit(1)
    chroms = species_chroms[opt.species]

    total_overlap_number_1 = 0
    total_islands_1 = 0

    SeparateByChrom.separateByChrom(chroms, opt.islandfile1, '.island1')
    SeparateByChrom.separateByChrom(chroms, opt.islandfile2, '.island2')

    for chrom in chroms:
        # Both outputs are always created (possibly empty); 'with' closes
        # them even if a helper raises.
        with open(chrom + '.1in2', 'w') as f:
            with open(chrom + '.1notin2', 'w') as g:
                bed_vals_2 = BED.BED(opt.species, chrom + '.island2', "BED3", 0)
                if not Utility.fileExists(chrom + '.island1'):
                    continue
                islandlist2 = bed_vals_2[chrom]
                have_islands2 = len(islandlist2) > 0
                if have_islands2:
                    if are_islands_sorted(islandlist2) != 1:
                        islandlist2.sort(key=operator.attrgetter('start'))
                    (island2_start_list, island2_end_list) = union_islands(islandlist2)
                # The input handle used to be left open for every chromosome.
                with open(chrom + '.island1', 'r') as islands1:
                    for line in islands1:
                        if line.startswith("#"):
                            continue
                        total_islands_1 += 1
                        sline = line.strip().split()
                        # Coordinates are parsed only when there is something
                        # to compare against, as before.
                        if have_islands2 and region_overlap(
                                int(sline[1]), int(sline[2]),
                                island2_start_list, island2_end_list) == 1:
                            f.write('\t'.join(sline) + '\n')
                            total_overlap_number_1 += 1
                        else:
                            g.write('\t'.join(sline) + '\n')

    # "%s %s" keeps the single space the print statement put between items.
    print("%s %s" % ("total number of island in " + opt.islandfile1 + ":     ", total_islands_1))
    print("%s %s" % ("total number of island in " + opt.overlapin1 + ":     ", total_overlap_number_1))

    SeparateByChrom.combineAllGraphFiles(chroms, '.1in2', opt.overlapin1)
    SeparateByChrom.combineAllGraphFiles(chroms, '.1notin2', opt.nonoverlapin1)

    SeparateByChrom.cleanup(chroms, '.1in2')
    SeparateByChrom.cleanup(chroms, '.1notin2')
    SeparateByChrom.cleanup(chroms, '.island1')
    SeparateByChrom.cleanup(chroms, '.island2')

示例#12

0

显示文件

文件： find_overlapped_islands.py 项目： jrtejedor/SICERpy

def main(argv):
    """Partition the islands of file 1 into overlapping / non-overlapping sets.

    Options read from ``argv``: ``-a`` island file 1, ``-b`` island file 2,
    ``-s`` species, ``-p`` output for islands of file 1 that overlap file 2,
    ``-q`` output for those that do not.

    Per chromosome: the islands of file 2 are loaded with ``BED.BED``, sorted
    by ``start`` when ``are_islands_sorted`` does not return 1, and merged by
    ``union_islands`` into start/end lists.  Each line of ``<chrom>.island1``
    that does not begin with ``#`` is counted and written to ``<chrom>.1in2``
    if ``region_overlap`` returns 1, otherwise to ``<chrom>.1notin2``.  With
    no file-2 islands on the chromosome, all lines go to ``<chrom>.1notin2``.
    The per-chromosome files are finally combined into the ``-p`` / ``-q``
    files and the temporary files are cleaned up.

    Exits with status 1 after printing the usage text when ``argv`` has fewer
    than 10 entries, or after printing a message when the species is unknown.
    """
    parser = OptionParser()
    parser.add_option("-a", "--islandfile1", action="store", type="string",
                      dest="islandfile1", metavar="<file>",
                      help="file 1 with islands info to be compared")
    parser.add_option("-b", "--islandfile2", action="store", type="string",
                      dest="islandfile2", metavar="<file>",
                      help="file 2 with islands info to be compared")
    parser.add_option("-s", "--species", action="store", type="string",
                      dest="species", help="species, mm8 or hg18",
                      metavar="<str>")
    parser.add_option(
        "-p", "--overlapin1", action="store", type="string",
        dest="overlapin1", metavar="<file>",
        help="file for islands in 1 overlapping with islands in 2")
    parser.add_option(
        "-q", "--nonoverlapin1", action="store", type="string",
        dest="nonoverlapin1",
        help="file for islands in 1 not overlapping with islands in 2 ",
        metavar="<file>")

    (opt, args) = parser.parse_args(argv)
    # Fewer than 10 entries cannot hold all five flag/value pairs.
    if len(argv) < 10:
        parser.print_help()
        sys.exit(1)

    if opt.species not in species_chroms:
        print("This species is not recognized, exiting")
        sys.exit(1)
    chroms = species_chroms[opt.species]

    total_overlap_number_1 = 0
    total_islands_1 = 0

    SeparateByChrom.separateByChrom(chroms, opt.islandfile1, '.island1')
    SeparateByChrom.separateByChrom(chroms, opt.islandfile2, '.island2')

    for chrom in chroms:
        # Output files are created for every chromosome, even if left empty.
        with open(chrom + '.1in2', 'w') as f:
            with open(chrom + '.1notin2', 'w') as g:
                bed_vals_2 = BED.BED(opt.species, chrom + '.island2', "BED3", 0)
                if not Utility.fileExists(chrom + '.island1'):
                    continue

                islandlist2 = bed_vals_2[chrom]
                compare = len(islandlist2) > 0
                if compare:
                    if are_islands_sorted(islandlist2) != 1:
                        islandlist2.sort(key=operator.attrgetter('start'))
                    (island2_start_list,
                     island2_end_list) = union_islands(islandlist2)

                # Opened with 'with' so the handle is released per chromosome;
                # it was previously never closed.
                with open(chrom + '.island1', 'r') as islands1:
                    for line in islands1:
                        if line.startswith("#"):
                            continue
                        total_islands_1 += 1
                        sline = line.strip().split()
                        overlaps = False
                        if compare:
                            start = int(sline[1])
                            end = int(sline[2])
                            overlaps = region_overlap(
                                start, end, island2_start_list,
                                island2_end_list) == 1
                        if overlaps:
                            f.write('\t'.join(sline) + '\n')
                            total_overlap_number_1 += 1
                        else:
                            g.write('\t'.join(sline) + '\n')

    # "%s %s" keeps the single space the print statement put between items.
    print("%s %s" % ("total number of island in " + opt.islandfile1 + ":     ",
                     total_islands_1))
    print("%s %s" % ("total number of island in " + opt.overlapin1 + ":     ",
                     total_overlap_number_1))

    SeparateByChrom.combineAllGraphFiles(chroms, '.1in2', opt.overlapin1)
    SeparateByChrom.combineAllGraphFiles(chroms, '.1notin2', opt.nonoverlapin1)

    SeparateByChrom.cleanup(chroms, '.1in2')
    SeparateByChrom.cleanup(chroms, '.1notin2')
    SeparateByChrom.cleanup(chroms, '.island1')
    SeparateByChrom.cleanup(chroms, '.island2')

示例#13

0

显示文件

文件： find_overlapped_islands_w_tagcount.py 项目： PrinnyJungle/Bioinformatics

def main(argv):
    """Split the islands of file 1 by overlap with file 2, keeping island records.

    Options read from ``argv``: ``-a`` island file 1, ``-b`` island file 2,
    ``-s`` species, ``-p`` output for overlapping islands, ``-q`` output for
    non-overlapping islands.

    Both files are split per chromosome and loaded with ``BED.BED`` in
    ``"BED_GRAPH"`` mode.  For each island of file 1, ``region_overlap`` is
    called with its ``start``/``end`` and the chromosome's file-2 island list;
    the island is passed to ``write`` with the ``<chrom>.1in2`` file when the
    result is 1 and with the ``<chrom>.1notin2`` file otherwise.  When file 2
    has no islands on the chromosome, every island goes to ``<chrom>.1notin2``.
    The per-chromosome files are combined into the ``-p`` / ``-q`` files and
    all temporary files are cleaned up.

    Exits with status 1 after printing the usage text when ``argv`` has fewer
    than 10 entries, or after printing a message when the species is unknown.
    """
    parser = OptionParser()
    parser.add_option("-a", "--islandfile1", action="store", type="string",
                      dest="islandfile1", metavar="<file>",
                      help="file 1 with islands info to be compared")
    parser.add_option("-b", "--islandfile2", action="store", type="string",
                      dest="islandfile2", metavar="<file>",
                      help="file 2 with islands info to be compared")
    parser.add_option("-s", "--species", action="store", type="string",
                      dest="species", help="species, mm8 or hg18",
                      metavar="<str>")
    parser.add_option(
        "-p", "--overlapin1", action="store", type="string",
        dest="overlapin1", metavar="<file>",
        help="file for islands in 1 overlapping with islands in 2")
    parser.add_option(
        "-q", "--nonoverlapin1", action="store", type="string",
        dest="nonoverlapin1",
        help="file for islands in 1 not overlapping with islands in 2 ",
        metavar="<file>")

    (opt, args) = parser.parse_args(argv)
    # Fewer than 10 entries cannot hold all five flag/value pairs.
    if len(argv) < 10:
        parser.print_help()
        sys.exit(1)

    if opt.species not in species_chroms:
        print("This species is not recognized, exiting")
        sys.exit(1)
    chroms = species_chroms[opt.species]

    total_overlap_number_1 = 0
    total_islands_1 = 0

    SeparateByChrom.separateByChrom(chroms, opt.islandfile1, '.island1')
    SeparateByChrom.separateByChrom(chroms, opt.islandfile2, '.island2')

    for chrom in chroms:
        # Both outputs are always created; 'with' closes them even if one of
        # the helpers raises.
        with open(chrom + '.1in2', 'w') as f:
            with open(chrom + '.1notin2', 'w') as g:
                bed_vals_1 = BED.BED(opt.species, chrom + '.island1', "BED_GRAPH", 0)
                bed_vals_2 = BED.BED(opt.species, chrom + '.island2', "BED_GRAPH", 0)
                islandlist1 = bed_vals_1[chrom]
                if len(islandlist1) == 0:
                    continue
                islandlist2 = bed_vals_2[chrom]
                # Without file-2 islands nothing can overlap, so
                # region_overlap is not consulted at all in that case.
                have_islands2 = len(islandlist2) > 0
                total_islands_1 += len(islandlist1)
                for islandlist1_item in islandlist1:
                    if have_islands2 and region_overlap(
                            islandlist1_item.start, islandlist1_item.end,
                            islandlist2) == 1:
                        write(islandlist1_item, f)
                        total_overlap_number_1 += 1
                    else:
                        write(islandlist1_item, g)

    # "%s %s" keeps the single space the print statement put between items.
    print("%s %s" % ("total number of island in " + opt.islandfile1 + ":     ",
                     total_islands_1))
    print("%s %s" % ("total number of island in " + opt.overlapin1 + ":     ",
                     total_overlap_number_1))

    SeparateByChrom.combineAllGraphFiles(chroms, '.1in2', opt.overlapin1)
    SeparateByChrom.combineAllGraphFiles(chroms, '.1notin2', opt.nonoverlapin1)

    SeparateByChrom.cleanup(chroms, '.1in2')
    SeparateByChrom.cleanup(chroms, '.1notin2')
    SeparateByChrom.cleanup(chroms, '.island1')
    SeparateByChrom.cleanup(chroms, '.island2')