Example #1
class TestBigWig(unittest.TestCase):
    def setUp(self):
        f = open("test_data/bbi_tests/test.bw")
        self.bw = BigWigFile(file=f)

    def test_get_summary(self):
        data = self.bw.query("chr1", 10000, 20000, 10)
        means = [x['mean'] for x in data]
        print means
        assert numpy.allclose(map(float, means), [
            -0.17557571594973645, -0.054009292602539061, -0.056892242431640622,
            -0.03650328826904297, 0.036112907409667966, 0.0064466032981872557,
            0.036949024200439454, 0.076638259887695306, 0.043518108367919923,
            0.01554749584197998
        ])

        # Summarize variant
        sd = self.bw.summarize("chr1", 10000, 20000, 10)
        assert numpy.allclose(sd.sum_data / sd.valid_count, [
            -0.17557571594973645, -0.054009292602539061, -0.056892242431640622,
            -0.03650328826904297, 0.036112907409667966, 0.0064466032981872557,
            0.036949024200439454, 0.076638259887695306, 0.043518108367919923,
            0.01554749584197998
        ])

        # Test min and max for this entire summary region
        data = self.bw.query("chr1", 10000, 20000, 1)
        maxs = [x['max'] for x in data]
        mins = [x['min'] for x in data]
        self.assertEqual(map(float, maxs), [0.289000004529953])
        self.assertEqual(map(float, mins), [-3.9100000858306885])

    def test_get_leaf(self):
        data = self.bw.query("chr1", 11000, 11005, 5)
        means = [x['mean'] for x in data]
        assert numpy.allclose(map(float, means), [
            0.050842501223087311, -2.4589500427246094, 0.050842501223087311,
            0.050842501223087311, 0.050842501223087311
        ])

        # Test min and max for this entire leaf region
        data = self.bw.query("chr1", 11000, 11005, 1)
        maxs = [x['max'] for x in data]
        mins = [x['min'] for x in data]
        self.assertEqual(map(float, maxs), [0.050842501223087311])
        self.assertEqual(map(float, mins), [-2.4589500427246094])

    def test_wrong_nochrom(self):
        data = self.bw.query("chr2", 0, 10000, 10)
        self.assertEqual(data, None)
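
For reference, the query API these tests exercise also works standalone. A minimal sketch, reusing the test file path from above: query() returns one summary dict per requested bin (with 'mean', 'min', and 'max' entries), or None when the chromosome is absent, as test_wrong_nochrom checks.

from bx.bbi.bigwig_file import BigWigFile

bw = BigWigFile(open("test_data/bbi_tests/test.bw", "rb"))

# Ten summary bins across chr1:10000-20000.
for bin_summary in bw.query("chr1", 10000, 20000, 10) or []:
    print("mean=%s min=%s max=%s" % (
        bin_summary['mean'], bin_summary['min'], bin_summary['max']))

# A chromosome missing from the file yields None rather than an empty list.
assert bw.query("chr2", 0, 10000, 10) is None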
Example #2
def main():
    p = optparse.OptionParser(__doc__)
    p.add_option('-A', '--absolute', action='store_true', dest='A',
                 default=False, help='absolute threshold')
    p.add_option('-s', '--standard_background', action='store_true',
                 dest='stdbg')
    p.add_option('-D', '--debug', action='store_true', dest='debug')
    options, args = p.parse_args()
    debug_c = 0

    BEDFILE = open(args[0], 'rU')
    BW = BigWigFile(file=open(args[1]))
    BEDout = open(args[2], 'w')

    for line in BEDFILE:
        print(line)
        line = line.strip().split('\t')
        x = BW.query(line[0], int(line[1]), int(line[2]),1)
        line.append(str(round(x[0]['mean'], 5)))
        BEDout.write("\t".join(line)+"\n")
        """
        for i in x:
            print i['mean']
        """

        if options.debug:
            debug_c += 1
            if debug_c >= 10:
                break


if __name__ == '__main__':
    main()
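Note that the loop above indexes x[0] unconditionally, so a BED line on a chromosome missing from the bigWig raises a TypeError, since query() returns None in that case (see Example #1). A hedged variant of the inner loop with that guard, under the same argument layout:

for line in BEDFILE:
    fields = line.strip().split('\t')
    x = BW.query(fields[0], int(fields[1]), int(fields[2]), 1)
    # query() returns None when the chromosome is absent from the bigWig.
    mean = round(x[0]['mean'], 5) if x is not None else float('nan')
    BEDout.write("\t".join(fields + [str(mean)]) + "\n")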
Example #3
def createMappabilityList(fragmentsMap, bwfile, fragmentCount, options):
    # keep record which fragment has decent mappability
    mappable = np.zeros((fragmentCount, ), dtype=np.float)

    # lazy load
    from bx.intervals.io import GenomicIntervalReader
    from bx.bbi.bigwig_file import BigWigFile
    bw = BigWigFile(open(bwfile))

    for fragmentId in fragmentsMap.keys():

        (chrom, start, end) = fragmentsMap[fragmentId]

        if (options.vverbose):
            print >> sys.stdout, "- process %s %d-%d " % (chrom, start, end)

        try:
            mappable[fragmentId] = bw.query(chrom, start, end, 1)[0]["mean"]
            if (np.isnan(mappable[fragmentId])):
                mappable[fragmentId] = 0
        except:
            mappable[fragmentId] = 0.
            # problem with invalid values
            if (options.vverbose):
                print >> sys.stderr, "Problem with bw file at %s %d-%d" % (
                    chrom, start, end)
                print traceback.format_exc()

    return mappable
Example #4
def big_wig_summary_worker((span, bw_list, region_bed_file_name, nb_proc,
                            verbose)):

    results = list()
    bw_label = [os.path.splitext(os.path.basename(p))[0] for p in bw_list]

    if verbose:
        sys.stderr.write("Processing: " + region_bed_file_name + "\n")
    for big_wig, cpt in zip(bw_list, range(len(bw_list))):

        bigwig = BigWigFile(open(big_wig, "r"))
        if verbose:

            sys.stderr.write("Computing coverage for file: " + big_wig + " [" +
                             str(multiprocessing.current_process()) + "], " +
                             str(span[1] - span[0]) + " chunks to process.\n")

        bed_windows = pybedtools.BedTool(region_bed_file_name)

        chr_cur = None

        # Loop through bed lines (features object)

        for i in bed_windows[slice(span[0], span[1])]:

            if chr_cur == None:
                chr_cur = i.chrom

            else:
                if i.chrom != chr_cur:
                    chr_cur = i.chrom

            # Note: bigWig coordinates are zero-based, half-open, like BED.
            bw_sum = bigwig.query(i.chrom, i.start, i.end, 1)

            if bw_sum is not None:
                bw_sum = bw_sum[0]['mean']
                bw_sum = np.nan_to_num(bw_sum)
                bw_sum = np.round(bw_sum, 2)
            else:
                bw_sum = 0.00

            results.append(
                (i.chrom + ":" + str(i.start), bw_label[cpt], float(bw_sum)))

    if verbose:

        sys.stderr.write("Computing coverage for file: " + big_wig + " [" +
                         str(multiprocessing.current_process()) +
                         "]. Job done.\n")

    return results
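
The doubled parentheses in the signature are Python 2 tuple-parameter unpacking, which lets the worker take one packed argument from a process pool. A sketch of how such a worker might be driven (Python 2 only, since tuple parameters were removed in Python 3); the file names and spans here are hypothetical:

import multiprocessing

bw_list = ["a.bw", "b.bw"]              # hypothetical bigWig files
region_bed_file_name = "windows.bed"    # hypothetical windowed BED file
spans = [(0, 500), (500, 1000)]         # (start, end) BED line ranges per worker

pool = multiprocessing.Pool(processes=2)
jobs = [(span, bw_list, region_bed_file_name, 2, False) for span in spans]
results = pool.map(big_wig_summary_worker, jobs)

# Flatten the per-worker lists into one (region, label, coverage) table.
coverage = [row for chunk in results for row in chunk]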
Example #5
def get_mappability(mappability_file,
                    vcf_file,
                    out_file,
                    region=None,
                    append_chr=True):
    map_reader = BigWigFile(open(mappability_file, 'rb'))

    vcf_reader = vcf.Reader(filename=vcf_file)

    if region is not None:
        chrom, beg, end = parse_region_for_vcf(region)
        try:
            vcf_reader = vcf_reader.fetch(chrom, start=beg, end=end)
        except ValueError:
            print("no data for region {} in vcf".format(region))
            vcf_reader = []

    data = []

    for record in vcf_reader:
        if append_chr:
            chrom = 'chr{0}'.format(record.CHROM)

        else:
            chrom = record.CHROM

        coord = record.POS

        beg = coord - 100

        beg = max(beg, 0)

        end = coord + 100

        result = map_reader.query(chrom, beg, end, 1)

        if result is None:
            mappability = 0

        else:
            mappability = result[0]['mean']

        data.append({
            'chrom': record.CHROM,
            'coord': record.POS,
            'mappability': mappability
        })

    data = pd.DataFrame(data)

    csvutils.write_dataframe_to_csv_and_yaml(data, out_file, dtypes())
Example #6
def output(fragmentsMap , fragmentList, fragmentPairs, fragmentCount, fragmentsChrom):
    '''
    outputs 2 files, the first containing 
    "chr    extraField      fragmentMid     marginalizedContactCount        mappable? (0/1)"
    
    and the second containing:
    "chr1   fragmentMid1    chr2    fragmentMid2    contactCount"
    
    optionally output the 2D contact matrix
    '''
    
    if (options.verbose):
        print >> sys.stdout, "- %s START   : output data " % (timeStamp())

    if ( options.outputFilename != "" ):
        outfile1 = gzip.open(options.outputDir+options.outputFilename+".fragmentLists.gz","wb")
    else:
        outfile1 = gzip.open(options.outputDir+os.path.basename(args[0])+".fragmentLists.gz","wb")
    
    fragmentIds = fragmentsMap.keys()
    fragmentIds.sort()

    # lookup mean mappability ratio
    bw = ""
    if (options.mappability != ""):
        # lazy load
        from bx.intervals.io import GenomicIntervalReader
        from bx.bbi.bigwig_file import BigWigFile
        bw = BigWigFile( open( options.mappability ) )
    

    for fragmentId in fragmentIds:

        contactCounts = 0
        mappable = 0  # default when neither a mappability track nor contacts are available
        chrom = fragmentsMap[fragmentId][0]
        midpoint =  fragmentsMap[fragmentId][1]

        if (options.vverbose):
            print >> sys.stdout, "- process %s %d " % (chrom, midpoint)

        if (fragmentList.has_key(fragmentId)):
            contactCounts = fragmentList[fragmentId]
        
        if (bw != ""):    
            try:
                mappable = bw.query(chrom, midpoint-options.resolution/2, midpoint+options.resolution/2, 1)[0]["mean"]
            except:
                mappable = 0
                # problem with invalid values
                if (options.vverbose):
                    print >> sys.stderr, "Problem with bw file at %s %d-%d" % (chrom, midpoint-options.resolution/2, midpoint+options.resolution/2)
                    print traceback.format_exc()
                
        elif (contactCounts>0):
            mappable=1

        outfile1.write("%s\t%d\t%s\t%f\n" % (chrom, midpoint, "NA", mappable))
        
    outfile1.close()
    
    if ( options.outputFilename != "" ):
        outfile2 = gzip.open(options.outputDir+options.outputFilename+".contactCounts.gz","wb")
    else:
        outfile2 = gzip.open(options.outputDir+os.path.basename(args[0])+".contactCounts.gz","wb")
        
    for fragmentIds, contactCounts in fragmentPairs.iteritems():
        chrom1 = fragmentsMap[fragmentIds[0]][0]
        midpoint1 =  fragmentsMap[fragmentIds[0]][1]
    
        chrom2 = fragmentsMap[fragmentIds[1]][0]
        midpoint2 =  fragmentsMap[fragmentIds[1]][1]
    
        outfile2.write("%s\t%d\t%s\t%d\t%d\n" % (chrom1, midpoint1, chrom2, midpoint2, contactCounts))
        
    outfile2.close()
    
    if (options.create2DMatrix or options.create2DMatrixPerChr):
        # lazy loading
        from scipy.sparse import lil_matrix
        import numpy

        # populate sparse matrix
        A = lil_matrix((fragmentCount, fragmentCount), dtype='i')    
        for fragmentIds, contactCounts in fragmentPairs.iteritems():
            A[fragmentIds[0],fragmentIds[1]] = contactCounts
            A[fragmentIds[1],fragmentIds[0]] = contactCounts
        # convert to coordinate format 
        B = A.tocoo()
            
        if (options.create2DMatrix):

            if ( options.outputFilename != "" ):
                outfile3 = options.outputDir+options.outputFilename+".matrix"
            else:
                outfile3 = options.outputDir+os.path.basename(args[0])+".matrix"

            if (options.verbose):
                print >> sys.stdout, "- save 2Dmatrix to %s " % (outfile3)
            
            f_handle=open(outfile3,'w')
            
            C = B.tocsr()
            for i in xrange(fragmentCount):
                numpy.savetxt(f_handle, C[i].toarray(),fmt='%i', delimiter='\t')
                
            f_handle.close()

        if (options.create2DMatrixPerChr):
            for chr in fragmentsChrom.keys():

                C = B.tocsc()[:,fragmentsChrom[chr][0]:fragmentsChrom[chr][1]].tocsr()[fragmentsChrom[chr][0]:fragmentsChrom[chr][1],:]

                fragmentRange=fragmentsChrom[chr][1]-fragmentsChrom[chr][0]
                header=['d']+[ "%s%d" % i for i in zip(['r']*fragmentRange,range(fragmentRange))]
                
                if ( options.outputFilename != "" ):
                    outfile3 = options.outputDir+options.outputFilename+"."+chr+".matrix"
                else:
                    outfile3 = options.outputDir+os.path.basename(args[0])+"."+chr+".matrix"

                if (options.verbose):
                    print >> sys.stdout, "- save 2Dmatrix for chromosome %s to %s " % (chr, outfile3)
                
                f_handle=open(outfile3,'w')
                f_handle.write('\t'.join(header)+"\n")
                for i in xrange(fragmentRange):
                    f_handle.write(header[i+1]+"\t")
                    numpy.savetxt(f_handle, C[i].toarray(),fmt='%i', delimiter='\t')
                    
                f_handle.close()
            
    if (options.verbose):
        print >> sys.stdout, "- %s FINISHED: output data" % (timeStamp())
Example #7
def processChromatin(offBed, f_configuration):
	""" process chromatin data into blocks of specific resolution """

	if (options.verbose):
		print "Start chromatin processing"
	
	if (not f_configuration["conf"].has_key("genomesize")):
		calculateChromosomeSpecifics(chromsizes, f_configuration)
	step_size = int(f_configuration["conf"]["genomesize"] * options.chromatinResolution / (360. - f_configuration["conf"]["target_radius"] )) 

	if (options.verbose):
		print "chromatin stepsize: %d nts" % (step_size)
			
	samfile = ""
	bigwig = ""
	
	# check if chromatin file has been supplied
	if (options.chromatinFormat == "bam"):
		import pysam # lazy import of required module
		# try open the bam file 
		try:
			samfile = pysam.Samfile(options.chromatin, "rb" )
		except:
			print >> sys.stderr, "[ERROR] chromatin file (bam) could not be read : %s" % (options.chromatin)
			traceback.print_exc()
			exit(1)
	elif (options.chromatinFormat == "bigwig"):
		from bx.intervals.io import GenomicIntervalReader # lazy import of required module
		from bx.bbi.bigwig_file import BigWigFile  # lazy import of required module
		# try open the bigwig file
		try:
			bigwig = BigWigFile(open(options.chromatin))
		except:
			print >> sys.stderr, "[ERROR] chromatin file (bigwig) could not be read : %s" % (options.chromatin)	
    			traceback.print_exc()
			exit(1)
	
	if (options.verbose):
		print "... chromatin file opened"
	
	max_value = 0.
	
	# open output file	
	if (os.path.exists(options.output_dir+"chromatin.txt")):
		if (options.verbose):
			print "using existing chromatin file"
		for line in fileinput.input([options.output_dir+"chromatin.txt"]):
			c_value = float(line.strip().split("\t")[3])
			
			if (c_value > max_value):
				max_value = c_value
				
	else:
		f_chromatin = open(options.output_dir+"chromatin.txt","w")
		# process calculate mean chromatin density value for region
		for region in offBed.values():
			chrom = region[0]
			if (options.verbose):
				print "Collecting chromatin data for chromosom %s" % (chrom)
			
			
			for span_start in range(region[1], region[2], step_size):
				span_end = min(span_start + step_size, region[2])
				c_value = 0.
				
				if (span_start == span_end):
					continue
				
				if (options.chromatinFormat=="bam"):
					try:
						on_chromatin_features = 0.
						for pileupcolumn in samfile.pileup(chrom,span_start,span_end):
							on_chromatin_features += pileupcolumn.n
						c_value = on_chromatin_features/(span_end-span_start)
					except:
						if (options.verbose):
							print >> sys.stderr, "[WARN] bam file exception : %s:%d-%d" % (chrom,span_start,span_end)
				elif (options.chromatinFormat=="bigwig"):
					try:
						bwsummary = bigwig.query(chrom,span_start,span_end, 1 ) 
						c_value = bwsummary[0]["mean"]
					except:
						if (options.verbose):
							print >> sys.stderr, "[WARN] bigwig file exception : %s:%d-%d" % (chrom,span_start,span_end)

				if (math.isnan(c_value)):
					c_value = 0.

				# add pseudocount to circumvent log issues
				c_value += 0.001
				
				if (c_value > max_value):
					max_value = c_value

				f_chromatin.write("%s\t%d\t%d\t%.3f\n" % (chrom, span_start,span_end, c_value))
		
		# close filehandle
		f_chromatin.close()
	
	if (options.verbose):
		print "Maximal chromatin value: %.3f" % (max_value)
	
	# add track to configuration file
	if (not f_configuration.has_key("plots")):
		f_configuration["plots"] = {}

	f_configuration["plots"]["type"] = "heatmap"
	f_configuration["plots"]["color"] = "spectral-9-div"
	f_configuration["plots"]["stroke_thickness"] = "1"
	f_configuration["plots"]["stroke_color"] = "black"
		
	if (not f_configuration["plots"].has_key("plot")):
		f_configuration["plots"]["plot"] = []
	
	stroke_thickness = 0
	if (options.chromatinResolution >= 0.5):
		stroke_thickness = 1


	f_configuration["plots"]["plot"] += ['''
<plot>
	show             = conf(show_histogram)
	type             = histogram
	fill_under       = yes
	fill_color       = lgrey
	file             = %s
	r0               = 0.95r
	r1               = 0.997r
	scale_log_base   = 5
	stroke_thickness = %d
	color			 = black
	min              = 0.0
	max              = %.3f
	
	<axes>
		show = data
		thickness = 1
		color = lgrey
		<axis>
			spacing     = 0.1r
		</axis>
	</axes>
</plot>
''' % (options.output_dir+"chromatin.txt", stroke_thickness, max_value)]
	
	if (options.verbose):
		print "Finished chromatin processing"
Example #8
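# NOTE: this snippet assumes chroms, trScores, maxScore and maxGene were initialized earlier in the script.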
pol2 = BigWigFile(open('/Users/JME/Documents/CSE549/hg19.bigWig'))
annotations = open('/Users/JME/Documents/CSE549/hg19.txt', 'r')
for line in annotations:
	lines = line.split('\t')
	if(lines[1] in chroms.keys()):
		chroms[lines[1]][int(lines[3])] = [int(lines[4]), lines[2]]

for i in chroms.keys():
	for j in chroms[i]:
		if not j in trScores.keys():
			start = 0
			end = 0
			if chroms[i][j][1] == '-':
				start = chroms[i][j][0]
				end = j
			else:
				start = j
				end = chroms[i][j][0]
			startTr = pol2.query(i, start - 30, start + 300, 1)
			endTr = pol2.query(i, start + 301, end, 1)
			if startTr and endTr:
				if startTr[0]['mean'] > 0 and endTr[0]['mean'] > 0:
					trScores[j] = (startTr[0]['mean']) / (endTr[0]['mean'])
					if trScores[j] > maxScore:
						maxScore = trScores[j]
						maxGene = j

pickle.dump(chroms, open('chrom_data.p', 'wb'))
pickle.dump(trScores, open('scores.p', 'wb'))
print("Max: " + maxScore  + ", gene: " + maxGene)
Example #9
def process():
	# get the data
	readdata()
	
	# set chromatin flag
	withChromatin = 'false'
	if (options.chromatin!=""):
		withChromatin = 'true'
		
	samfile = ""
	bigwig = ""
	# check if chromatin file has been supplied
	if (options.chromatin != "" and options.chromatinFormat == "bam"):
		import pysam # lazy import of required module
		# try open the bam file 
		try:
			samfile = pysam.Samfile(options.chromatin, "rb" )
		except:
			exit(1)
	elif (options.chromatin != "" and options.chromatinFormat == "bigwig"):
		from bx.intervals.io import GenomicIntervalReader # lazy import of required module
		from bx.bbi.bigwig_file import BigWigFile  # lazy import of required module
		# try open the bigwig file
		try:
			bigwig = BigWigFile(open(options.chromatin))
		except:
			exit(1)

	processors = 1
	if (options.processors <= 0): # figure out number of processors
		try:
			processors = multiprocessing.cpu_count()
		except:
			processors = 1
	else:
		processors = options.processors

	output_ontargets = "" # gets filled with a json object for on-targets
	
	# process one on-target region/locus of interest (LOI) at a time
	for loi in targets.keys():
		# info for whole region
		bed = loiBed[loi]
		chrom = bed[0]
		cstart = int(bed[1])
		cstop = int(bed[2])
		clabel = bed[3]
		cstrand = bed[5]
		tts = records[loi].seq	

		# process individual eligible target sites in this region/locus
		for target in targets[loi]:
			tstart = int(target[1])
			tstop = int(target[2])
			targetid = target[3]
			tstrand = target[5]
			tsize = tstop-tstart
			lflank = min(tstart-cstart, options.flanks)			
			rflank = min(cstop-tstop, options.flanks)
			
			targetSeq = ""
			if (cstrand == "+"):
				targetSeq = tts[tstart-cstart-lflank: tstop-cstart+rflank]
			else:
				targetSeq = tts.reverse_complement()[tstart-cstart-lflank: tstop-cstart+rflank]
			targetStr = [" " for i in range(lflank+tsize+rflank)]
			
			for i in range(lflank, lflank+tsize):
				targetStr[i]='+'
				
			if (target[8]!=""):
				for s in target[8].strip('d').split('d'):
					if (cstrand == "+"):
						targetStr[lflank+int(s)]='_'
					else:
						targetStr[lflank+tsize-int(s)-1]='_'
			
			on_chromatin_features = 0.
			if (options.chromatin != "" and options.chromatinFormat=="bam"):
				try:
					for pileupcolumn in samfile.pileup(chrom,tstart-lflank,tstop+rflank):
						on_chromatin_features += pileupcolumn.n
					on_chromatin_features = "%.3f" % (on_chromatin_features/(lflank+tsize+rflank))
				except:
					print >> sys.stderr, "[WARN] bam file exception : %s:%d-%d" % (chrom,tstart-lflank,tstop+rflank)
			elif (options.chromatin != "" and options.chromatinFormat=="bigwig"):
				try:
					bwsummary = bigwig.query(chrom, tstart-lflank, tstop+rflank, 1 ) 
					on_chromatin_features = "%.3f" % (bwsummary[0]["mean"])		
				except:
					print >> sys.stderr, "[WARN] bigwig file exception : %s:%d-%d" % (chrom,tstart-lflank,tstop+rflank)
			else:
				on_chromatin_features = "-"
			
			off_targets = {} # hashing start positions (w/r/t on target) and encoding (grouping off-targets) 				
			
			# branch point
			if (processors == 1): # no need to multiprocess

				for tfo_region in tfotargets[targetid].keys():
					# process each error category
					for tfo_error in tfotargets[targetid][tfo_region].keys():
						processOfftarget(off_targets, targetid, tfo_region, tfo_error, lflank, rflank, tsize, tstrand, samfile, bigwig);
						
			else: # contribute workload
				# load up work queue
				work_queue = multiprocessing.JoinableQueue(processors*3)
				
				# create a queue to pass to workers to store the results
				result_queue = multiprocessing.Queue()
				
				num_jobs = 0
				for tfo_region in tfotargets[targetid].keys():
					num_jobs += len(tfotargets[targetid][tfo_region])
					
				# spawn workers
				for i in range(min(num_jobs,processors)):
					worker = Worker(work_queue, result_queue, options.chromatin, options.chromatinFormat)
					worker.daemon=True
					worker.start()
					
				# process off-targets w/r/t to on-target
				# process each intersecting interval
				for tfo_region in tfotargets[targetid].keys():
					# process each error category
					for tfo_error in tfotargets[targetid][tfo_region].keys():
						work_queue.put(Job(targetid, tfo_region, tfo_error, lflank, rflank, tsize, tstrand));
						
				work_queue.close()		
				work_queue.join()
	
				for job in range(num_jobs):
					result = result_queue.get()
					for offset in result.keys():
						if (not off_targets.has_key(offset)):
							off_targets[offset] = {}
						for encoding in result[offset].keys():
							if (not off_targets[offset].has_key(encoding)):
								off_targets[offset][encoding] = result[offset][encoding]
			
			# merge point			
			output_sequence_w = ""
			output_sequence_c = ""
			for i in range(len(targetStr)):
				if (targetStr[i] ==" "):
					output_sequence_w += targetSeq.lower()[i]
					output_sequence_c += targetSeq.complement().lower()[i]
				else:
					output_sequence_w += targetSeq.upper()[i]
					output_sequence_c += targetSeq.complement().upper()[i]				

			# generate oaColumns (data table header)
			output_offtargets = ""
			for i in range(len(annotations)):
				output_offtargets += (',\n		{ "sTitle":"%s", "sToolTip":"number of off-targets intersecting with annotation data: %s" }' % (annotations[i],annotations[i]))
			output_offtargets += (',\n		{ "sTitle":"","bSortable":false,"bSearchable":false,"sClass":"btn_details","sToolTip":"Toggle detailed off-target inspector" }\n	],\n	"aaData":[')
			

			# generate aaData (data table content)
			for offset in off_targets.keys():
				for encoding in off_targets[offset]:
					ots = off_targets[offset][encoding]
					# format chromatin data if available
					if (options.chromatin != ""):
						ots["chromatin"] = "%.3f" % (ots["chromatin_max"])
					else:
						ots["chromatin"] = "-"

					output_offtargets += '\n		["%s",0,%d,%d,"%d-%d",%d,"%s",%d,[' % (encoding, ots["errors"], offset, offset-lflank, offset+len(encoding)-lflank,ots["copies"], ots["chromatin"], ots["length"],)
					for ot in ots["data"]:
						output_offtargets += '\n			["%s",%d,%d,"%s",%s],' % (ot)
					
					output_offtargets = output_offtargets[:-1]+"\n		]"
					if (len(annotations)>0):
						output_offtargets += ",%s" % (",".join(["%s" % el for el in ots["annotation"]]))
					output_offtargets += ',""],' 
			
			# finalise off-target json object and write output
			output_offtargets = ('''{
	"bPaginate": true,
	"bProcessing": true,
	"bAutoWidth": false,
	"bInfo": true,
	"bLengthChange": true,
    "bFilter": true,
	"iDisplayLength": 10,
	"aLengthMenu": [[10, 50, 100, -1], [10, 50, 100, "All"]],
	"bJQueryUI": true,
	"bDeferRender": true,
	"aaSorting": [[2, "asc"]],
	"aoColumns": [
		{ "sTitle":"%s","sClass":"monospace left", "sToolTip":"shows the part of the primary target that is responsible for off-targets" },
		{ "sTitle":"overlap", "sType": "numeric","sToolTip":"number of nucleotide positions off-targets overlap the primary target" },
		{ "sTitle":"errors", "sType": "numeric","sToolTip":"expected number of mismatches between the triplex-forming molecule designed against the primary target and off-targets" },
		{ "sTitle":"offset","sType": "numeric","bSearchable":false,"bVisible":false, "sToolTip":"the offset is used internally for computing the lefthand-side alignment"  },
		{ "sTitle":"sub region", "sToolTip":"region of the primary target this off-target spans" },
		{ "sTitle":"copies", "sType": "numeric","sToolTip":"number of copies of this off-target category" },
		{ "sTitle":"max. chromatin", "bVisible": %s, "sToolTip":"maximal chromatin score observed in any of the off-targets" },
		{ "sTitle":"length", "sType": "numeric","sToolTip":"length of the off-target", "bVisible":false },
		{ "sTitle":"offtargets","bVisible": false, "sToolTip":"detailed off-target list with location and annotation"  } '''+output_offtargets[:-1]+"\n	]\n}") % (output_sequence_w+"<br/>"+output_sequence_c, withChromatin)

		
			output = open(options.output_dir+targetid+"_off_targets.json","w")
			output.write(output_offtargets)
			output.close()	
			
			# pretty sequence encoding for on-target region
			output_sequence_w = ""
			output_sequence_c = ""
			lastpos = " "
			interruptions = 0
			for i in range(len(targetStr)):
				if (targetStr[i] != lastpos):
					if (lastpos !=" "):
						output_sequence_w+="</span>"
						output_sequence_c+="</span>"
					if (targetStr[i] =="+"):
						output_sequence_w += "<span class='valid_triad'>"
						output_sequence_c += "<span class='valid_triad'>"
					elif (targetStr[i] =="_"):
						output_sequence_w += "<span class='invalid_triad'>"
						output_sequence_c += "<span class='invalid_triad'>"
					lastpos = targetStr[i]
					if (targetStr[i] =="_"):
						interruptions += 1
				if (targetStr[i] ==" "):
					output_sequence_w += targetSeq.lower()[i]
					output_sequence_c += targetSeq.complement().lower()[i]
				else:
					output_sequence_w += targetSeq.upper()[i]
					output_sequence_c += targetSeq.complement().upper()[i]				
			if (lastpos == "+" or lastpos == "_"):
				output_sequence_w+="</span>"
				output_sequence_c+="</span>"
			output_sequence = "<span class='monospace flank_triad'>5'-"+output_sequence_w+"-3'<br/>3'-"+output_sequence_c+"-5'</span>"

			# count the number of off-targets
			offtarget_counter = 0
			for tfo_region in tfotargets[targetid].keys():
				for tfo_errors in tfotargets[targetid][tfo_region].keys():
					offtarget_counter+=len(tfotargets[targetid][tfo_region][tfo_errors])

			output_ontargets += '\n\t\t["%s","%s",%d,%d,%d,"%s",%d,"%s",%d,"%s",%d,%d],' % (targetid, chrom, tstart, tstop, (tstop-tstart), on_chromatin_features, interruptions, output_sequence, offtarget_counter, tstrand, lflank, rflank)
	
	# finalizes on-target json object and write output
	output_ontargets = '''{
	"bPaginate": true,
	"bProcessing": true,
	"bAutoWidth": false,
	"bInfo": true,
	"bLengthChange": true,
    "bFilter": false,
	"iDisplayLength": 10,
	"bJQueryUI": true,
	"aLengthMenu": [[5, 10, 25, -1], [5, 10, 25, "All"]],
	"aoColumns": [
		{ "sTitle": "region Id", "sToolTip":"internal id used to identify regions of overlapping putative primary targets" },
		{ "sTitle": "chr", "sToolTip":"chromosome this region is located in" },
		{ "sTitle": "start", "sType": "numeric", "sToolTip":"chromosomal start position of this region" },
		{ "sTitle": "end", "sType": "numeric", "sToolTip":"chromosomal end position of this region" },
		{ "sTitle": "length", "sType": "numeric", "sToolTip":"number of nucleotides spanned by this target region" },
		{ "sTitle": "chromatin", "bVisible": '''+withChromatin+''', "sToolTip":"chromatin score averaged over the shown region" },
		{ "sTitle": "Y-interruptions", "sType": "numeric", "sToolTip":"number of pyrimdine interruptions in the polypurine/polypyrimidine tract of the region" },
		{ "sTitle": "on-target region", "bSortable": false, "sClass": "left", "sToolTip":"sequence of the region (plus additional flanking positions if specified)"  },
		{ "sTitle": "off-targets", "bSearchable": false, "bVisible": false, "sToolTip":"total number of off-targets accumulated over all putative primary targets in this region" },
		{ "sTitle": "strand", "bSearchable": false, "bVisible": false, "sToolTip":"strand on which the purine tract is located" },
		{ "sTitle": "offset left", "sType": "numeric", "bSearchable": false, "bVisible": false, "sToolTip":"number of upstream flanking positions shown in the sequence" },
		{ "sTitle": "offset right", "sType": "numeric", "bSearchable": false, "bVisible": false, "sToolTip":"number of downstream flanking positions shown in the sequence" }
	],	
	"aaData": [''' + output_ontargets[:-1] + '\n	]\n}' 
	
	output = open(options.output_dir+"primary_target_regions.json","w")
	output.write(output_ontargets)
	output.close()	
	

	# close file handle
	if (options.chromatin != "" and options.chromatinFormat == "bam"):
		try:
			samfile.close()
		except:
			exit(1)
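
The multiprocessing branch above fans jobs out through a JoinableQueue with daemon workers and collects dictionaries from a result queue; the Worker and Job classes are external to this snippet. A stripped-down sketch of that producer/consumer pattern using only the standard library:

import multiprocessing

def worker(work_queue, result_queue):
    while True:
        job = work_queue.get()
        result_queue.put(job * job)  # stand-in for the real off-target work
        work_queue.task_done()

work_queue = multiprocessing.JoinableQueue()
result_queue = multiprocessing.Queue()

for _ in range(4):
    p = multiprocessing.Process(target=worker, args=(work_queue, result_queue))
    p.daemon = True  # daemon workers die with the main process
    p.start()

jobs = range(10)
for job in jobs:
    work_queue.put(job)
work_queue.join()  # blocks until task_done() has been called for every job

results = [result_queue.get() for _ in jobs]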
Example #10
class BigWig(object):
    def __init__(self, filename):
        self.filename = filename
        self.determine_sizes()
        self.bwf = BigWigFile(open(filename))

    def determine_sizes(self):
        self.sizes = {}
        fh = open(self.filename, "rb")
        # read magic number to guess endianness
        magic = fh.read(4)
        if magic == '&\xfc\x8f\x88':
            endianness = '<'
        elif magic == '\x88\x8f\xfc&':
            endianness = '>'
        else:
            raise IOError("The file is not in bigwig format")

        # read the header
        info = struct.unpack(endianness + 'HHQQQHHQQIQ', fh.read(60))
        self.version = info[0]
        self.zoom_levels = info[1]
        self.chromosome_tree_offset = info[2]
        self.full_data_offset = info[3]
        self.full_index_offset = info[4]
        self.field_count = info[5]
        self.defined_field_count = info[6]
        self.auto_SQL_offset = info[7]
        self.total_summary_offset = info[8]
        self.uncompress_buf_size = info[9]
        
        # seek to the chromosome B-tree header
        fh.seek(self.chromosome_tree_offset)
        # read magic again
        magic = fh.read(4)
        if magic == '\x91\x8c\xcax':
            endianness = '<'
        elif magic == 'x\xca\x8c\x91':
            endianness = '>'
        else:
            raise ValueError("Wrong magic for this bigwig data file")

        info2 = struct.unpack(endianness + 'IIIQQ', fh.read(28))
        self.block_size = info2[0]
        self.key_size = info2[1]
        self.val_size = info2[2]
        self.item_count = info2[3]

        info3 = struct.unpack(endianness + 'BBH', fh.read(4))
        self.is_leaf = info3[0]
        self.count = info3[2]

        for n in range(self.count):
            format_code = endianness + str(self.key_size) + 'sII'
            info = struct.unpack(format_code, fh.read(self.key_size + 2 * 4))
            key, chrom_id, chrom_size = info

            key = key.replace('\x00', '')
            self.sizes[key] = chrom_size

    def get_as_array(self, chrom, start, end):
        return self.bwf.get_as_array(chrom, start, end)

    def get(self, chrom, start, end):
        return self.bwf.get(chrom, start, end)

    def query(self, chrom, start, end, number):
        return self.bwf.query(chrom, start, end, number)
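
The header parsing above relies on Python 2 byte strings; under Python 3, fh.read(4) returns bytes, so the magic constants need b'' literals. A minimal sketch of the same endianness probe and header read (the bigWig magic number is 0x888FFC26):

import struct

def read_bigwig_version(path):
    # Endianness probe plus fixed-header read, mirroring determine_sizes() above.
    with open(path, "rb") as fh:
        magic = fh.read(4)
        if magic == b'&\xfc\x8f\x88':      # 0x888FFC26, little-endian
            endianness = '<'
        elif magic == b'\x88\x8f\xfc&':    # 0x888FFC26, big-endian
            endianness = '>'
        else:
            raise IOError("not a bigwig file: %r" % (magic,))
        # The remaining 60 bytes of the fixed header, same layout as above.
        info = struct.unpack(endianness + 'HHQQQHHQQIQ', fh.read(60))
        return info[0]  # the version field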