def main():
    """Summarize per-base scores over each interval of an interval file.

    Command line (parsed with ``doc_optparse.parse(__doc__)``):
        args[0]         score file; read with ``load_scores_ba_dir`` when the
                        ``binned`` option is set, else ``load_scores_wiggle``
        args[1]         interval file; the first three whitespace-separated
                        fields of each line are chrom, start and stop
        args[2]         optional output path (defaults to ``sys.stdout``)
        options.mask    optional mask file, read with
                        ``binned_bitsets_from_file``

    For every interval one tab-separated row is written:
    ``chrom, start, stop, avg, min, max``. Positions in ``range(start, stop)``
    are skipped when the score is falsy, when the position is set in the mask
    for that chromosome, or when the score is NaN. If no position contributes,
    avg/min/max are written as the string "nan".
    """
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    try:
        score_fname = args[0]
        interval_fname = args[1]
        if len(args) > 2:
            out_file = open(args[2], 'w')
        else:
            out_file = sys.stdout
        binned = bool(options.binned)
        mask_fname = options.mask
    except Exception:
        # Missing arguments/options: hand over to doc_optparse (presumably
        # prints usage and terminates -- confirm against doc_optparse.exit).
        doc_optparse.exit()

    try:
        if binned:
            scores_by_chrom = load_scores_ba_dir(score_fname)
        else:
            scores_by_chrom = load_scores_wiggle(score_fname)

        if mask_fname:
            # binned_bitsets_from_file is expected to consume the whole file
            # before returning, so the handle can be closed right away.
            with open(mask_fname) as mask_file:
                masks = binned_bitsets_from_file(mask_file)
        else:
            masks = None

        with open(interval_fname) as interval_file:
            for line in interval_file:
                fields = line.split()
                if not fields:
                    # Blank line: nothing to summarize.
                    continue
                chrom, start, stop = fields[0], int(fields[1]), int(fields[2])
                total = 0
                count = 0
                min_score = 100000000
                max_score = -100000000

                # These lookups do not depend on the position, so do them once
                # per interval instead of once per base.
                if chrom in scores_by_chrom:
                    chrom_scores = scores_by_chrom[chrom]
                else:
                    chrom_scores = None
                if masks and chrom in masks:
                    chrom_mask = masks[chrom]
                else:
                    chrom_mask = None

                if chrom_scores is not None:
                    for i in range(start, stop):
                        score = chrom_scores[i]
                        # NOTE(review): this truthiness test also drops scores
                        # that are exactly 0 -- confirm that is intended.
                        if not score:
                            continue
                        # Skip if base is masked
                        if chrom_mask is not None and chrom_mask[i]:
                            continue
                        # Only count the score if it is not 'nan'
                        if not isNaN(score):
                            total += score
                            count += 1
                            max_score = max(score, max_score)
                            min_score = min(score, min_score)

                if count > 0:
                    avg = total / count
                else:
                    avg = "nan"
                    min_score = "nan"
                    max_score = "nan"

                print("\t".join(
                    map(str, [chrom, start, stop, avg, min_score, max_score])),
                    file=out_file)
    finally:
        # Never close the interpreter's stdout; only close files we opened.
        if out_file is sys.stdout:
            out_file.flush()
        else:
            out_file.close()
def main():
    """Write average/min/max score for every interval in an interval file.

    Positional arguments come from ``doc_optparse.parse(__doc__)``:
    the score file, the interval file and an optional output path
    (``sys.stdout`` is used when it is omitted). ``options.binned`` selects
    ``load_scores_ba_dir`` over ``load_scores_wiggle`` for reading scores and
    ``options.mask`` names an optional mask file read with
    ``binned_bitsets_from_file``.

    Each interval line supplies chrom, start and stop as its first three
    whitespace-separated fields. A position in ``range(start, stop)``
    contributes only if its score is truthy, it is not set in the mask of that
    chromosome and the score is not NaN. One tab-separated row
    ``chrom, start, stop, avg, min, max`` is written per interval; the three
    statistics are the string "nan" when nothing contributed.
    """
    # Parse command line
    options, args = doc_optparse.parse(__doc__)
    try:
        score_fname = args[0]
        interval_fname = args[1]
        if len(args) > 2:
            out_file = open(args[2], "w")
        else:
            out_file = sys.stdout
        binned = bool(options.binned)
        mask_fname = options.mask
    except Exception:
        # A bare ``except:`` would also trap SystemExit/KeyboardInterrupt.
        # doc_optparse.exit() presumably prints usage and terminates --
        # confirm against its implementation.
        doc_optparse.exit()

    try:
        if binned:
            scores_by_chrom = load_scores_ba_dir(score_fname)
        else:
            scores_by_chrom = load_scores_wiggle(score_fname)

        masks = None
        if mask_fname:
            # The mask reader is expected to consume the file eagerly, so the
            # handle does not need to outlive this block.
            with open(mask_fname) as mask_file:
                masks = binned_bitsets_from_file(mask_file)

        with open(interval_fname) as interval_file:
            for line in interval_file:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines instead of failing on fields[0].
                    continue
                chrom, start, stop = fields[0], int(fields[1]), int(fields[2])
                total = 0
                count = 0
                min_score = 100000000
                max_score = -100000000

                if chrom in scores_by_chrom:
                    # Per-chromosome lookups are invariant for the interval.
                    chrom_scores = scores_by_chrom[chrom]
                    chrom_mask = None
                    if masks and chrom in masks:
                        chrom_mask = masks[chrom]

                    for i in range(start, stop):
                        score = chrom_scores[i]
                        # NOTE(review): a score of exactly 0 is falsy and is
                        # therefore skipped here -- confirm that is intended.
                        if not score:
                            continue
                        # Skip if base is masked
                        if chrom_mask is not None and chrom_mask[i]:
                            continue
                        # Only count the score if it is not 'nan'
                        if isNaN(score):
                            continue
                        total += score
                        count += 1
                        max_score = max(score, max_score)
                        min_score = min(score, min_score)

                if count > 0:
                    avg = total / count
                else:
                    avg = min_score = max_score = "nan"

                row = [chrom, start, stop, avg, min_score, max_score]
                # write() behaves the same on Python 2 and 3, unlike the
                # ``print >> out_file`` statement it replaces.
                out_file.write("\t".join(map(str, row)) + "\n")
    finally:
        # Close only what was opened here; leave the process stdout usable.
        if out_file is sys.stdout:
            out_file.flush()
        else:
            out_file.close()
def __init__(self, wigFile):
    '''Read a wiggle file and build the per-chromosome score table.

    Each (chrom, pos, val) record yielded by ``bx.wiggle.Reader`` is stored in
    ``self.scores``, a dict mapping the upper-cased chromosome name to a
    ``BinnedArray``. When the value already stored at ``pos`` is NaN the new
    value replaces it; otherwise the new value is added to it.

    Progress is printed every 100000 records and the number of records read is
    printed at the end.

    Args:
        wigFile: path of the wiggle file to read.
    '''
    self.scores = {}
    # Matches runs of digits, '.', '-' and '+'; not used inside this method.
    self.num_re = re.compile(r'[\d\.\-\+]+')

    # Tracked separately so an empty file reports 0 instead of raising
    # NameError on an unbound loop variable.
    loaded = 0
    with open(wigFile) as fh:
        for i, (chrom, pos, val) in enumerate(bx.wiggle.Reader(fh)):
            chrom = chrom.upper()
            if chrom not in self.scores:
                self.scores[chrom] = BinnedArray()
            chrom_scores = self.scores[chrom]
            # Presumably NaN marks a position not assigned yet -- confirm
            # against BinnedArray's default value.
            if isNaN(chrom_scores[pos]):
                chrom_scores[pos] = val
            else:
                chrom_scores[pos] += val
            # Single-argument print(...) gives the same output on Python 2
            # and Python 3.
            if i % 100000 == 0:
                print("%i datapoints loaded \r" % i)
            loaded = i + 1
    print("total " + str(loaded) + " points loaded")
def __init__(self, wigFile):
    '''Load every data point of a wiggle file into ``self.scores``.

    ``self.scores`` maps an upper-cased chromosome name to a ``BinnedArray``.
    For each (chrom, pos, val) record from ``bx.wiggle.Reader`` the value is
    assigned when the slot currently holds NaN and added to the existing value
    otherwise.

    A progress line is printed every 100000 records, followed by a final line
    with the number of records read.

    Args:
        wigFile: path of the wiggle file to read.
    '''
    self.scores = {}
    # Pattern for runs of digits, '.', '-' and '+'; unused in this method.
    self.num_re = re.compile(r'[\d\.\-\+]+')

    n_points = 0
    # The context manager closes the handle even if the reader raises.
    with open(wigFile) as fh:
        for chrom, pos, val in bx.wiggle.Reader(fh):
            chrom = chrom.upper()
            if chrom not in self.scores:
                self.scores[chrom] = BinnedArray()
            current = self.scores[chrom][pos]
            # Presumably NaN means "no value stored yet" -- confirm against
            # BinnedArray's default.
            if isNaN(current):
                self.scores[chrom][pos] = val
            else:
                self.scores[chrom][pos] += val
            # n_points is still the zero-based index of this record here.
            if n_points % 100000 == 0:
                print("%i datapoints loaded \r" % n_points)
            n_points += 1
    # Reports the real record count; 0 for an empty file rather than a
    # NameError on an unbound loop variable.
    print("total " + str(n_points) + " points loaded")