def _consumer(input_queue, output_queue):
    """Score BED lines taken from input_queue and put rows on output_queue.

    Items are pulled from ``input_queue`` until a ``None`` arrives. Each
    item is parsed with ``BEDFeature.from_string`` and converted into one
    tab-delimited row made of: the feature name, its position formatted as
    ``chrom:start-end[strand]``, the length of the extracted array, the mean
    of its finite values (``nan`` when there are none), and one
    ``best_sliding_window`` result per entry of ``window_sizes``.
    After the loop ends a single ``None`` is put on ``output_queue``.

    NOTE(review): ``chrom_bigwig_dict`` and ``window_sizes`` are not
    parameters of this function; presumably they come from an enclosing or
    module scope -- confirm against the surrounding code.
    """
    line = input_queue.get()
    while line is not None:
        feature = BEDFeature.from_string(line)
        # fetch the per-base values for this feature from its chromosome file
        values = extract_bigwig_data(feature, chrom_bigwig_dict[feature.chrom])
        # one score per requested window size, aggregated with np.mean
        window_scores = [best_sliding_window(values, size, np.mean)
                         for size in window_sizes]
        # average over finite entries only; nan when nothing is finite
        finite_values = values[np.isfinite(values)]
        if len(finite_values) > 0:
            mean_cons = np.mean(finite_values)
        else:
            mean_cons = np.nan
        position = '%s:%d-%d[%s]' % (feature.chrom, feature.tx_start,
                                     feature.tx_end, feature.strand)
        row = [feature.name, position, str(len(values)), str(mean_cons)]
        row += [str(score) for score in window_scores]
        output_queue.put('\t'.join(row))
        line = input_queue.get()
    # the input None has been seen; emit a trailing None on the output side
    output_queue.put(None)
def bed_feature_conservation(f, chrom_bigwig_dict, hists):
    """Build the output fields for one feature and update its histogram.

    The values for ``f`` are read with ``extract_bigwig_data`` from the file
    registered for ``f.chrom`` in ``chrom_bigwig_dict``; non-finite entries
    are dropped. When at least one finite value remains, the values are
    clipped to ``[BIN_MIN, BIN_MAX]``, binned with ``BINS`` and the counts
    are added in place to ``hists[f.name]``.

    Returns a list of strings: the pieces of ``f.name`` split on ``'|'``,
    then chrom, start, end, strand, the number of finite values, and the
    sum of the finite values (``'NA'`` when there are none).
    """
    values = extract_bigwig_data(f, chrom_bigwig_dict[f.chrom])
    # keep only finite entries; missing data is ignored
    finite_values = values[np.isfinite(values)]
    if len(finite_values) > 0:
        clipped = np.clip(finite_values, BIN_MIN, BIN_MAX)
        hists[f.name] += np.histogram(clipped, BINS)[0]
        cons_str = str(np.sum(finite_values))
    else:
        cons_str = 'NA'
    row = f.name.split('|')
    row += [f.chrom,
            str(f.tx_start),
            str(f.tx_end),
            f.strand,
            str(len(finite_values)),
            cons_str]
    return row
def bed_feature_conservation(f, chrom_bigwig_dict, hists):
    """Build the output fields for one feature and update its histogram.

    The values for ``f`` are read with ``extract_bigwig_data`` from the file
    registered for ``f.chrom`` in ``chrom_bigwig_dict``; non-finite entries
    are dropped. When at least one finite value remains, the values are
    clipped to ``[BIN_MIN, BIN_MAX]``, binned with ``BINS`` and the counts
    are added in place to ``hists[f.name]``.

    Returns a list of strings: the pieces of ``f.name`` split on ``'|'``,
    then chrom, start, end, strand, the number of finite values, and the
    sum of the finite values (``'NA'`` when there are none).
    """
    values = extract_bigwig_data(f, chrom_bigwig_dict[f.chrom])
    # keep only finite entries; missing data is ignored
    finite_values = values[np.isfinite(values)]
    if len(finite_values) > 0:
        clipped = np.clip(finite_values, BIN_MIN, BIN_MAX)
        hists[f.name] += np.histogram(clipped, BINS)[0]
        cons_str = str(np.sum(finite_values))
    else:
        cons_str = 'NA'
    row = f.name.split('|')
    row += [f.chrom,
            str(f.tx_start),
            str(f.tx_end),
            f.strand,
            str(len(finite_values)),
            cons_str]
    return row
def conservation_serial(bed_file, window_sizes, chrom_bigwig_dict):
    """Print a tab-delimited conservation table for every feature in a BED file.

    A header row is printed first (``name``, ``position``,
    ``transcript_length``, ``mean``, then one column per window size).
    For each feature yielded by ``BEDFeature.parse`` one row is printed with
    the feature name, its position as ``chrom:start-end[strand]``, the length
    of the array returned by ``extract_bigwig_data``, the mean of its finite
    values (``nan`` when there are none), and one ``best_sliding_window``
    result per window size.

    Args:
        bed_file: path of the BED file to read.
        window_sizes: iterable of window sizes; it is iterated once for the
            header and once per feature, so it must be re-iterable.
        chrom_bigwig_dict: mapping from chromosome name to the bigwig file
            handed to ``extract_bigwig_data``.

    Raises:
        KeyError: if a feature's chromosome is missing from
            ``chrom_bigwig_dict``.
    """
    # output header fields
    header = ['name', 'position', 'transcript_length', 'mean']
    header.extend(map(str, window_sizes))
    print('\t'.join(header))
    # process bed file; the context manager guarantees the handle is closed
    # even if parsing or scoring raises part-way through
    with open(bed_file) as bed_fh:
        for f in BEDFeature.parse(bed_fh):
            # retrieve conservation data
            bigwig_file = chrom_bigwig_dict[f.chrom]
            arr = extract_bigwig_data(f, bigwig_file)
            # measure conservation at various sliding windows
            window_scores = [best_sliding_window(arr, window_size, np.mean)
                             for window_size in window_sizes]
            # calc mean conservation over finite values only
            finitearr = arr[np.isfinite(arr)]
            if len(finitearr) == 0:
                mean_cons = np.nan
            else:
                mean_cons = np.mean(finitearr)
            fields = [f.name,
                      '%s:%d-%d[%s]' % (f.chrom, f.tx_start, f.tx_end,
                                        f.strand),
                      str(len(arr)),
                      str(mean_cons)]
            fields.extend(map(str, window_scores))
            print('\t'.join(fields))