def view_track(parser, options):
    """Open the named track inside a TrackFactory file and dump the data
    for the requested genomic region to stdout.

    Expects on ``options``: ``file`` (trackfactory path), ``name`` (track
    name), ``region`` (interval string), ``file_type`` (e.g. "bedgraph"),
    and for vector tracks ``readnum`` / ``allele``.  ``parser.error`` is
    used to abort (and exit) when the track does not exist.
    """
    tf = TrackFactory(options.file, "r")
    if not tf.has_track(options.name):
        tf.close()
        parser.error("trackfactory '%s' does not contain track '%s'" %
                     (options.file, options.name))
    region = parse_interval(options.region)
    t = tf.get_track(options.name)
    track_type = t.get_type()
    logging.debug("opened track '%s' type '%s'" % (options.name, track_type))
    # Dispatch on the stored track type.  Wrapped in try/finally so the
    # factory is closed even if reading/printing the region raises.
    try:
        if track_type == SequenceTrack.__name__:
            print(t[region])
        elif track_type == ArrayTrack.__name__:
            # array tracks optionally export as bedgraph text
            if options.file_type == "bedgraph":
                t.tobedgraph(region, sys.stdout)
            else:
                print(t[region])
        elif track_type == VectorTrack.__name__:
            if options.file_type == "bedgraph":
                # normalized bedgraph, restricted to a read number / allele
                t.tobedgraph(region, sys.stdout, norm=True,
                             read=options.readnum, allele=options.allele)
            else:
                print(t[region])
        elif track_type == RnaseqTrack.__name__:
            # RNA-seq tracks expose separate coverage and junction tracks
            cov_track = t.get_coverage_track()
            print(cov_track.density(region))
            junc_track = t.get_junction_track()
            print(junc_track[region])
        logging.debug("done")
    finally:
        tf.close()
def main():
    """Count read coverage over genes from a BED file across one or more
    vector tracks and print a tab-delimited table to stdout.

    Track specs on the command line have the form ``NAME@FILE:H5PATH``.
    With ``--stranded`` two columns (sense/antisense) are emitted per
    track, otherwise one.  ``--aliases`` supplies a tab-delimited file
    mapping gene name -> extra annotation columns (header line starts
    with '#').
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s - %(levelname)s - %(message)s")
    parser = argparse.ArgumentParser()
    parser.add_argument("--stranded", dest="stranded", action="store_true",
                        default=False)
    parser.add_argument("--ambiguous", dest="ambiguous", action="store_true",
                        default=False)
    parser.add_argument("--aliases", dest="alias_file", default=None)
    parser.add_argument("bed")
    parser.add_argument("track_files", nargs="+")
    options = parser.parse_args()
    # Load the optional alias table: a leading '#' line names the extra
    # columns; every other line maps gene name -> annotation fields.
    alias_dict = {}
    alias_header = []
    if options.alias_file is not None:
        with open(options.alias_file) as alias_fh:  # 'with' fixes fh leak
            for line in alias_fh:
                if line.startswith("#"):
                    alias_header = line.strip()[1:].split('\t')
                    continue
                fields = line.strip().split('\t')
                alias_dict[fields[0]] = fields[1:]
    header_fields = alias_header + ["gene_name", "gene_interval",
                                    "gene_length"]
    # Open every requested track; remember its reference names so genes on
    # chromosomes a track does not know about can be skipped.
    tracks = []
    for track_path in options.track_files:
        name, path = track_path.split("@")
        file_path, h5_path = path.split(":")
        tf = TrackFactory(file_path, "r")
        t = tf.get_track(h5_path)
        tracks.append((name, tf, t, set(t.get_rnames())))
        if options.stranded:
            header_fields.append("%s_sense" % name)
            header_fields.append("%s_antisense" % name)
        else:
            header_fields.append(name)
    try:
        # output header
        print('\t'.join(map(str, header_fields)))
        # read genes
        if options.ambiguous:
            with open(options.bed) as bed_fh:  # close the BED file handle
                genes = list(BedGene.parse(bed_fh))
        else:
            genes = filter_strand_conflicts(options.bed)
        # placeholder alias columns for genes absent from the alias table
        # (read-only, so it is safe to share one list across genes)
        missing_alias = ["None"] * len(alias_header)
        # get counts
        for g in genes:
            alias_fields = alias_dict.get(g.name, missing_alias)
            fields = ([g.name] + alias_fields +
                      ["%s[%s]:%d-%d" % (g.chrom, g.strand,
                                         g.tx_start, g.tx_end),
                       sum((end - start) for start, end in g.exons)])
            # NOTE(review): '+' maps to NEG_STRAND here, which looks
            # inverted -- presumably deliberate for a dUTP-style stranded
            # library where reads land on the opposite strand; confirm.
            sense_strand = NEG_STRAND if g.strand == "+" else POS_STRAND
            antisense_strand = int(not sense_strand)
            rname_found = False
            for name, tf, t, rnames in tracks:
                if g.chrom not in rnames:
                    continue
                rname_found = True
                if options.stranded:
                    sense_count = 0
                    antisense_count = 0
                    for start, end in g.exons:
                        sense_count += t.count((g.chrom, start, end,
                                                sense_strand))
                        antisense_count += t.count((g.chrom, start, end,
                                                    antisense_strand))
                    fields.append(sense_count)
                    fields.append(antisense_count)
                else:
                    count = 0
                    for start, end in g.exons:
                        count += t.count((g.chrom, start, end))
                    fields.append(count)
            # only emit genes found in at least one track
            if rname_found:
                print('\t'.join(map(str, fields)))
    finally:
        # close every opened track factory even if counting failed
        for name, tf, t, rnames in tracks:
            tf.close()