def assemble_locus(transcripts, locus_id_value_obj, gene_id_value_obj,
                   tss_id_value_obj, t_id_value_obj, config,
                   gtf_fileh, bed_fileh, bedgraph_filehs):
    """Assemble every transcript graph produced for a single locus.

    The locus chromosome and start are read from the first transcript and
    the locus end is the largest ``end`` over all transcripts.  A locus id
    string ``L<n>`` is drawn from ``locus_id_value_obj.next()``, the
    transcripts are passed through ``filter_transcripts``, and each graph
    returned by ``create_transcript_graphs`` is handed to ``assemble_gene``.

    Parameters:
      transcripts: indexable collection of objects exposing ``chrom``,
        ``start`` and ``end``.  NOTE(review): element 0 is used for the
        locus start, so the input is presumably non-empty and sorted by
        start -- confirm against the caller.
      locus_id_value_obj: object whose ``next()`` supplies the integer
        formatted into the locus id.
      gene_id_value_obj, tss_id_value_obj, t_id_value_obj: forwarded
        unchanged to ``assemble_gene``.
      config: settings object; reads ``min_transcript_length``, ``guided``,
        ``min_trim_length``, ``trim_utr_fraction``, ``trim_intron_fraction``
        and ``create_bedgraph``.
      gtf_fileh, bed_fileh: forwarded unchanged to ``assemble_gene``.
      bedgraph_filehs: forwarded to ``create_transcript_graphs``.

    Returns None; there is no return statement.
    """
    # locus coordinates come from the unfiltered transcript list
    head_tx = transcripts[0]
    locus_chrom = head_tx.chrom
    locus_start = head_tx.start
    locus_end = max(tx.end for tx in transcripts)
    locus_summary = "[LOCUS] %s:%d-%d %d transcripts" % (
        locus_chrom, locus_start, locus_end, len(transcripts))
    logging.debug(locus_summary)
    locus_id_str = "L%d" % (locus_id_value_obj.next())

    # drop transcripts rejected by the length / guided filter
    logging.debug("\tFiltering transcripts")
    kept_transcripts = filter_transcripts(transcripts,
                                          config.min_transcript_length,
                                          config.guided)

    # options controlling graph construction, all taken from config
    graph_options = {
        "min_trim_length": config.min_trim_length,
        "trim_utr_fraction": config.trim_utr_fraction,
        "trim_intron_fraction": config.trim_intron_fraction,
        "create_bedgraph": config.create_bedgraph,
        "bedgraph_filehs": bedgraph_filehs,
    }
    for tg in create_transcript_graphs(locus_chrom, kept_transcripts,
                                       **graph_options):
        strand_str = strand_int_to_str(tg.strand)
        # the message reports the locus-wide coordinates, not the subgraph's
        subgraph_summary = "Subgraph %s:%d-%d(%s) %d nodes %d paths" % (
            locus_chrom, locus_start, locus_end, strand_str,
            len(tg.Gsub), len(tg.partial_paths))
        logging.debug(subgraph_summary)
        # assemble this subgraph
        assemble_gene(locus_chrom, locus_id_str,
                      gene_id_value_obj, tss_id_value_obj, t_id_value_obj,
                      tg.Gsub, tg.strand, tg.partial_paths,
                      config, gtf_fileh, bed_fileh)
def assemble_locus(
    transcripts,
    locus_id_value_obj,
    gene_id_value_obj,
    tss_id_value_obj,
    t_id_value_obj,
    config,
    gtf_fileh,
    bed_fileh,
    bedgraph_filehs,
):
    """Build, optionally dump, prune and assemble the graphs of one locus.

    For every ``(G, strand, strand_transcripts)`` yielded by
    ``create_transcript_graphs`` the graph nodes are optionally written as
    tab-separated rows to ``bedgraph_filehs[strand]`` (when
    ``config.create_bedgraph`` is true), then every subgraph yielded by
    ``prune_transcript_graph`` is passed to ``assemble_gene``.

    Parameters:
      transcripts: indexable collection of objects exposing ``chrom``,
        ``start`` and ``end``.  NOTE(review): element 0 supplies the locus
        start, so the input is presumably non-empty and sorted by start --
        confirm against the caller.
      locus_id_value_obj: object whose ``next()`` supplies the integer
        formatted into the ``L<n>`` locus id.
      gene_id_value_obj, tss_id_value_obj, t_id_value_obj: forwarded
        unchanged to ``assemble_gene``.
      config: settings object; reads ``min_transcript_length``, ``guided``,
        ``create_bedgraph``, ``min_trim_length``, ``trim_utr_fraction`` and
        ``trim_intron_fraction``.
      gtf_fileh, bed_fileh: forwarded unchanged to ``assemble_gene``.
      bedgraph_filehs: container indexed by strand whose items are used as
        ``print >>`` targets.

    Returns None; there is no return statement.
    """

    def iter_bedgraph_rows(chrom, graph):
        # one (chrom, start, end, score) tuple per node, in sorted node
        # order; nodes with a negative start are left out
        for node in sorted(graph.nodes()):
            if node.start >= 0:
                yield (chrom, node.start, node.end,
                       graph.node[node][NODE_SCORE])

    # locus coordinates come from the unfiltered transcript list
    locus_chrom = transcripts[0].chrom
    locus_start = transcripts[0].start
    locus_end = max(tx.end for tx in transcripts)
    logging.debug(
        "[LOCUS] %s:%d-%d %d transcripts"
        % (locus_chrom, locus_start, locus_end, len(transcripts))
    )
    locus_id_str = "L%d" % (locus_id_value_obj.next())

    # drop transcripts rejected by the length / guided filter
    logging.debug("\tFiltering transcripts")
    kept_transcripts = filter_transcripts(
        transcripts, config.min_transcript_length, config.guided
    )

    for G, strand, strand_transcripts in create_transcript_graphs(
        kept_transcripts
    ):
        # optional bedgraph dump of the graph built for this strand
        if config.create_bedgraph:
            for row in iter_bedgraph_rows(locus_chrom, G):
                print >>bedgraph_filehs[strand], "\t".join(map(str, row))

        pruned_graphs = prune_transcript_graph(
            G,
            strand,
            strand_transcripts,
            config.min_trim_length,
            config.trim_utr_fraction,
            config.trim_intron_fraction,
        )
        for Gsub, sub_strand, partial_paths in pruned_graphs:
            # the message reports the locus-wide coordinates
            logging.debug(
                "Subgraph %s:%d-%d(%s) %d nodes %d paths"
                % (
                    locus_chrom,
                    locus_start,
                    locus_end,
                    strand_int_to_str(sub_strand),
                    len(Gsub),
                    len(partial_paths),
                )
            )
            # assemble this subgraph
            assemble_gene(
                locus_chrom,
                locus_id_str,
                gene_id_value_obj,
                tss_id_value_obj,
                t_id_value_obj,
                Gsub,
                sub_strand,
                partial_paths,
                config,
                gtf_fileh,
                bed_fileh,
            )
def assemble_locus(transcripts, locus_id_value_obj, gene_id_value_obj,
                   tss_id_value_obj, t_id_value_obj, config,
                   gtf_fileh, bed_fileh, bedgraph_filehs):
    """Run graph construction, pruning and assembly for one locus.

    Each ``(G, strand, strand_transcripts)`` triple yielded by
    ``create_transcript_graphs`` is optionally written out as bedgraph rows
    (when ``config.create_bedgraph`` is true) and then split by
    ``prune_transcript_graph``; every resulting subgraph is passed to
    ``assemble_gene`` together with the locus id string ``L<n>`` drawn from
    ``locus_id_value_obj.next()``.

    Parameters:
      transcripts: indexable collection of objects exposing ``chrom``,
        ``start`` and ``end``.  NOTE(review): element 0 supplies the locus
        start, so the input is presumably non-empty and sorted by start --
        confirm against the caller.
      locus_id_value_obj: object whose ``next()`` supplies the integer
        formatted into the locus id.
      gene_id_value_obj, tss_id_value_obj, t_id_value_obj: forwarded
        unchanged to ``assemble_gene``.
      config: settings object; reads ``min_transcript_length``, ``guided``,
        ``create_bedgraph``, ``min_trim_length``, ``trim_utr_fraction`` and
        ``trim_intron_fraction``.
      gtf_fileh, bed_fileh: forwarded unchanged to ``assemble_gene``.
      bedgraph_filehs: container indexed by strand whose items are used as
        ``print >>`` targets.

    Returns None; there is no return statement.
    """
    # locus coordinates come from the unfiltered transcript list
    head_tx = transcripts[0]
    locus_chrom = head_tx.chrom
    locus_start = head_tx.start
    locus_end = max(tx.end for tx in transcripts)
    logging.debug("[LOCUS] %s:%d-%d %d transcripts" %
                  (locus_chrom, locus_start, locus_end, len(transcripts)))
    locus_id_str = "L%d" % (locus_id_value_obj.next())

    # drop transcripts rejected by the length / guided filter
    logging.debug("\tFiltering transcripts")
    usable = filter_transcripts(transcripts,
                                config.min_transcript_length,
                                config.guided)

    for G, strand, strand_transcripts in create_transcript_graphs(usable):
        if config.create_bedgraph:
            # one tab-separated row per node in sorted node order; nodes
            # with a negative start are skipped
            for node in sorted(G.nodes()):
                if node.start < 0:
                    continue
                row = (locus_chrom, node.start, node.end,
                       G.node[node][NODE_SCORE])
                print >>bedgraph_filehs[strand], '\t'.join(map(str, row))

        # split the strand graph and assemble each piece
        pruned = prune_transcript_graph(G, strand, strand_transcripts,
                                        config.min_trim_length,
                                        config.trim_utr_fraction,
                                        config.trim_intron_fraction)
        for Gsub, sub_strand, partial_paths in pruned:
            strand_str = strand_int_to_str(sub_strand)
            # the message reports the locus-wide coordinates
            logging.debug("Subgraph %s:%d-%d(%s) %d nodes %d paths" %
                          (locus_chrom, locus_start, locus_end, strand_str,
                           len(Gsub), len(partial_paths)))
            assemble_gene(locus_chrom, locus_id_str,
                          gene_id_value_obj, tss_id_value_obj,
                          t_id_value_obj,
                          Gsub, sub_strand, partial_paths,
                          config, gtf_fileh, bed_fileh)
def assemble_locus(transcripts, locus_id_value_obj, gene_id_value_obj,
                   tss_id_value_obj, t_id_value_obj, config,
                   gtf_fileh, bed_fileh, bedgraph_filehs):
    """Filter the transcripts of one locus and assemble each graph.

    Steps, in order: read the locus chromosome/start from the first
    transcript and the end as the maximum ``end``; draw the ``L<n>`` locus
    id from ``locus_id_value_obj.next()``; filter the transcripts with
    ``filter_transcripts``; build graphs with ``create_transcript_graphs``;
    call ``assemble_gene`` once per returned graph object.

    Parameters:
      transcripts: indexable collection of objects exposing ``chrom``,
        ``start`` and ``end``.  NOTE(review): element 0 supplies the locus
        start, so the input is presumably non-empty and sorted by start --
        confirm against the caller.
      locus_id_value_obj: object whose ``next()`` supplies the integer
        formatted into the locus id.
      gene_id_value_obj, tss_id_value_obj, t_id_value_obj: forwarded
        unchanged to ``assemble_gene``.
      config: settings object; reads ``min_transcript_length``, ``guided``,
        ``min_trim_length``, ``trim_utr_fraction``, ``trim_intron_fraction``
        and ``create_bedgraph``.
      gtf_fileh, bed_fileh: forwarded unchanged to ``assemble_gene``.
      bedgraph_filehs: forwarded to ``create_transcript_graphs``.

    Returns None; there is no return statement.
    """
    # locus coordinates come from the unfiltered transcript list
    locus_chrom = transcripts[0].chrom
    locus_start = transcripts[0].start
    locus_end = max(tx.end for tx in transcripts)
    num_transcripts = len(transcripts)
    logging.debug("[LOCUS] %s:%d-%d %d transcripts" %
                  (locus_chrom, locus_start, locus_end, num_transcripts))
    locus_id_str = "L%d" % (locus_id_value_obj.next())

    # drop transcripts rejected by the length / guided filter
    logging.debug("\tFiltering transcripts")
    filtered = filter_transcripts(transcripts,
                                  config.min_transcript_length,
                                  config.guided)

    # build the transcript graphs for the surviving transcripts
    graphs = create_transcript_graphs(
        locus_chrom,
        filtered,
        min_trim_length=config.min_trim_length,
        trim_utr_fraction=config.trim_utr_fraction,
        trim_intron_fraction=config.trim_intron_fraction,
        create_bedgraph=config.create_bedgraph,
        bedgraph_filehs=bedgraph_filehs)

    for graph in graphs:
        # the message reports the locus-wide coordinates, not the subgraph's
        debug_fields = (locus_chrom, locus_start, locus_end,
                        strand_int_to_str(graph.strand),
                        len(graph.Gsub), len(graph.partial_paths))
        logging.debug("Subgraph %s:%d-%d(%s) %d nodes %d paths" %
                      debug_fields)
        # assemble this subgraph
        assemble_gene(locus_chrom, locus_id_str,
                      gene_id_value_obj, tss_id_value_obj, t_id_value_obj,
                      graph.Gsub, graph.strand, graph.partial_paths,
                      config, gtf_fileh, bed_fileh)
def get_transcript_graphs(transcripts):
    """Return the graphs built from ``transcripts`` keyed by strand.

    ``create_transcript_graphs(transcripts)`` is consumed as a sequence of
    ``(G, strand, strand_transcript_map)`` triples; the result maps each
    ``strand`` to the pair ``(G, strand_transcript_map)``.  If a strand is
    yielded more than once, the last triple wins.
    """
    return dict(
        (strand, (graph, transcript_map))
        for graph, strand, transcript_map
        in create_transcript_graphs(transcripts)
    )