def test_assembler1(self):
    """Assemble the first locus of ``assemble1.gtf`` with kmax=2 and kmax=3.

    The positive-strand transcript graph is built once.  For each round the
    scores of the four transcripts in ``tmap`` are overwritten, the graph is
    pruned with all trimming parameters set to zero, and the first pruned
    subgraph is assembled.  The test then checks the number of results and
    the path and score of each result, in order.
    """
    # expected exon paths
    PATH_ABCDE = (Exon(0, 100), Exon(200, 300), Exon(400, 500),
                  Exon(600, 700), Exon(800, 900))
    PATH_ACE = (Exon(0, 100), Exon(400, 500), Exon(800, 900))
    PATH_ABCE = (Exon(0, 100), Exon(200, 300), Exon(400, 500),
                 Exon(800, 900))
    PATH_ACDE = (Exon(0, 100), Exon(400, 500), Exon(600, 700),
                 Exon(800, 900))
    # read transcripts and build the positive-strand graph
    transcripts = read_first_locus("assemble1.gtf", score_attr="score")
    GG = get_transcript_graphs(transcripts)
    G, tmap = GG[POS_STRAND]

    # ---- round 1: kmax = 2 ----
    tmap["ABCDE"].score = 2.0
    tmap["ACE"].score = 1.0
    tmap["ABCE"].score = 1.0
    tmap["ACDE"].score = 1.0
    kmax = 2
    GS = list(prune_transcript_graph(G, POS_STRAND, tmap,
                                     min_trim_length=0,
                                     trim_utr_fraction=0,
                                     trim_intron_fraction=0))
    # only the first pruned subgraph is assembled
    Gsub, strand, partial_paths = GS[0]
    results = list(assemble_transcript_graph(Gsub, strand, partial_paths,
                                             user_kmax=kmax,
                                             ksensitivity=0,
                                             fraction_major_path=0,
                                             max_paths=1000))
    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(len(results), 2)
    self.assertEqual(tuple(results[0].path), PATH_ABCDE)
    self.assertAlmostEqual(results[0].score, 3.0, places=3)
    self.assertEqual(tuple(results[1].path), PATH_ACE)
    self.assertAlmostEqual(results[1].score, 2.0, places=3)

    # ---- round 2: kmax = 3 ----
    tmap["ABCDE"].score = 4.0
    tmap["ACE"].score = 3.0
    tmap["ABCE"].score = 2.0
    tmap["ACDE"].score = 1.0
    kmax = 3
    GS = list(prune_transcript_graph(G, POS_STRAND, tmap,
                                     min_trim_length=0,
                                     trim_utr_fraction=0,
                                     trim_intron_fraction=0))
    Gsub, strand, partial_paths = GS[0]
    results = list(assemble_transcript_graph(Gsub, strand, partial_paths,
                                             user_kmax=kmax,
                                             ksensitivity=0,
                                             fraction_major_path=0,
                                             max_paths=1000))
    self.assertEqual(len(results), 4)
    self.assertEqual(tuple(results[0].path), PATH_ABCDE)
    self.assertAlmostEqual(results[0].score, 4.0, places=3)
    self.assertEqual(tuple(results[1].path), PATH_ACE)
    self.assertAlmostEqual(results[1].score, 3.0, places=3)
    self.assertEqual(tuple(results[2].path), PATH_ABCE)
    self.assertAlmostEqual(results[2].score, 2.0, places=3)
    self.assertEqual(tuple(results[3].path), PATH_ACDE)
    self.assertAlmostEqual(results[3].score, 1.0, places=3)
def assemble_locus(
    transcripts,
    locus_id_value_obj,
    gene_id_value_obj,
    tss_id_value_obj,
    t_id_value_obj,
    config,
    gtf_fileh,
    bed_fileh,
    bedgraph_filehs,
):
    """Assemble every strand-specific subgraph of one locus.

    Steps, in order: log the locus extent, draw a locus id, filter the
    transcripts, build one transcript graph per strand, optionally dump the
    node scores of each graph as bedgraph lines, prune each graph into
    subgraphs, and hand every subgraph to ``assemble_gene``.

    Parameters:
        transcripts: non-empty sequence of objects with ``chrom``, ``start``
            and ``end``; the first element supplies the chromosome and the
            logged start coordinate.
            NOTE(review): presumably sorted by start -- confirm with caller.
        locus_id_value_obj: object whose ``next()`` method returns the
            integer used to build the ``"L%d"`` locus id.
        gene_id_value_obj, tss_id_value_obj, t_id_value_obj: passed through
            unchanged to ``assemble_gene``.
        config: settings object; reads ``min_transcript_length``, ``guided``,
            ``create_bedgraph``, ``min_trim_length``, ``trim_utr_fraction``
            and ``trim_intron_fraction`` (and is forwarded to
            ``assemble_gene``).
        gtf_fileh, bed_fileh: passed through unchanged to ``assemble_gene``.
        bedgraph_filehs: container indexed by strand whose items expose
            ``write``; only used when ``config.create_bedgraph`` is true.

    Returns:
        None.
    """

    def get_bedgraph_lines(chrom, G):
        # yields one (chrom, start, end, score) tuple per node, sorted
        for n in sorted(G.nodes()):
            # nodes with a negative start are left out of the bedgraph
            # NOTE(review): presumably placeholder nodes -- confirm
            if n.start < 0:
                continue
            yield (chrom, n.start, n.end, G.node[n][NODE_SCORE])

    # gather properties of locus
    locus_chrom = transcripts[0].chrom
    locus_start = transcripts[0].start
    locus_end = max(tx.end for tx in transcripts)
    # lazy logging arguments: the message is only built if DEBUG is enabled
    logging.debug(
        "[LOCUS] %s:%d-%d %d transcripts",
        locus_chrom, locus_start, locus_end, len(transcripts),
    )
    # .next() is kept as a method call on purpose: the object need not be
    # an iterator, so the next() builtin is not a safe substitute
    locus_id_str = "L%d" % (locus_id_value_obj.next())
    # filter transcripts
    logging.debug("\tFiltering transcripts")
    transcripts = filter_transcripts(
        transcripts, config.min_transcript_length, config.guided
    )
    # build transcript graphs
    for G, strand, strand_transcripts in create_transcript_graphs(transcripts):
        # output bedgraph
        if config.create_bedgraph:
            bedgraph_fileh = bedgraph_filehs[strand]
            for fields in get_bedgraph_lines(locus_chrom, G):
                # write() emits the same text as the Python-2-only
                # "print >>fh" form and also works on Python 3
                bedgraph_fileh.write("\t".join(map(str, fields)) + "\n")
        # process transcript graphs; the subgraph strand gets its own name
        # so the outer loop variable is not rebound
        for Gsub, sub_strand, partial_paths in prune_transcript_graph(
            G,
            strand,
            strand_transcripts,
            config.min_trim_length,
            config.trim_utr_fraction,
            config.trim_intron_fraction,
        ):
            logging.debug(
                "Subgraph %s:%d-%d(%s) %d nodes %d paths",
                locus_chrom, locus_start, locus_end,
                strand_int_to_str(sub_strand), len(Gsub), len(partial_paths),
            )
            # assemble subgraph
            assemble_gene(
                locus_chrom,
                locus_id_str,
                gene_id_value_obj,
                tss_id_value_obj,
                t_id_value_obj,
                Gsub,
                sub_strand,
                partial_paths,
                config,
                gtf_fileh,
                bed_fileh,
            )
def assemble_locus(transcripts, locus_id_value_obj, gene_id_value_obj,
                   tss_id_value_obj, t_id_value_obj, config,
                   gtf_fileh, bed_fileh, bedgraph_filehs):
    """Run graph construction, pruning and assembly for a single locus.

    The locus extent is logged, a locus id string ``"L<n>"`` is drawn from
    ``locus_id_value_obj.next()``, and the transcripts are filtered.  For
    each ``(G, strand, strand_transcripts)`` produced by
    ``create_transcript_graphs`` the function optionally writes the graph's
    node scores as tab-separated bedgraph lines, then calls
    ``assemble_gene`` on every subgraph yielded by
    ``prune_transcript_graph``.

    Parameters:
        transcripts: non-empty sequence; each item has ``chrom``, ``start``
            and ``end``.  ``transcripts[0]`` provides the chromosome and the
            logged start.
            NOTE(review): looks like the list is expected to be sorted by
            start -- confirm against the caller.
        locus_id_value_obj: provides ``next()`` returning an integer.
        gene_id_value_obj, tss_id_value_obj, t_id_value_obj: forwarded to
            ``assemble_gene`` untouched.
        config: object providing ``min_transcript_length``, ``guided``,
            ``create_bedgraph``, ``min_trim_length``, ``trim_utr_fraction``
            and ``trim_intron_fraction``; also forwarded to
            ``assemble_gene``.
        gtf_fileh, bed_fileh: forwarded to ``assemble_gene`` untouched.
        bedgraph_filehs: strand-indexed collection of writable handles.

    Returns:
        None.
    """
    def get_bedgraph_lines(chrom, G):
        # generator of (chrom, start, end, score) per node in sorted order
        for n in sorted(G.nodes()):
            # negative-start nodes are not reported
            # NOTE(review): presumably dummy nodes -- confirm
            if n.start < 0:
                continue
            fields = (chrom, n.start, n.end, G.node[n][NODE_SCORE])
            yield fields
    # gather properties of locus
    locus_chrom = transcripts[0].chrom
    locus_start = transcripts[0].start
    locus_end = max(tx.end for tx in transcripts)
    # arguments are passed lazily so no formatting happens unless DEBUG
    # logging is active
    logging.debug("[LOCUS] %s:%d-%d %d transcripts",
                  locus_chrom, locus_start, locus_end, len(transcripts))
    # the .next() method is called directly because the object may be a
    # custom counter rather than an iterator
    locus_id_str = "L%d" % (locus_id_value_obj.next())
    # filter transcripts
    logging.debug("\tFiltering transcripts")
    transcripts = filter_transcripts(transcripts,
                                     config.min_transcript_length,
                                     config.guided)
    # build transcript graphs
    graphs = create_transcript_graphs(transcripts)
    for G, strand, strand_transcripts in graphs:
        # output bedgraph
        if config.create_bedgraph:
            for fields in get_bedgraph_lines(locus_chrom, G):
                # portable replacement for the Python-2-only "print >>fh"
                # statement; the written text is identical
                line = '\t'.join(map(str, fields))
                bedgraph_filehs[strand].write(line + '\n')
        # process transcript graphs
        subgraphs = prune_transcript_graph(G, strand, strand_transcripts,
                                           config.min_trim_length,
                                           config.trim_utr_fraction,
                                           config.trim_intron_fraction)
        # the yielded strand is bound to a separate name so the outer loop
        # variable is not overwritten
        for Gsub, subgraph_strand, partial_paths in subgraphs:
            logging.debug("Subgraph %s:%d-%d(%s) %d nodes %d paths",
                          locus_chrom, locus_start, locus_end,
                          strand_int_to_str(subgraph_strand), len(Gsub),
                          len(partial_paths))
            # assemble subgraph
            assemble_gene(locus_chrom, locus_id_str,
                          gene_id_value_obj,
                          tss_id_value_obj,
                          t_id_value_obj,
                          Gsub, subgraph_strand, partial_paths,
                          config,
                          gtf_fileh,
                          bed_fileh)
def test_assembler1(self):
    """Check assembled paths and scores for ``assemble1.gtf`` at two kmax values.

    Transcript scores in ``tmap`` are reassigned before each run.  Each run
    prunes the positive-strand graph with every trim parameter at zero and
    assembles the first resulting subgraph.  With ``kmax=2`` two results are
    expected; with ``kmax=3`` four results are expected.  Paths are compared
    exactly and scores to three decimal places.
    """
    # setup correct transcripts
    PATH_ABCDE = (
        Exon(0, 100), Exon(200, 300), Exon(400, 500),
        Exon(600, 700), Exon(800, 900),
    )
    PATH_ACE = (Exon(0, 100), Exon(400, 500), Exon(800, 900))
    PATH_ABCE = (Exon(0, 100), Exon(200, 300), Exon(400, 500), Exon(800, 900))
    PATH_ACDE = (Exon(0, 100), Exon(400, 500), Exon(600, 700), Exon(800, 900))
    # read transcripts
    transcripts = read_first_locus("assemble1.gtf", score_attr="score")
    GG = get_transcript_graphs(transcripts)
    G, tmap = GG[POS_STRAND]

    def run_assembly(kmax):
        # prune with trimming disabled, then assemble the first subgraph
        pruned = list(
            prune_transcript_graph(G, POS_STRAND, tmap,
                                   min_trim_length=0,
                                   trim_utr_fraction=0,
                                   trim_intron_fraction=0))
        Gsub, strand, partial_paths = pruned[0]
        return list(
            assemble_transcript_graph(Gsub, strand, partial_paths,
                                      user_kmax=kmax,
                                      ksensitivity=0,
                                      fraction_major_path=0,
                                      max_paths=1000))

    # first run: set transcript scores, assemble with kmax = 2
    tmap["ABCDE"].score = 2.0
    tmap["ACE"].score = 1.0
    tmap["ABCE"].score = 1.0
    tmap["ACDE"].score = 1.0
    results = run_assembly(2)
    # assertEqual is used throughout; assertEquals is a deprecated alias
    self.assertEqual(len(results), 2)
    self.assertEqual(tuple(results[0].path), PATH_ABCDE)
    self.assertAlmostEqual(results[0].score, 3.0, places=3)
    self.assertEqual(tuple(results[1].path), PATH_ACE)
    self.assertAlmostEqual(results[1].score, 2.0, places=3)

    # second run: new transcript scores, assemble with kmax = 3
    tmap["ABCDE"].score = 4.0
    tmap["ACE"].score = 3.0
    tmap["ABCE"].score = 2.0
    tmap["ACDE"].score = 1.0
    results = run_assembly(3)
    self.assertEqual(len(results), 4)
    self.assertEqual(tuple(results[0].path), PATH_ABCDE)
    self.assertAlmostEqual(results[0].score, 4.0, places=3)
    self.assertEqual(tuple(results[1].path), PATH_ACE)
    self.assertAlmostEqual(results[1].score, 3.0, places=3)
    self.assertEqual(tuple(results[2].path), PATH_ABCE)
    self.assertAlmostEqual(results[2].score, 2.0, places=3)
    self.assertEqual(tuple(results[3].path), PATH_ACDE)
    self.assertAlmostEqual(results[3].score, 1.0, places=3)