def test_split_transfrag():
    """Check the node intervals split_transfrag yields for transfrags A-D.

    The boundaries passed to split_transfrag come from a splice graph built
    over every transfrag parsed from the first locus of ``splice_sites.gtf``.
    """
    loci = read_gtf("splice_sites.gtf")
    _interval, gtf_lines = loci[0]
    t_dict = Transfrag.parse_gtf(gtf_lines)
    graph = SpliceGraph.create(t_dict.values())
    boundaries = array("i", graph._find_node_boundaries())

    # check nodes: (transfrag id, expected node intervals), verified in order
    expected_by_id = (
        ("A", ((10, 100), (200, 250), (250, 300), (400, 525))),
        ("B", ((10, 100), (250, 300), (400, 525))),
        ("C", ((150, 200), (200, 250), (250, 300), (400, 525))),
        ("D", ((375, 400), (400, 525))),
    )
    for t_id, expected in expected_by_id:
        transfrag = t_dict[t_id]
        nodes = tuple(split_transfrag(transfrag, boundaries))
        assert nodes == expected
def test_split_transfrag():
    """Verify split_transfrag output for each transfrag in splice_sites.gtf.

    Each of transfrags A, B, C and D is split against the node boundaries
    reported by the splice graph created from all parsed transfrags.
    """
    loci = read_gtf('splice_sites.gtf')
    _interval, gtf_lines = loci[0]
    t_dict = Transfrag.parse_gtf(gtf_lines)
    splice_graph = SpliceGraph.create(t_dict.values())
    boundaries = array('i', splice_graph._find_node_boundaries())

    def nodes_of(transfrag_id):
        # Look the transfrag up by id and materialize its split nodes.
        transfrag = t_dict[transfrag_id]
        return tuple(split_transfrag(transfrag, boundaries))

    # check nodes
    assert nodes_of('A') == ((10, 100), (200, 250), (250, 300), (400, 525))
    assert nodes_of('B') == ((10, 100), (250, 300), (400, 525))
    assert nodes_of('C') == ((150, 200), (200, 250), (250, 300), (400, 525))
    assert nodes_of('D') == ((375, 400), (400, 525))
def test_create_locus():
    """Build a Locus from splice_sites.gtf and check its expression data.

    Asserts the locus interval (chr1, 10, 525), the positive-strand
    expression values over three small windows, and that the two windows
    reaching past the asserted locus start/end raise TacoError.
    """
    loci = read_gtf('splice_sites.gtf')
    assert len(loci) == 1
    interval, gtf_lines = loci[0]
    assert interval == ('chr1', 10, 525)

    transfrags = Transfrag.parse_gtf(gtf_lines)
    locus = Locus.create(transfrags.values())
    assert locus.chrom == 'chr1'
    assert locus.start == 10
    assert locus.end == 525

    # (start, end, expected values) on the positive strand
    expected_expr = (
        (49, 51, [1.0, 2.0]),
        (150, 151, [1.0]),
        (499, 501, [3.0, 1.0]),
    )
    for start, end, expected in expected_expr:
        observed = locus.get_expr_data(start, end, Strand.POS)
        assert np.array_equal(observed, expected)

    # windows crossing the locus start (10) and end (525) must be rejected
    for start, end in ((5, 15), (520, 530)):
        with pytest.raises(TacoError):
            locus.get_expr_data(start, end, Strand.POS)
def test_parse_loci():
    """Check that parse_loci.gtf is read as three loci with known intervals."""
    loci = read_gtf('parse_loci.gtf')
    assert len(loci) == 3

    # first element of each locus entry is its (chrom, start, end) interval
    expected_intervals = [
        ('chr1', 10, 50),
        ('chr1', 50, 200),
        ('chr2', 100, 200),
    ]
    for index, expected in enumerate(expected_intervals):
        assert loci[index][0] == expected
def test_multi_strand1():
    """Exercise Locus and SpliceGraph on a locus with both strands.

    Reads ``multi_strand1.gtf``, checks that a SpliceGraph cannot be created
    from the mixed-strand transfrags, then checks the per-strand graphs:
    their coordinates, reference start/stop sites, expression data, and node
    boundaries both without and with guided ends/assembly.
    """
    # read gtf and test basic values
    loci = read_gtf("multi_strand1.gtf")
    assert len(loci) == 1
    interval, gtf_lines = loci[0]
    assert interval == ("chr1", 100, 1000)
    t_dict = Transfrag.parse_gtf(gtf_lines)
    assert len(t_dict) == 5
    locus = Locus.create(t_dict.values())
    assert locus.chrom == "chr1"
    assert locus.start == 100
    assert locus.end == 1000

    # raise exception when creating with multiple strands
    with pytest.raises(TacoError):
        SpliceGraph.create(t_dict.values())

    pos_transfrags = locus.get_transfrags(Strand.POS)
    neg_transfrags = locus.get_transfrags(Strand.NEG)
    pos_graph = SpliceGraph.create(pos_transfrags)
    neg_graph = SpliceGraph.create(neg_transfrags)

    # test the positive-strand graph
    assert pos_graph.chrom == "chr1"
    assert pos_graph.start == 100
    assert pos_graph.end == 650
    assert pos_graph.strand == Strand.POS
    assert pos_graph.ref_start_sites == [150]
    assert pos_graph.ref_stop_sites == [600]
    for start, end in ((90, 110), (650, 655)):
        with pytest.raises(TacoError):
            pos_graph.get_expr_data(start, end)
    assert np.array_equal(pos_graph.get_expr_data(100, 105), np.ones(5))

    # test the negative-strand graph
    assert neg_graph.chrom == "chr1"
    assert neg_graph.start == 350
    assert neg_graph.end == 1000
    assert neg_graph.strand == Strand.NEG
    assert neg_graph.ref_start_sites == [1000]
    assert neg_graph.ref_stop_sites == [350]
    for start, end in ((340, 350), (1000, 1010)):
        with pytest.raises(TacoError):
            neg_graph.get_expr_data(start, end)
    # (start, end, constructor of the expected 5-element array)
    neg_expr_checks = (
        (400, 405, np.ones),
        (945, 950, np.zeros),
        (950, 955, np.ones),
        (980, 985, np.zeros),
    )
    for start, end, make_expected in neg_expr_checks:
        observed = neg_graph.get_expr_data(start, end)
        assert np.array_equal(observed, make_expected(5))

    # test locus boundaries
    assert tuple(pos_graph._find_node_boundaries()) == (100, 200, 300, 400, 650)
    assert tuple(neg_graph._find_node_boundaries()) == (
        350, 400, 500, 950, 980, 1000)

    # added guided ends/assembly to use boundaries from reference
    guided = {"guided_ends": True, "guided_assembly": True}
    guided_pos = SpliceGraph.create(pos_transfrags, **guided)
    assert tuple(guided_pos._find_node_boundaries()) == (
        100, 150, 200, 300, 400, 500, 600, 650)
    guided_neg = SpliceGraph.create(neg_transfrags, **guided)
    assert tuple(guided_neg._find_node_boundaries()) == (
        350, 400, 500, 750, 900, 950, 980, 1000)
def test_multi_strand1():
    """Check strand-specific splice graphs built from multi_strand1.gtf.

    Covers the parsed locus interval, the TacoError expected when a
    SpliceGraph is created from transfrags of both strands, the attributes
    and expression data of the positive and negative graphs, and their node
    boundaries with and without guided ends/assembly.
    """

    def expect_error(graph, start, end):
        # The requested window is expected to be rejected with TacoError.
        with pytest.raises(TacoError):
            graph.get_expr_data(start, end)

    def expr_matches(graph, start, end, expected):
        # Compare the graph's expression data over [start, end) to expected.
        return np.array_equal(graph.get_expr_data(start, end), expected)

    # read gtf and test basic values
    loci = read_gtf('multi_strand1.gtf')
    assert len(loci) == 1
    interval, gtf_lines = loci[0]
    assert interval == ('chr1', 100, 1000)
    t_dict = Transfrag.parse_gtf(gtf_lines)
    assert len(t_dict) == 5
    locus = Locus.create(t_dict.values())
    assert locus.chrom == 'chr1'
    assert locus.start == 100
    assert locus.end == 1000

    # raise exception when creating with multiple strands
    with pytest.raises(TacoError):
        SpliceGraph.create(t_dict.values())

    plus_transfrags = locus.get_transfrags(Strand.POS)
    minus_transfrags = locus.get_transfrags(Strand.NEG)
    plus_graph = SpliceGraph.create(plus_transfrags)
    minus_graph = SpliceGraph.create(minus_transfrags)

    # test
    assert plus_graph.chrom == 'chr1'
    assert plus_graph.start == 100
    assert plus_graph.end == 650
    assert plus_graph.strand == Strand.POS
    assert plus_graph.ref_start_sites == [150]
    assert plus_graph.ref_stop_sites == [600]
    expect_error(plus_graph, 90, 110)
    expect_error(plus_graph, 650, 655)
    assert expr_matches(plus_graph, 100, 105, np.ones(5))

    assert minus_graph.chrom == 'chr1'
    assert minus_graph.start == 350
    assert minus_graph.end == 1000
    assert minus_graph.strand == Strand.NEG
    assert minus_graph.ref_start_sites == [1000]
    assert minus_graph.ref_stop_sites == [350]
    expect_error(minus_graph, 340, 350)
    expect_error(minus_graph, 1000, 1010)
    assert expr_matches(minus_graph, 400, 405, np.ones(5))
    assert expr_matches(minus_graph, 945, 950, np.zeros(5))
    assert expr_matches(minus_graph, 950, 955, np.ones(5))
    assert expr_matches(minus_graph, 980, 985, np.zeros(5))

    # test locus boundaries
    plus_bounds = tuple(plus_graph._find_node_boundaries())
    assert plus_bounds == (100, 200, 300, 400, 650)
    minus_bounds = tuple(minus_graph._find_node_boundaries())
    assert minus_bounds == (350, 400, 500, 950, 980, 1000)

    # added guided ends/assembly to use boundaries from reference
    guided_plus = SpliceGraph.create(
        plus_transfrags, guided_ends=True, guided_assembly=True)
    plus_bounds = tuple(guided_plus._find_node_boundaries())
    assert plus_bounds == (100, 150, 200, 300, 400, 500, 600, 650)
    guided_minus = SpliceGraph.create(
        minus_transfrags, guided_ends=True, guided_assembly=True)
    minus_bounds = tuple(guided_minus._find_node_boundaries())
    assert minus_bounds == (350, 400, 500, 750, 900, 950, 980, 1000)