def test_get_weight(db, bam_file, splice_file):
    """Check every weight type reported by DbCollection.get_weight.

    Read statistics (track "H3K4me3") and splice statistics (track
    "RNAseq") are loaded for "scaffold_1", after which each weight type is
    queried for the first model of the first connected-model group and
    compared with its expected value.
    """
    db.get_read_statistics("scaffold_1", bam_file, "H3K4me3")
    db.get_splice_statistics("scaffold_1", splice_file, "RNAseq")

    from pita.dbcollection import DbCollection

    collection = DbCollection(db)
    groups = list(collection.get_connected_models())
    model = groups[0][0]

    # (track name, weight type, expected value, tolerance).
    # A tolerance of None requests an exact equality check.
    cases = [
        ("H3K4me3", "all", 365, None),
        (None, "length", 60100, None),
        ("H3K4me3", "rpkm", 1163.9, 0.1),
        ("H3K4me3", "weighted", 0.01821963394342762, 0.0001),
        ("H3K4me3", "total_rpkm", 4292.832, 0.1),
        ("H3K4me3", "mean_exon", 1430.944, 0.1),
        ("RNAseq", "splice", 24, None),
        ("H3K4me3", "first", 64, None),
        (None, "evidence", 1, None),
    ]

    for track, weight_type, expected, tolerance in cases:
        observed = collection.get_weight(model, track, weight_type)
        if tolerance is None:
            assert expected == observed
        else:
            assert abs(expected - observed) < tolerance
def test_variants(db, variant_track):
    """Check node cuts and best-variant selection for the variant track.

    Transcripts read from ``variant_track`` are added to ``db`` with the
    name prefix "t1|". The test then asserts the node cuts of the first
    model in the first connected-model group, and the exons of the best
    variant chosen using a single "length" weight.
    """
    from pita.dbcollection import DbCollection
    from pita.io import read_bed_transcripts

    # Use a context manager so the BED file handle is closed even when
    # parsing or add_transcript raises.
    with open(variant_track) as bed:
        for tname, source, exons in read_bed_transcripts(bed):
            db.add_transcript("{0}{1}{2}".format("t1", "|", tname), source, exons)

    c = DbCollection(db)
    best_model = list(c.get_connected_models())[0][0]

    cuts = [str(e) for e in c.get_node_cuts(best_model)]
    assert ["chr1:800+900", "chr1:1400+1500"] == cuts

    best_variant = c.get_best_variant(
        best_model,
        [{"weight": 1, "type": "length", "name": "length"}],
    )
    exon_strings = [str(exon) for exon in best_variant]
    assert [
        "chr1:100+200",
        "chr1:400+700",
        "chr1:800+900",
        "chr1:1000+1300",
        "chr1:1400+1500",
        "chr1:1600+1900",
        "chr1:2000+2100",
    ] == exon_strings
def test_long_exon_filter(db, t1, t2):
    """Check the model lengths that remain after DbCollection.filter_long.

    Transcripts from the BED files ``t1`` and ``t2`` are added to ``db``
    with the name prefixes "t1|" and "t2|" respectively. After calling
    ``filter_long(evidence=1)`` on a collection restricted to "chr1", the
    sorted lengths of all connected models must be [1, 3, 5].
    """
    from pita.dbcollection import DbCollection
    from pita.io import read_bed_transcripts

    # Load both tracks in the original order (t1 first, then t2). Each file
    # is opened in a context manager so its handle is closed
    # deterministically.
    for prefix, path in (("t1", t1), ("t2", t2)):
        with open(path) as bed:
            for tname, source, exons in read_bed_transcripts(bed):
                db.add_transcript("{0}{1}{2}".format(prefix, "|", tname), source, exons)

    c = DbCollection(db, chrom="chr1")
    c.filter_long(evidence=1)

    # Flatten the groups yielded by get_connected_models into one list.
    models = [m for cluster in c.get_connected_models() for m in cluster]
    assert [1, 3, 5] == sorted(len(m) for m in models)
def test_db_collection(db):
    """Smoke test: build a DbCollection and iterate its connected models.

    There are no assertions; the test passes when constructing the
    collection and iterating ``get_connected_models()`` raises nothing.
    Each yielded item is printed.
    """
    from pita.dbcollection import DbCollection

    c = DbCollection(db)
    for model in c.get_connected_models():
        # The single-argument call form is valid on both Python 2 and 3 and
        # prints the same text; the bare ``print model`` statement is a
        # SyntaxError on Python 3.
        print(model)