Exemplo n.º 1
0
    def test_match(self):
        """ Example where the transcript is a monoexonic match.

            A minus-strand interval on chr4 spanning (3900, 1100) is passed
            to identify_monoexon_transcript. The annotation is expected to
            carry the gene ID of TG6, a start_delta of 100 and an end_delta
            of -100.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            talon.make_temp_monoexonic_transcript_table(cursor, build)
            edge_dict = talon.make_edge_dict(cursor)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)
            transcript_dict = talon.make_transcript_dict(cursor, build)
            vertex_2_gene = talon.make_vertex_2_gene_dict(cursor)
            gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
                cursor, build)

            chrom = "chr4"
            strand = "-"
            positions = (3900, 1100)

            annotation = talon.identify_monoexon_transcript(
                chrom, positions, strand, cursor, location_dict, edge_dict,
                transcript_dict, vertex_2_gene, gene_starts, gene_ends,
                run_info)

            # NOTE(review): the ID of transcript "TG6-001" used to be looked
            # up here but was never asserted; consider asserting
            # annotation['transcript_ID'] against it once confirmed.
            correct_gene_ID = fetch_correct_ID("TG6", "gene", cursor)
            assert annotation['gene_ID'] == correct_gene_ID
            assert annotation['start_delta'] == 100
            assert annotation['end_delta'] == -100
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 2
0
    def test_ISM_suffix(self):
        """ Example where the transcript is an ISM with suffix

            Edge 4 (vertices 4 and 5) on chr1 is searched with search_for_ISM
            and the matches are handed to process_ISM. The result is expected
            to belong to gene TG1, with vertex and edge lists extended to
            [3, 4, 5, 6] and [3, 4, 5], and both ends reported as known.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            edge_dict = talon.make_edge_dict(cursor)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)
            transcript_dict = talon.make_transcript_dict(cursor, build)
            gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
                cursor, build)

            chrom = "chr1"
            strand = "+"
            positions = [500, 600, 900, 1000]
            edge_IDs = [4]
            vertex_IDs = [4, 5]

            all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)
            gene_ID, transcript_ID, novelty, start_end_info = \
                talon.process_ISM(
                    chrom, positions, strand, edge_IDs, vertex_IDs,
                    all_matches, transcript_dict, gene_starts, gene_ends,
                    edge_dict, location_dict, run_info)

            correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)

            assert gene_ID == correct_gene_ID
            assert start_end_info["vertex_IDs"] == [3, 4, 5, 6]
            assert start_end_info["edge_IDs"] == [3, 4, 5]
            # Start novelty is 0 because the exon is known
            assert start_end_info["start_novelty"] == 0
            assert start_end_info["end_novelty"] == 0
            # The completed edge set must be a key of transcript_dict
            full_edge_set = frozenset(start_end_info["edge_IDs"])
            assert transcript_dict[full_edge_set] is not None
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 3
0
    def test_partial_match_3prime(self):
        """ Example where the transcript is short, so it overlaps the
            annotated transcript but is not an accepted match.
            The end should get assigned to the annotated end (end_delta of
            -100), but the start is novel (start_delta of None). """

        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            talon.make_temp_monoexonic_transcript_table(cursor, build)
            edge_dict = talon.make_edge_dict(cursor)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)
            transcript_dict = talon.make_transcript_dict(cursor, build)
            vertex_2_gene = talon.make_vertex_2_gene_dict(cursor)
            gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
                cursor, build)

            chrom = "chr4"
            strand = "-"
            positions = (2000, 1100)

            annotation = talon.identify_monoexon_transcript(
                chrom, positions, strand, cursor, location_dict, edge_dict,
                transcript_dict, vertex_2_gene, gene_starts, gene_ends,
                run_info)

            correct_gene_ID = fetch_correct_ID("TG6", "gene", cursor)
            assert annotation['gene_ID'] == correct_gene_ID
            assert annotation['start_delta'] is None
            assert annotation['end_delta'] == -100
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 4
0
    def test_no_match(self):
        """ Example with no FSM match

            Edge 2 with vertices [2, 3, 4, 5] on chr1 is run through
            search_for_ISM and process_FSM; both the gene ID and the
            transcript ID are expected to come back as None.
        """

        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            edge_dict = talon.make_edge_dict(cursor)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)
            transcript_dict = talon.make_transcript_dict(cursor, build)
            gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
                cursor, build)

            chrom = "chr1"
            positions = [1, 100, 500, 600]
            strand = "+"
            edge_IDs = [2]
            vertex_IDs = [2, 3, 4, 5]

            all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)

            gene_ID, transcript_ID, novelty, start_end_info = \
                talon.process_FSM(
                    chrom, positions, strand, edge_IDs, vertex_IDs,
                    all_matches, gene_starts, gene_ends, edge_dict,
                    location_dict, run_info)

            assert gene_ID is None
            assert transcript_ID is None
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 5
0
    def test_ISM_prefix(self):
        """ Example where the transcript is a prefix ISM with a novel start

            Edge 2 (vertices 2 and 3) on chr1 is searched with search_for_ISM
            and passed to process_ISM. The result is expected to belong to
            gene TG1 with vertex IDs [1, 2, 3, 4] and edge IDs [1, 2, 3].
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            edge_dict = talon.make_edge_dict(cursor)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)
            transcript_dict = talon.make_transcript_dict(cursor, build)
            gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
                cursor, build)

            chrom = "chr1"
            strand = "+"
            positions = [1, 100, 500, 600]
            edge_IDs = [2]
            vertex_IDs = [2, 3]

            all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)
            gene_ID, transcript_ID, novelty, start_end_info = \
                talon.process_ISM(
                    chrom, positions, strand, edge_IDs, vertex_IDs,
                    all_matches, transcript_dict, gene_starts, gene_ends,
                    edge_dict, location_dict, run_info)

            correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)
            assert gene_ID == correct_gene_ID
            assert start_end_info["vertex_IDs"] == [1, 2, 3, 4]
            assert start_end_info["edge_IDs"] == [1, 2, 3]
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 6
0
    def test_FSM_end_diff(self):
        """ Example where the transcript is an FSM but has a difference on
            the ends large enough to be novel.

            The match is expected to resolve to gene TG2 / transcript
            TG2-001, and the reported end vertex is expected to be one past
            the vertex counter recorded before the call.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            edge_dict = talon.make_edge_dict(cursor)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)
            transcript_dict = talon.make_transcript_dict(cursor, build)
            # Vertex counter before processing, used to detect a new vertex
            orig_vertices = run_info['vertex']
            gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
                cursor, build)

            chrom = "chr2"
            # Last position is > 300bp away
            positions = [1, 100, 500, 600, 900, 1301]
            strand = "+"
            edge_IDs = [13, 14, 15]
            vertex_IDs = [14, 15, 16, 17]

            all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)

            gene_ID, transcript_ID, novelty, start_end_info = \
                talon.process_FSM(
                    chrom, positions, strand, edge_IDs, vertex_IDs,
                    all_matches, gene_starts, gene_ends, edge_dict,
                    location_dict, run_info)

            correct_gene_ID = fetch_correct_ID("TG2", "gene", cursor)
            correct_transcript_ID = fetch_correct_ID(
                "TG2-001", "transcript", cursor)
            assert gene_ID == correct_gene_ID
            assert transcript_ID == correct_transcript_ID
            assert start_end_info["end_vertex"] == orig_vertices + 1
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 7
0
    def test_FSM_perfect(self):
        """ Example where the transcript is a perfect full splice match.

            The chr1 positions [1, 100, 500, 600, 900, 1000] on the plus
            strand are passed to identify_transcript. The annotation is
            expected to carry the IDs of gene TG1 and transcript TG1-001 and
            an empty transcript_novelty list.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            edge_dict = talon.make_edge_dict(cursor)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)
            transcript_dict = talon.make_transcript_dict(cursor, build)
            vertex_2_gene = talon.make_vertex_2_gene_dict(cursor)
            gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
                cursor, build)

            chrom = "chr1"
            strand = "+"
            positions = [1, 100, 500, 600, 900, 1000]

            annotation = talon.identify_transcript(
                chrom, positions, strand, cursor, location_dict, edge_dict,
                transcript_dict, vertex_2_gene, gene_starts, gene_ends,
                run_info)

            correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)
            correct_transcript_ID = fetch_correct_ID(
                "TG1-001", "transcript", cursor)
            assert annotation['gene_ID'] == correct_gene_ID
            assert annotation['transcript_ID'] == correct_transcript_ID
            assert annotation['transcript_novelty'] == []
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 8
0
    def test_ISM_internal(self):
        """ Example where the transcript matches an internal exon

            The chr1 interval (500, 600) on the plus strand is passed to
            identify_monoexon_transcript. The annotation is expected to
            belong to gene TG1, to list "ISM_transcript" among its transcript
            novelty entries, and to have start and end deltas of 0.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            talon.make_temp_monoexonic_transcript_table(cursor, build)
            edge_dict = talon.make_edge_dict(cursor)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)
            transcript_dict = talon.make_transcript_dict(cursor, build)
            vertex_2_gene = talon.make_vertex_2_gene_dict(cursor)
            gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
                cursor, build)

            chrom = "chr1"
            strand = "+"
            positions = (500, 600)

            annotation = talon.identify_monoexon_transcript(
                chrom, positions, strand, cursor, location_dict, edge_dict,
                transcript_dict, vertex_2_gene, gene_starts, gene_ends,
                run_info)

            correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)
            # The second-to-last field of each novelty entry is compared
            # against the novelty type name below
            novelty_types = [x[-2] for x in annotation['transcript_novelty']]
            assert annotation['gene_ID'] == correct_gene_ID
            assert "ISM_transcript" in novelty_types
            assert annotation['start_delta'] == annotation['end_delta'] == 0
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 9
0
    def test_antisense(self):
        """ Example where the transcript is antisense

            The chr4 interval (1300, 3900) on the plus strand is passed to
            identify_monoexon_transcript. The annotation is expected to list
            "antisense_gene" among its gene novelty types and
            "antisense_transcript" among its transcript novelty types.
        """

        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            talon.make_temp_monoexonic_transcript_table(cursor, build)
            edge_dict = talon.make_edge_dict(cursor)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)
            transcript_dict = talon.make_transcript_dict(cursor, build)
            vertex_2_gene = talon.make_vertex_2_gene_dict(cursor)
            gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
                cursor, build)

            chrom = "chr4"
            strand = "+"
            positions = (1300, 3900)

            annotation = talon.identify_monoexon_transcript(
                chrom, positions, strand, cursor, location_dict, edge_dict,
                transcript_dict, vertex_2_gene, gene_starts, gene_ends,
                run_info)

            # NOTE(review): the ID of gene "TG6" used to be looked up here
            # but was never compared against anything; consider asserting
            # on it once the expected field is confirmed.
            gene_novelty_types = [x[-2] for x in annotation['gene_novelty']]
            t_novelty_types = [
                x[-2] for x in annotation['transcript_novelty']]
            assert annotation['gene_novelty'][0][-1] == "TRUE"
            assert "antisense_gene" in gene_novelty_types
            assert "antisense_transcript" in t_novelty_types
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 10
0
    def test_datasets(self):
        """ Try to add dataset metadata to database

            One dataset tuple is inserted with add_datasets, after which the
            dataset table is expected to hold exactly one row.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            edge_dict = talon.make_edge_dict(cursor)
            run_info = talon.init_run_info(cursor, build)

            datasets = [(1, "toy", "toy", "toy")]
            talon.add_datasets(cursor, datasets)

            # Test if items are there
            query = "SELECT * FROM dataset"
            cursor.execute(query)
            assert len(cursor.fetchall()) == 1
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 11
0
    def test_antisense(self):
        """ Example where the vertices are known but there is no same-strand
            match

            A minus-strand transcript on chr1 with vertices [5, 2] and one
            edge ID beyond the current edge counter is passed to
            process_spliced_antisense. The last field of the last gene
            novelty entry is expected to equal the ID of gene TG1, and the
            resulting vertex IDs are expected to be [6, 5, 2, 1].
        """

        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            edge_dict = talon.make_edge_dict(cursor)
            locations = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)
            transcript_dict = talon.make_transcript_dict(cursor, build)
            vertex_2_gene = talon.make_vertex_2_gene_dict(cursor)
            gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
                cursor, build)

            # Construct temp novel gene db
            talon.make_temp_novel_gene_table(cursor, build)

            chrom = "chr1"
            # An edge ID one past the current counter, i.e. not yet in use
            edge_IDs = [run_info.edge + 1]
            positions = [1000, 900, 100, 1]
            vertex_IDs = [5, 2]
            strand = "-"

            # Find antisense match
            (gene_ID, transcript_ID, gene_novelty, transcript_novelty,
             start_end_info) = talon.process_spliced_antisense(
                chrom, positions, strand, edge_IDs, vertex_IDs,
                transcript_dict, gene_starts, gene_ends, edge_dict,
                locations, vertex_2_gene, run_info, cursor)

            correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)
            anti_gene_ID = gene_novelty[-1][-1]
            assert anti_gene_ID == correct_gene_ID
            assert start_end_info["vertex_IDs"] == [6, 5, 2, 1]
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 12
0
    def test_no_match(self):
        """ Example where no match exists

            find_gene_match_on_vertex_basis is called with the vertex IDs
            (1000, 2000, 3000, 4000) on the plus strand and is expected to
            return None.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            run_info = talon.init_run_info(cursor, build)
            vertex2gene = talon.make_vertex_2_gene_dict(cursor)

            vertex_IDs = (1000, 2000, 3000, 4000)
            strand = "+"

            gene_ID = talon.find_gene_match_on_vertex_basis(
                vertex_IDs, strand, vertex2gene)

            assert gene_ID is None
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 13
0
    def test_abundance(self):
        """ Try to add abundance entries to database in batches

            Three abundance tuples are inserted with a batch size of 2, after
            which the abundance table is expected to hold three rows.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            edge_dict = talon.make_edge_dict(cursor)
            run_info = talon.init_run_info(cursor, build)

            abundance = [(1, "test", 5), (2, "test", 1), (3, "test", 2)]
            # Batch size smaller than the entry count, so more than one
            # batch is needed
            batch_size = 2
            talon.batch_add_abundance(cursor, abundance, batch_size)

            # Test if items are there
            query = "SELECT * FROM abundance"
            cursor.execute(query)
            assert len(cursor.fetchall()) == 3
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 14
0
    def test_all_known_edges(self):
        """ Example where the toy transcript database contains matches for all
            edges.

            The vertex path [1, 2, 3, 4, 5, 6] on the plus strand is expected
            to map to edge IDs (1, 2, 3, 4, 5), each with a novelty of 0.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            edge_dict = talon.make_edge_dict(cursor)
            run_info = talon.init_run_info(cursor, build)
        finally:
            # The database is only needed to build the inputs above
            conn.close()

        vertex_IDs = [1, 2, 3, 4, 5, 6]
        strand = "+"
        edge_IDs, novelty = talon.match_all_transcript_edges(
            vertex_IDs, strand, edge_dict, run_info)

        assert edge_IDs == (1, 2, 3, 4, 5)
        assert novelty == (0, 0, 0, 0, 0)
    def test_single_match(self):
        """ Example where the interval overlaps exactly one gene

            The chr1 interval 0-1500 on the plus strand is expected to match
            gene TG1 on the same strand.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)

            chrom = "chr1"
            pos = [0, 1500]
            strand = "+"

            gene_ID, match_strand = talon.search_for_overlap_with_gene(
                chrom, pos[0], pos[1], strand, cursor, run_info)

            assert gene_ID == fetch_correct_ID("TG1", "gene", cursor)
            assert match_strand == strand
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 16
0
    def test_transcript_annot(self):
        """ Try to add transcript annotation entries to database in batches

            Two annotation tuples with the value "NOVEL" are inserted via
            batch_add_annotations, after which transcript_annotations is
            expected to hold two rows with that value.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            edge_dict = talon.make_edge_dict(cursor)
            run_info = talon.init_run_info(cursor, build)

            annot = [(1, "toy", "TALON", "status", "NOVEL"),
                     (2, "toy", "TALON", "status", "NOVEL")]
            batch_size = 2
            talon.batch_add_annotations(
                cursor, annot, "transcript", batch_size)

            # Test if items are there
            query = ("SELECT * FROM transcript_annotations "
                     "WHERE value = 'NOVEL'")
            cursor.execute(query)
            assert len(cursor.fetchall()) == 2
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 17
0
    def test_NNC_type_match(self):
        """ Example where some vertices match a gene, while others don't.

            find_gene_match_on_vertex_basis is called with the vertex IDs
            (1, 200, 3, 4, 5, 6) on the plus strand and is expected to return
            the ID of gene TG1.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            run_info = talon.init_run_info(cursor, build)
            vertex2gene = talon.make_vertex_2_gene_dict(cursor)

            vertex_IDs = (1, 200, 3, 4, 5, 6)
            strand = "+"

            gene_ID = talon.find_gene_match_on_vertex_basis(
                vertex_IDs, strand, vertex2gene)

            correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)
            assert gene_ID == correct_gene_ID
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 18
0
    def test_gene_update(self):
        """ Try to add novel gene entries to database while ignoring duplicates

            One gene is created on chr4 with create_gene and written with
            add_genes. The genes table is then expected to contain gene ID 7
            and seven rows in total.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            edge_dict = talon.make_edge_dict(cursor)
            run_info = talon.init_run_info(cursor, build)
            talon.make_temp_novel_gene_table(cursor, build)
            talon.create_gene("chr4", 1, 1000, "+", cursor, run_info)

            talon.add_genes(cursor)

            # Test if gene with ID 7 is there, but make sure we didn't add
            # duplicates of the other genes
            query = "SELECT * FROM genes"
            gene_IDs = [x['gene_ID'] for x in cursor.execute(query)]
            assert 7 in gene_IDs
            assert len(gene_IDs) == 7
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 19
0
    def test_all_known_locations(self):
        """ Case in which every splice position of the query is already
            present in the toy database.

            The vertex IDs returned by match_splice_vertices are expected to
            be [2, 3, 4, 5], and the vertex counter in run_info is expected
            to be unchanged by the call.
        """
        connection, db_cursor = get_db_cursor()
        genome_build = "toy_build"
        known_locations = talon.make_location_dict(genome_build, db_cursor)
        counters = talon.init_run_info(db_cursor, genome_build)
        # Snapshot of the vertex counter taken before matching
        vertices_before = counters['vertex']
        connection.close()

        chromosome, read_strand = "chr1", "+"
        coordinates = [1, 100, 500, 600, 900, 1000]
        matched_vertices, vertex_novelty = talon.match_splice_vertices(
            chromosome, coordinates, read_strand, known_locations, counters)

        assert matched_vertices == [2, 3, 4, 5]
        assert counters['vertex'] == vertices_before
Exemplo n.º 20
0
    def test_observed(self):
        """ Try to add observed entries to database in batches

            Three observed tuples are inserted with a batch size of 1, after
            which the observed table is expected to hold three rows.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            edge_dict = talon.make_edge_dict(cursor)
            run_info = talon.init_run_info(cursor, build)

            observed = [(1, 1, 1, "read1", "test", 1, 2, 1, 1, 0, 0, 100),
                        (2, 1, 1, "read2", "test", 1, 2, 1, 1, 0, 0, 100),
                        (3, 1, 1, "read3", "test", 1, 2, 1, 1, 0, 0, 100)]
            # One entry per batch
            batch_size = 1
            talon.batch_add_observed(cursor, observed, batch_size)

            # Test if items are there
            query = "SELECT * FROM observed"
            cursor.execute(query)
            assert len(cursor.fetchall()) == 3
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 21
0
    def test_overlap_but_no_vertex_match(self):
        """ Example where the transcript is short, so it overlaps the
            annotated transcript but is not an accepted match.
            Neither end gets assigned to an annotated position: both
            start_delta and end_delta are expected to be None. The call is
            also expected to add two entries to vertex_2_gene and one row to
            the temp_monoexon table. """

        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            talon.make_temp_monoexonic_transcript_table(cursor, build)
            edge_dict = talon.make_edge_dict(cursor)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)
            transcript_dict = talon.make_transcript_dict(cursor, build)
            vertex_2_gene = talon.make_vertex_2_gene_dict(cursor)
            gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
                cursor, build)

            # Baseline sizes, compared against after the call
            tot_vertices = len(vertex_2_gene)
            query = """ SELECT COUNT(*) FROM temp_monoexon """
            tot_monoexonic = cursor.execute(query).fetchone()[0]

            chrom = "chr4"
            strand = "-"
            positions = (2500, 2000)

            annotation = talon.identify_monoexon_transcript(
                chrom, positions, strand, cursor, location_dict, edge_dict,
                transcript_dict, vertex_2_gene, gene_starts, gene_ends,
                run_info)

            correct_gene_ID = fetch_correct_ID("TG6", "gene", cursor)
            assert annotation['gene_ID'] == correct_gene_ID
            assert annotation['start_delta'] is None
            assert annotation['end_delta'] is None

            # Now check if the transcript got added to the right data
            # structures
            assert len(vertex_2_gene) == tot_vertices + 2
            assert cursor.execute(query).fetchone()[0] == tot_monoexonic + 1
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 22
0
    def test_edgecase_single_base_exon(self):
        """ Example where the first exon is only one basepair long

            permissive_vertex_search is called for the "start" at chr1
            position 1 with splice position 500. It is expected to return
            the vertex ID found for chr1 position 1 and a difference of 0.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)

            chrom = "chr1"
            # First exon starts and ends at position 1
            pos = [1, 1, 500, 600]
            start = pos[0]
            splice_pos = pos[2]
            strand = "+"

            vertex_match, diff = talon.permissive_vertex_search(
                chrom, start, strand, splice_pos, "start", location_dict,
                run_info)
            assert vertex_match == fetch_correct_vertex_ID(chrom, 1, cursor)
            assert diff == 0
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
    def test_same_strand_match_with_two_genes(self):
        """ Example where interval overlaps two genes, one of which is on the
            same strand.

            The chr1 interval 1500-910 on the minus strand is expected to
            match gene TG3 on the same strand.
        """

        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)

            chrom = "chr1"
            pos = [1500, 910]
            strand = "-"

            gene_ID, match_strand = talon.search_for_overlap_with_gene(
                chrom, pos[0], pos[1], strand, cursor, run_info)

            assert gene_ID == fetch_correct_ID("TG3", "gene", cursor)
            assert match_strand == strand
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
    def test_same_strand_match_left_overlap(self):
        """ Example where the overlap is on the same strand. Query start is to
            the left of the gene, and query end is before the end of the gene.

            The chr1 interval 550-1700 on the minus strand is expected to
            match gene TG3 on the same strand.
        """

        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)

            chrom = "chr1"
            pos = [550, 1700]
            strand = "-"

            gene_ID, match_strand = talon.search_for_overlap_with_gene(
                chrom, pos[0], pos[1], strand, cursor, run_info)

            assert gene_ID == fetch_correct_ID("TG3", "gene", cursor)
            assert match_strand == strand
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 25
0
    def test_edge_update(self):
        """ Try to add novel exons and introns.

            One exon edge is created with create_edge and written with
            batch_add_edges. The edge table is then expected to contain the
            ID one past the original edge counter, and exactly that many
            rows.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            edge_dict = talon.make_edge_dict(cursor)
            run_info = talon.init_run_info(cursor, build)
            # Edge counter before the new edge is created
            orig_n_edges = run_info.edge

            talon.create_edge(2, 1, "exon", "-", edge_dict, run_info)

            batch_size = 10
            talon.batch_add_edges(cursor, edge_dict, batch_size)

            # Test if the edge table has the correct number of edges now
            query = "SELECT * FROM edge"
            cursor.execute(query)
            edge_IDs = [x['edge_ID'] for x in cursor.fetchall()]
            assert orig_n_edges + 1 in edge_IDs
            assert len(edge_IDs) == orig_n_edges + 1
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
    def test_2_genes_same_strand(self):
        """ Example where query overlaps two genes. Must choose the one with
            more overlap

            The chr1 interval 800-5050 on the plus strand is expected to
            match gene TG1 on the plus strand.
        """

        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)

            chrom = "chr1"
            pos = [800, 5050]
            strand = "+"

            gene_ID, match_strand = talon.search_for_overlap_with_gene(
                chrom, pos[0], pos[1], strand, cursor, run_info)

            assert gene_ID == fetch_correct_ID("TG1", "gene", cursor)
            assert match_strand == "+"
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 27
0
    def test_location_update(self):
        """ Update locations

            One vertex is created on chr4 at position 2000 with create_vertex
            and written with batch_add_locations. The location table is then
            expected to contain the ID one past the original vertex counter,
            and exactly that many rows.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)
            # Vertex counter before the new vertex is created
            orig_n_pos = run_info.vertex

            talon.create_vertex("chr4", 2000, run_info, location_dict)

            batch_size = 10
            talon.batch_add_locations(cursor, location_dict, batch_size)

            # Test if the table has the correct number of locations now
            query = "SELECT * FROM location"
            cursor.execute(query)
            loc_IDs = [x['location_ID'] for x in cursor.fetchall()]
            assert orig_n_pos + 1 in loc_IDs
            assert len(loc_IDs) == orig_n_pos + 1
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
    def test_antisense_match(self):
        """ Example where interval overlaps one gene in the antisense direction.

            The chr1 interval 1400-2100 on the plus strand is expected to
            match gene TG3, with the match reported on the minus strand.
        """

        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)

            chrom = "chr1"
            pos = [1400, 2100]
            strand = "+"

            gene_ID, match_strand = talon.search_for_overlap_with_gene(
                chrom, pos[0], pos[1], strand, cursor, run_info)

            assert gene_ID == fetch_correct_ID("TG3", "gene", cursor)
            assert match_strand == "-"
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
    def test_no_match(self):
        """ Example where the supplied interval should not match anything

            The chr1 interval 3000-4000 on the plus strand is searched twice,
            once as given and once with its endpoints swapped; both searches
            are expected to return a gene ID of None.
        """
        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            talon.make_temp_novel_gene_table(cursor, build)
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)

            chrom = "chr1"
            pos = [3000, 4000]
            strand = "+"
            gene_ID, match_strand = talon.search_for_overlap_with_gene(
                chrom, pos[0], pos[1], strand, cursor, run_info)
            assert gene_ID is None

            # Should get same results for flipped interval. The endpoints
            # are swapped here; previously the identical call was repeated,
            # so the flipped case was never exercised.
            gene_ID, match_strand = talon.search_for_overlap_with_gene(
                chrom, pos[1], pos[0], strand, cursor, run_info)
            assert gene_ID is None
        finally:
            # Release the connection even when an assertion above fails
            conn.close()
Exemplo n.º 30
0
    def test_beyond_cutoff_distance(self):
        """ Example where the only nearby vertices are beyond the cutoff
            distance, so no vertex match is returned.

            With cutoff_5p set to 250, permissive_vertex_search for the
            "start" at chr1 position 1700 on the minus strand is expected to
            return None for both the vertex match and the difference.
        """

        conn, cursor = get_db_cursor()
        try:
            build = "toy_build"
            location_dict = talon.make_location_dict(build, cursor)
            run_info = talon.init_run_info(cursor, build)
        finally:
            # The database is only needed to build the inputs above
            conn.close()

        chrom = "chr1"
        pos = [1700, 1500, 1000, 900]
        start = pos[0]
        splice_pos = pos[1]
        run_info.cutoff_5p = 250
        strand = "-"

        vertex_match, diff = talon.permissive_vertex_search(
            chrom, start, strand, splice_pos, "start", location_dict, run_info)

        assert vertex_match is None
        assert diff is None