Exemplo n.º 1
0
    def test_no_match(self):
        """ Example with no FSM match """

        conn, cursor = get_db_cursor()
        build = "toy_build"
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(cursor, build)

        chrom = "chr1"
        positions = [1, 100, 500, 600] 
        strand = "+"
        edge_IDs = [2]
        vertex_IDs = [2,3,4,5]
        v_novelty = [0, 0, 0, 0]

        all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)

        gene_ID, transcript_ID, novelty, start_end_info = talon.process_FSM(chrom,
                                                            positions, strand, edge_IDs,
                                                            vertex_IDs, all_matches,
                                                            gene_starts, gene_ends,
                                                            edge_dict,
                                                            location_dict, run_info)

        assert gene_ID == transcript_ID == None 
        conn.close()       
Exemplo n.º 2
0
    def test_FSM_end_diff(self):
        """ Example where the transcript is an FSM but has a difference on
            the ends large enough to be novel.
        """
        conn, cursor = get_db_cursor()
        build = "toy_build"
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        orig_vertices = run_info['vertex']
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(cursor, build)

        chrom = "chr2"
        positions = [1, 100, 500, 600, 900, 1301] #Last postion is > 300bp away
        strand = "+"
        edge_IDs = [13, 14, 15]
        vertex_IDs = [14, 15, 16, 17] 
        v_novelty = [0, 0, 0, 0]
  
        all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)

        gene_ID, transcript_ID, novelty, start_end_info = talon.process_FSM(chrom,
                                                            positions, strand, edge_IDs,
                                                            vertex_IDs, all_matches,
                                                            gene_starts, gene_ends,
                                                            edge_dict,
                                                            location_dict, run_info) 

        correct_gene_ID = fetch_correct_ID("TG2", "gene", cursor)
        correct_transcript_ID = fetch_correct_ID("TG2-001", "transcript", cursor)
        assert gene_ID == correct_gene_ID
        assert transcript_ID == correct_transcript_ID
        assert start_end_info["end_vertex"] == orig_vertices + 1
        conn.close()
Exemplo n.º 3
0
    def test_ISM_prefix(self):
        """ Example where the transcript is a prefix ISM with a novel start
        """
        conn, cursor = get_db_cursor()
        build = "toy_build"
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
            cursor, build)
        orig_exons = run_info["edge"]

        chrom = "chr1"
        strand = "+"
        positions = [1, 100, 500, 600]
        edge_IDs = [2]
        vertex_IDs = [2, 3]
        v_novelty = [0, 0]

        all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)
        gene_ID, transcript_ID, novelty, start_end_info = talon.process_ISM(
            chrom, positions, strand, edge_IDs, vertex_IDs, all_matches,
            transcript_dict, gene_starts, gene_ends, edge_dict, location_dict,
            run_info)

        correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)
        assert gene_ID == correct_gene_ID
        assert start_end_info["vertex_IDs"] == [1, 2, 3, 4]
        assert start_end_info["edge_IDs"] == [1, 2, 3]
        conn.close()
Exemplo n.º 4
0
    def test_ISM_suffix(self):
        """ Example where the transcript is an ISM with suffix
        """
        conn, cursor = get_db_cursor()
        build = "toy_build"
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
            cursor, build)

        chrom = "chr1"
        strand = "+"
        positions = [500, 600, 900, 1000]
        edge_IDs = [4]
        vertex_IDs = [4, 5]
        v_novelty = [0, 0]

        all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)
        gene_ID, transcript_ID, novelty, start_end_info = talon.process_ISM(
            chrom, positions, strand, edge_IDs, vertex_IDs, all_matches,
            transcript_dict, gene_starts, gene_ends, edge_dict, location_dict,
            run_info)

        correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)

        assert gene_ID == correct_gene_ID
        assert start_end_info["vertex_IDs"] == [3, 4, 5, 6]
        assert start_end_info["edge_IDs"] == [3, 4, 5]
        assert start_end_info["start_novelty"] == 0  # because the exon is known
        assert start_end_info["end_novelty"] == 0
        assert transcript_dict[frozenset(start_end_info["edge_IDs"])] != None
        conn.close()
Exemplo n.º 5
0
    def test_find_no_match(self):
        """ Example where the toy transcript database contains no matches
            for the edge set.
        """
        conn, cursor = get_db_cursor()
        build = "toy_build"
        transcript_dict = talon.make_transcript_dict(cursor, build)
        conn.close()

        edges = (100, 200, 300)
        matches = talon.search_for_ISM(edges, transcript_dict)

        # Make sure that no match got returned
        assert matches == None
Exemplo n.º 6
0
    def test_find_monoexon_match(self):
        """ Input is a sinlge exon that matches part of an existing transcript
        """
        conn, cursor = get_db_cursor()
        build = "toy_build"
        transcript_dict = talon.make_transcript_dict(cursor, build)

        edges = (14, )
        matches = talon.search_for_ISM(edges, transcript_dict)

        # Make sure that correct match got returned
        correct_gene_ID = fetch_correct_ID("TG2", "gene", cursor)

        assert matches[0]["gene_ID"] == correct_gene_ID
        conn.close()
Exemplo n.º 7
0
    def test_find_match(self):
        """ Example where the toy transcript database contains exactly one  
            ISM match for the transcript.
        """
        conn, cursor = get_db_cursor()
        build = "toy_build"
        transcript_dict = talon.make_transcript_dict(cursor, build)

        edges = (2, 3)
        matches = talon.search_for_ISM(edges, transcript_dict)

        # Make sure that correct match got returned
        correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)

        assert matches[0]["gene_ID"] == correct_gene_ID
        conn.close()
Exemplo n.º 8
0
    def test_no_match(self):
        """ Example with no ISM match """

        conn, cursor = get_db_cursor()
        build = "toy_build"
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
            cursor, build)

        chrom = "chr1"
        strand = "+"
        positions = [1, 100, 900, 1000]
        edge_IDs = [200]
        vertex_IDs = [2, 5]
        v_novelty = [0, 0]

        all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)
        assert all_matches == None
        conn.close()
Exemplo n.º 9
0
    def test_FSM_perfect(self):
        """ Example where the transcript is a perfect full splice match.
        """
        conn, cursor = get_db_cursor()
        build = "toy_build"
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)        
        transcript_dict = talon.make_transcript_dict(cursor, build)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(cursor, build)

        chrom = "chr1"
        positions = [1, 100, 500, 600, 900, 1010]
        strand = "+"
        edge_IDs = [2, 3, 4]
        vertex_IDs = [2, 3, 4, 5]
        v_novelty = [0, 0, 0, 0]

        all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)

        gene_ID, transcript_ID, novelty, start_end_info = talon.process_FSM(chrom, 
                                                            positions, strand, edge_IDs,
                                                            vertex_IDs, all_matches,
                                                            gene_starts, gene_ends,
                                                            edge_dict,
                                                            location_dict, run_info)

        correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor) 
        correct_transcript_ID = fetch_correct_ID("TG1-001", "transcript", cursor)
        assert gene_ID == correct_gene_ID
        assert transcript_ID == correct_transcript_ID
        assert novelty == []
        assert start_end_info["start_vertex"] == 1
        assert start_end_info["end_vertex"] == 6
        assert start_end_info["diff_3p"] == 10
        conn.close()