Пример #1
0
def test_offset(complicated_offset):
    """Check HolesCleaner output on the ``complicated_offset`` graph.

    An alternating 1/0 pileup is cleaned with 3 as the third constructor
    argument and the result is compared with the expected sparse values.
    """
    #                     1  1  2--3  4  5---6   7   8
    positions = np.array([0, 1, 2, 4, 6, 8, 10, 12, 14], dtype="int")
    flags = np.array([1, 0, 1, 0, 1, 0, 1, 0, 1], dtype="bool")
    pileup = SparseValues(positions, flags)

    cleaned = HolesCleaner(complicated_offset, pileup, 3).run()
    expected = SparseValues([0, 8, 10], [1, 0, 1])
    assert cleaned == expected
Пример #2
0
def test_internals():
    """Check HolesCleaner on a graph made of one block of size 100.

    The cleaner is run with 10 as its third argument; the expected result
    drops the 10 and 19 boundaries from the input pileup.
    """
    single_node_graph = obg.GraphWithReversals(
        {101: obg.Block(100)},
        {101: []},
    )
    pileup = SparseValues(
        [0, 10, 19, 30, 41, 50],
        [1, 0, 1, 0, 1, 0],
    )

    result = HolesCleaner(single_node_graph, pileup, 10).run()
    expected = SparseValues([0, 30, 41, 50], [1, 0, 1, 0])

    assert result == expected
Пример #3
0
def test_single_peak():
    """Run SparseMaxPaths on one boolean peak and print the result.

    Uses the module-level ``graph`` and overwrites its ``node_indexes``
    before the run. Nothing is asserted; the output is only printed.
    """
    peak_area = SparseValues(
        np.array([0, 3, 32], dtype="int"),
        np.array([False, True, False], dtype="bool"),
    )

    score_positions = np.arange(0, 40, 2)
    score_values = (np.arange(0, 40, 2) + 1) % 5
    score_pileup = SparseValues(score_positions, score_values)
    score_pileup.track_size = 100

    graph.node_indexes = np.arange(0, 110, 10)
    finder = SparseMaxPaths(peak_area, graph, score_pileup)
    print(finder.run())
Пример #4
0
def convert_old_sparse(old_sparse):
    """Build a ``SparseValues`` from an older per-node sparse structure.

    For every key of ``old_sparse.data`` (in sorted order) the node's
    offset is looked up in ``graph.node_indexes``; the node's start value
    is recorded at that offset, followed by the node's own indexes shifted
    by the offset together with their values. A leading ``(0, 0)`` entry is
    always present.

    Returns the new ``SparseValues`` (built with ``sanitize=True``) whose
    ``track_size`` is the last entry of ``graph.node_indexes``.
    """
    graph = old_sparse.graph
    per_node = old_sparse.data
    all_indices, all_values = [0], [0]

    for node_id in sorted(per_node.keys()):
        node_entry = per_node[node_id]
        # Position of this node within the flattened track.
        offset = graph.node_indexes[node_id - graph.min_node]

        all_indices.append(offset)
        all_indices.extend(node_entry.indexes + offset)

        all_values.append(node_entry.start_value)
        all_values.extend(node_entry.values)

    converted = SparseValues(all_indices, all_values, sanitize=True)
    converted.track_size = graph.node_indexes[-1]
    return converted
Пример #5
0
def test_long_hole():
    """Check that HolesCleaner(…, 56) returns the pileup unchanged.

    The graph is a chain of 100 blocks of size 1 (node ``i`` -> ``i + 1``)
    and the pileup is False between positions 5 and 95.
    """
    node_ids = range(1, 101)
    blocks = {node_id: obg.Block(1) for node_id in node_ids}
    edges = {node_id: [node_id + 1] for node_id in range(1, 100)}

    pileup = SparseValues([0, 5, 95], [True, False, True])
    chain_graph = obg.GraphWithReversals(blocks, edges)

    cleaned = HolesCleaner(chain_graph, pileup, 56).run()
    assert cleaned == pileup
Пример #6
0
    def test_sample_equals_control_one_node(self):
        """Compare the p-value pileup with the expected sparse values.

        The sample covers node 2 only and the control covers nodes 1 and 2;
        the expected pileup is ``-log10(0.26424)`` on [3, 6) and 0 elsewhere.
        """
        sample_pileup = from_intervals(self.graph, [Interval(0, 3, [2])])
        control_pileup = from_intervals(
            self.graph, [Interval(0, 3, [1, 2])])

        actual = PValuesFinder(
            sample_pileup, control_pileup).get_p_values_pileup()

        expected_score = -np.log10(0.26424)
        expected = SparseValues([0, 3, 6], [0, expected_score, 0])
        self.assertEqual(actual, expected)
Пример #7
0
def test_holes_cleaner():
    """Run HolesCleaner on a 10-node chain graph and print the result.

    The pileup values alternate 0, 1, 0, 1, … over the listed indices.
    Nothing is asserted; the cleaned pileup is only printed.
    """
    boundaries = np.array(
        [80, 100, 180, 220, 240, 250, 300, 400, 500, 520, 610, 810])
    alternating = np.array([pos % 2 for pos in range(len(boundaries))])
    pileup = SparseValues(boundaries, alternating)

    blocks = {node_id: obg.Block(100) for node_id in range(1, 11)}
    edges = {node_id: [node_id + 1] for node_id in range(1, 10)}
    graph = obg.GraphWithReversals(blocks, edges)
    # graph.node_indexes = np.arange(0, 1001, 100)

    cleaned = HolesCleaner(graph, pileup, 10).run()
    print(cleaned)
Пример #8
0
def test_simple_peak():
    """Check that SparseMaxPaths finds one peak over nodes 1, 3 and 4.

    Uses the module-level ``graph``; both pileups get a track size of 100.
    """
    peak_area = SparseValues([0, 5, 35], [False, True, False])
    scores = SparseValues(
        [0, 5, 10, 20, 30, 35],
        [0, 1, 2, 3, 4, 0],
    )
    scores.track_size = 100
    peak_area.track_size = 100

    found = SparseMaxPaths(peak_area, graph, scores).run()
    expected = [Peak(5, 5, [1, 3, 4], graph=graph)]
    assert found == expected
    def test_get_summits(self):
        """Exercise the ``get_summits`` command end to end.

        Writes a q-value track and a max-path fasta file under ``tests/``,
        builds the graph with ``create_ob_graph``, runs ``get_summits`` and
        checks the first interval and its sequence in the summits file.
        """
        q_track = SparseValues(np.array([0]), np.array([3]))
        q_track.track_size = 22
        q_track.to_sparse_files("tests/test_qvalues")

        create_graph_args = [
            "create_ob_graph", "-o", "tests/testgraph.obg",
            "tests/vg_test_graph.json"
        ]
        run_argument_parser(create_graph_args)

        peaks = PeakCollection([Peak(0, 2, [1, 2], score=3)])
        fasta_writer = PeakFasta(self.correct_sequence_graph)
        fasta_writer.write_max_path_sequences(
            "tests/test_max_paths.fasta", peaks)

        summit_args = [
            "get_summits", "-g", "tests/testgraph.obg",
            "tests/test_max_paths.fasta", "tests/test_qvalues", "2"
        ]
        run_argument_parser(summit_args)

        summits = PeakCollection.from_fasta_file(
            "tests/test_max_paths_summits.fasta")
        self.assertEqual(summits.intervals[0], Peak(2, 6, [1]))
        self.assertEqual(summits.intervals[0].sequence.lower(), "tccc")
Пример #10
0
def test_offset_end_peak():
    """Check the peak found when the pileup is True from 85 to the end.

    Runs on the graph returned by ``offsetgraph()`` with a track size of
    100 and expects a single peak over nodes 109 and 110.
    """
    graph = offsetgraph()
    peak_area = SparseValues([0, 85], [False, True])
    scores = SparseValues([0, 85], [0, 10])
    scores.track_size = 100
    peak_area.track_size = 100

    found = SparseMaxPaths(peak_area, graph, scores).run()
    print(found)

    expected = [Peak(5, 10, [109, 110], graph=graph)]
    assert found == expected
Пример #11
0
def test_trailing_zeros():
    """Check the peak found when the score drops to 0 inside the peak area.

    Runs on the graph returned by ``offsetgraph()``; the peak area spans
    [5, 35) while the score is non-zero only on [15, 30).
    """
    graph = offsetgraph()
    peak_area = SparseValues([0, 5, 35], [False, True, False])
    scores = SparseValues([0, 15, 30], [0, 10, 0])
    scores.track_size = 100
    peak_area.track_size = 100

    found = SparseMaxPaths(peak_area, graph, scores).run()
    print(found)

    expected = [Peak(5, 5, [101, 103, 104], graph=graph)]
    assert found == expected
Пример #12
0
    def test_to_from_file(self):
        """Round-trip a SparseValues through its sparse-file format.

        Writes to ``test_sparsevalues.tmp``, reads it back and checks that
        the loaded object equals the original.
        """
        positions = np.array([1, 2, 3, 4, 5, 6, 7, 8])
        contents = np.array([1, 2, 3, 4, 5, 6, 7, 8])

        original = SparseValues(positions, contents)
        original.track_size = 10
        original.to_sparse_files("test_sparsevalues.tmp")

        reloaded = original.from_sparse_files("test_sparsevalues.tmp")
        self.assertEqual(original, reloaded)
Пример #13
0
def test_multiple_peak():
    """Check that two peaks are found on the ``complicated_offset()`` graph.

    The result is sorted by each peak's first region path before being
    compared, so the comparison does not depend on discovery order.
    """
    graph = complicated_offset()
    peak_area = SparseValues(
        [0, 1, 7, 8, 10, 12, 14],
        [0, 1, 0, 1, 0, 1, 0],
    )
    scores = SparseValues([0, 4], [5, 6])
    scores.track_size = 20
    peak_area.track_size = 20

    found = SparseMaxPaths(peak_area, graph, scores).run()
    found.sort(key=lambda peak: peak.region_paths[0])
    print(found)

    first_peak = Peak(1, 1, [101, 103, 104], graph=graph)
    second_peak = Peak(0, 2, [105, 107], graph=graph)
    assert found == [first_peak, second_peak]
Пример #14
0
# Script: compute max paths for one chromosome, keep those at least as long
# as the fragment length, score them, and write them to an interval
# collection file.
#
# Usage: <script> CHROM FRAGMENT_LENGTH
import logging
import sys

import numpy as np

# Configure logging before the project imports, as the original script did.
logging.basicConfig(level=logging.DEBUG)
from offsetbasedgraph import Graph, NumpyIndexedInterval
from offsetbasedgraph.vcfmap import load_variant_maps
from graph_peak_caller.postprocess.maxpaths import SparseMaxPaths
from graph_peak_caller.sparsediffs import SparseValues
from graph_peak_caller.peakcollection import PeakCollection

if len(sys.argv) < 3:
    sys.exit("usage: %s CHROM FRAGMENT_LENGTH" % sys.argv[0])

chrom = sys.argv[1]
fragment_length = int(sys.argv[2])

ref = NumpyIndexedInterval.from_file("/data/bioinf/tair2/" + chrom + "_linear_pathv2.interval")


graph = Graph.from_file("/data/bioinf/tair2/" + chrom + ".nobg")
direct = SparseValues.from_sparse_files(chrom + "_direct_pileup")
filtered_peaks = SparseValues.from_sparse_files(chrom + "_hole_cleaned")
variant_map = load_variant_maps(chrom, "/data/bioinf/tair2/")

max_paths, sub_graphs = SparseMaxPaths(filtered_peaks, graph, direct, ref, variant_map).run()
long_maxpaths = [path for path in max_paths if path.length() >= fragment_length]

# The original loop iterated over the undefined name ``long_max_paths`` and
# read ``self.q_values`` at module level; both raised NameError.
for max_path in long_maxpaths:
    assert max_path.length() > 0, "Max path %s has negative length" % max_path
    # NOTE(review): the original read ``self.q_values``, which does not exist
    # in this script. ``direct`` is the score pileup handed to SparseMaxPaths
    # above, so it is used here - confirm this is the intended score track.
    score = np.max(direct.get_interval_values(max_path))
    max_path.set_score(score)
    assert not np.isnan(score), "Score %s is nan" % score


PeakCollection(long_maxpaths).to_file(chrom + "_max_paths.intervalcollection", text_file=True)
Пример #15
0
def nonvar_pileup():
    """Return a six-entry SparseValues pileup with track size 80."""
    positions = [0, 15, 45, 50, 60, 70]
    heights = [0, 1, 4, 1, 3, 5]
    track = SparseValues(positions, heights)
    track.track_size = 80
    return track
Пример #16
0
def pileup():
    """Return a seven-entry SparseValues pileup with track size 80."""
    positions = [0, 15, 30, 45, 50, 60, 70]
    heights = [0, 1, 2, 6, 2, 3, 6]
    track = SparseValues(positions, heights)
    track.track_size = 80
    return track
Пример #17
0
def test_end_hole():
    """Check that HolesCleaner(…, 4) returns the pileup unchanged.

    The graph is a single block of size 12 and the pileup turns False at
    position 10.
    """
    single_node_graph = obg.GraphWithReversals(
        {1: obg.Block(12)},
        {1: []},
    )
    pileup = SparseValues([0, 5, 10], [False, True, False])

    cleaned = HolesCleaner(single_node_graph, pileup, 4).run()
    assert cleaned == pileup
Пример #18
0
def areas():
    """Return a 0/1 SparseValues track with track size 80."""
    boundaries = [0, 15, 42, 48, 72]
    flags = [0, 1, 0, 1, 0]
    track = SparseValues(boundaries, flags)
    track.track_size = 80
    return track
Пример #19
0
def test_non_touched_mid(small_graph):
    """Check HolesCleaner(…, 20) when nodes 101-106 are passed as touched."""
    touched = {101, 102, 103, 104, 105, 106}
    pileup = SparseValues(
        [0, 4, 22, 28, 56],
        [1, 0, 1, 0, 1],
    )

    result = HolesCleaner(small_graph, pileup, 20, touched).run()
    expected = SparseValues([0, 30, 40], [1, 0, 1])
    assert result == expected
Пример #20
0
def test_touched_edge(small_graph):
    """Check that HolesCleaner(…, 4) with nodes 101, 103, 106 touched
    returns the pileup unchanged."""
    touched = {101, 103, 106}
    pileup = SparseValues(
        [0, 4, 22, 28, 56],
        [1, 0, 1, 0, 1],
    )

    result = HolesCleaner(small_graph, pileup, 4, touched).run()
    assert result == pileup
Пример #21
0
def offset_areas():
    """Return an alternating 1/0 SparseValues track with track size 80."""
    boundaries = [0, 10, 11, 20, 29, 31]
    flags = [1, 0, 1, 0, 1, 0]
    track = SparseValues(boundaries, flags)
    track.track_size = 80
    return track