示例#1
0
def apply_feature_filters_length_test():
    """Check apply_feature_filters against a single LengthFilter.

    The same six-sample capture is run through length filters with
    different bounds, covering both inclusive edges, both failure
    directions, and the case where no bounds are given at all.
    """
    # Six identical samples -- mean: 1; stdv: 0; median: 1; min: 1; max: 1
    signal = [1] * 6

    def run_length_filter(lower, upper):
        # Apply exactly one length filter with the given bounds to the signal.
        only_length = [filtering.LengthFilter(lower, upper)]
        return filtering.apply_feature_filters(signal, only_length)

    # Passes: the length (6) sits exactly on the upper bound, which is inclusive.
    assert run_length_filter(0, 6)

    # Passes: the length (6) sits exactly on the lower bound, which is inclusive.
    assert run_length_filter(6, 10)

    # Fails: the capture is shorter than the lower bound.
    assert not run_length_filter(8, 10)

    # Fails: the capture is longer than the upper bound.
    assert not run_length_filter(0, 5)

    # Passes: neither bound is given, so nothing is actually filtered.
    assert run_length_filter(None, None)
示例#2
0
def find_captures_4_unfolded_terminal_test():
    """Example capture window contains 1 long terminal capture that has unfolded.

    Tests: find_captures returns 1 capture when terminal_capture_only = True.
    """
    signal = picoampere_signal_from_data_file(
        "src/tests/data/capture_windows/test_data_capture_window_4.txt.gz"
    )
    capture_window = Window(5_511_887, 5_604_585)
    length_filters = [filtering.LengthFilter(100, None)]

    found = segment.find_captures(
        signal,
        2,  # channel number
        capture_window,
        0.7,  # signal threshold fraction
        230,  # alternative open channel current (pA)
        terminal_capture_only=True,
        filters=length_filters,
        delay=0,
        end_tol=0,
    )

    assert len(found) == 1
示例#3
0
def find_captures_3_multicapture_nonterminal_test():
    """Example capture window contains 1 long terminal capture & 1 medium capture.

    Tests: with terminal_capture_only = False, find_captures returns 2
    captures, each matching one of the expected (start, end, ejected) tuples.
    """
    signal = picoampere_signal_from_data_file(
        "src/tests/data/capture_windows/test_data_capture_window_3.txt.gz"
    )
    capture_window = Window(1_187_841, 1_280_674)
    # Expected captures as (window start, window end, ejected).
    expected = [(1_200_088, 1_201_033, False), (1_252_611, 1_280_674, True)]
    length_filters = [filtering.LengthFilter(100, None)]

    found = segment.find_captures(
        signal,
        2,  # channel number
        capture_window,
        0.7,  # signal threshold fraction
        230,  # alternative open channel current (pA)
        terminal_capture_only=False,
        filters=length_filters,
        delay=3,
        end_tol=0,
    )

    assert len(found) == len(expected)
    for capture in found:
        observed = (capture.window.start, capture.window.end, capture.ejected)
        assert observed in expected
示例#4
0
def find_captures_3_multicapture_terminal_test():
    """Example capture window contains 1 long terminal capture & 2 medium/short captures.

    Tests: find_captures returns only 1 capture when
    terminal_capture_only = True.
    """
    signal = picoampere_signal_from_data_file(
        "src/tests/data/capture_windows/test_data_capture_window_3.txt.gz"
    )
    capture_window = Window(1_187_841, 1_280_674)
    # Keyword options forwarded to find_captures.
    options = {
        "terminal_capture_only": True,
        "filters": [filtering.LengthFilter(100, None)],
        "delay": 0,
        "end_tol": 0,
    }

    found = segment.find_captures(
        signal,
        2,  # channel number
        capture_window,
        0.7,  # signal threshold fraction
        230,  # alternative open channel current (pA)
        **options,
    )

    assert len(found) == 1
示例#5
0
def find_captures_2_nocaptures_test():
    """Example capture window contains no captures.

    Test: find_captures returns an empty result.
    """
    signal = picoampere_signal_from_data_file(
        "src/tests/data/capture_windows/test_data_capture_window_2.txt.gz"
    )
    capture_window = Window(3_423_474, 3_516_439)
    length_filters = [filtering.LengthFilter(100, None)]

    found = segment.find_captures(
        signal,
        1,  # channel number
        capture_window,
        0.7,  # signal threshold fraction
        230,  # alternative open channel current (pA)
        terminal_capture_only=False,
        filters=length_filters,
        delay=10,
        end_tol=0,
    )

    assert len(found) == 0
示例#6
0
def find_captures_1_double_capture_noterminal_2_test():
    """Example capture window contains 2 long captures, neither terminal.

    The window also contains a few short blips.

    Test: terminal_capture_only = False returns 2 captures.
    """
    signal = picoampere_signal_from_data_file(
        "src/tests/data/capture_windows/test_data_capture_window_1.txt.gz"
    )
    capture_window = Window(4_765_695, 4_858_482)
    # Keyword options forwarded to find_captures.
    options = {
        "terminal_capture_only": False,
        "filters": [filtering.LengthFilter(100, None)],
        "delay": 0,
        "end_tol": 0,
    }

    found = segment.find_captures(
        signal,
        1,  # channel number
        capture_window,
        0.7,  # signal threshold fraction
        230,  # alternative open channel current (pA)
        **options,
    )

    assert len(found) == 2
示例#7
0
def find_captures_0_single_capture_terminal_test():
    """Capture window 0: expect exactly one capture, flagged as ejected.

    Tests: with terminal_capture_only = True, find_captures returns 1
    capture whose (start, end, ejected) matches the expected tuple. The
    expected boundaries are written as offsets from the window start.
    """
    capture_window = Window(3_572_989, 3_665_680)
    signal = picoampere_signal_from_data_file(
        "src/tests/data/capture_windows/test_data_capture_window_0.txt.gz"
    )
    origin = capture_window.start
    # Expected captures as (window start, window end, ejected).
    expected = [(33822 + origin, 92691 + origin, True)]
    length_filters = [filtering.LengthFilter(100, None)]

    found = segment.find_captures(
        signal,
        1,  # channel number
        capture_window,
        0.7,  # signal threshold fraction
        230,  # alternative open channel current (pA)
        terminal_capture_only=True,
        filters=length_filters,
        delay=0,
        end_tol=0,
    )

    assert len(found) == len(expected)
    for capture in found:
        observed = (capture.window.start, capture.window.end, capture.ejected)
        assert observed in expected
示例#8
0
    def segment_test(self):
        """Segment the dummy bulk fast5 and validate the capture file written.

        Runs ``segment.segment`` on the dummy bulk file, then opens the first
        capture file for the run and checks, for every group whose name
        contains "read":

        * ``start_time_local`` equals ``start_time_bulk`` (no offset),
        * the ``Signal`` dataset length equals its ``duration`` attribute,
        * the ``voltage`` attribute equals the configured voltage threshold.

        Finally, the number of such groups must be 5.

        The capture file is deleted in a ``finally`` clause so that a failing
        assertion does not leave a stale file behind in ``src/tests``.
        """
        bulk_f5_fname = "src/tests/data/bulk_fast5_dummy.fast5"

        filters = [filtering.LengthFilter(100, None)]
        config = GeneralConfiguration(config={"n_workers": 2, "capture_directory": "src/tests"})

        segment_config = {
            "voltage_threshold": -180,
            "signal_threshold_frac": 0.7,
            "translocation_delay": 20,
            "open_channel_prior_mean": 220,
            "open_channel_prior_stdv": 50,
            "good_channels": [
                1,
                2,
                3,
            ],  # this will be internally overwritten by the good channels calculation, which should not include channel 2
            "end_tolerance": 50,
            "terminal_capture_only": False,
            "n_captures_per_file": 1000,
            "bulkfast5": bulk_f5_fname,
        }
        segment_config = SegmentConfiguration(segment_config)

        run_id = "d0befb838f5a9a966e3c559dc3a75a6612745849"
        actual_n_captures = 5
        n_captures = 0
        capture_f5_fname = f"src/tests/{run_id}_1.fast5"
        try:
            segment.segment(bulk_f5_fname, config, segment_config, overwrite=True, filters=filters)
            with h5py.File(capture_f5_fname, "r") as f5:
                for grp in f5.get("/"):
                    # Only groups named like reads are captures; skip the rest.
                    if "read" not in grp:
                        continue
                    n_captures += 1
                    d = f5[grp]
                    a = d["Signal"].attrs
                    start_time_local = a.get("start_time_local")
                    start_time_bulk = a.get("start_time_bulk")
                    assert start_time_local == start_time_bulk  # No offset here

                    duration = a.get("duration")
                    len_signal = len(d["Signal"][()])
                    assert len_signal == duration

                    voltage = a.get("voltage")
                    assert voltage == segment_config.voltage_threshold
                    print(duration, a.get("channel_number"))
            assert n_captures == actual_n_captures
        finally:
            # Clean up even when an assertion above failed. The existence check
            # avoids masking the real error if the file was never written.
            if os.path.exists(capture_f5_fname):
                os.remove(capture_f5_fname)
示例#9
0
    def parallel_find_captures_overflow_file_test(self):
        """Check that captures overflow into several files when the per-file cap is small.

        With ``n_captures_per_file`` set to 2, the test expects the run to
        produce 3 capture files in ``src/tests/`` holding 5 captures in total.
        For every group whose name contains "read" it checks that:

        * ``start_time_local`` equals ``start_time_bulk`` (no offset),
        * the ``Signal`` dataset length equals its ``duration`` attribute,
        * the ``voltage`` attribute equals the configured voltage threshold.

        All files for the run are deleted in a ``finally`` clause, so a failing
        assertion does not leave files behind that a later run would then pick
        up in its directory listing.
        """
        bulk_f5_fname = "src/tests/data/bulk_fast5_dummy.fast5"

        filters = [filtering.LengthFilter(100, None)]
        config = GeneralConfiguration(config={"n_workers": 2, "capture_directory": "src/tests"})

        segment_config = {
            "voltage_threshold": -180,
            "signal_threshold_frac": 0.7,
            "translocation_delay": 20,
            "open_channel_prior_mean": 220,
            "open_channel_prior_stdv": 50,
            "good_channels": [1, 3],
            "end_tolerance": 50,
            "terminal_capture_only": False,
            "n_captures_per_file": 2,
            "bulkfast5": bulk_f5_fname,
        }
        segment_config = SegmentConfiguration(segment_config)

        run_id = "d0befb838f5a9a966e3c559dc3a75a6612745849"
        actual_n_captures = 5
        n_captures = 0

        def _run_capture_files():
            # Every entry in src/tests/ whose name contains this run's id.
            return [
                os.path.join("src/tests/", x) for x in os.listdir("src/tests/") if run_id in x
            ]

        try:
            segment.parallel_find_captures(config, segment_config, overwrite=True, filters=filters)
            capture_f5_fnames = _run_capture_files()
            assert len(capture_f5_fnames) == 3
            for capture_f5_fname in capture_f5_fnames:
                with h5py.File(capture_f5_fname, "r") as f5:
                    for grp in f5.get("/"):
                        # Only groups named like reads are captures; skip the rest.
                        if "read" not in grp:
                            continue
                        n_captures += 1
                        d = f5[grp]
                        a = d["Signal"].attrs
                        start_time_local = a.get("start_time_local")
                        start_time_bulk = a.get("start_time_bulk")
                        assert start_time_local == start_time_bulk  # No offset here

                        duration = a.get("duration")
                        len_signal = len(d["Signal"][()])
                        assert len_signal == duration

                        voltage = a.get("voltage")
                        assert voltage == segment_config.voltage_threshold
                        print(duration, a.get("channel_number"))
            assert n_captures == actual_n_captures
        finally:
            # Re-list instead of reusing capture_f5_fnames: the listing may never
            # have happened if the segmentation call itself raised.
            for leftover in _run_capture_files():
                os.remove(leftover)
示例#10
0
def find_captures_7_capture_no_open_channel_test():
    """Example capture window contains 1 long terminal capture whose open pore
    region is extremely short. The open pore region is cut off by dropping the
    first 100 samples of the signal.

    Tests: find_captures returns 1 capture; the calculated open channel
    current is close to the alternative value passed in.
    """
    full_signal = picoampere_signal_from_data_file(
        "src/tests/data/capture_windows/test_data_capture_window_7.txt.gz"
    )
    trimmed_signal = full_signal[100:]
    capture_window = Window(2_919_913, 3_013_723)
    length_filters = [filtering.LengthFilter(100, None)]

    found = segment.find_captures(
        trimmed_signal,
        2,  # channel number
        capture_window,
        0.7,  # signal threshold fraction
        230,  # alternative open channel current (pA)
        terminal_capture_only=False,
        filters=length_filters,
        delay=100,
        end_tol=0,
    )
    assert len(found) == 1

    # Rough check: every calculated open channel current should sit within
    # 0.5 of the expected value; the tolerance leaves room for future changes.
    expected_open_channel_pA = 230
    calculated_pA = np.array([capture.open_channel_pA_calculated for capture in found])
    within_tolerance = np.isclose(calculated_pA, expected_open_channel_pA, atol=0.5)
    all_currents_within_bounds = np.all(within_tolerance)

    assert (
        all_currents_within_bounds
    ), f"All captures should have calculated an open channel current close to {expected_open_channel_pA}."
示例#11
0
def find_captures_5_unfolded_terminal_test():
    """Example capture window contains 1 long terminal capture that was
    captured almost immediately, leaving a very short open pore region.

    Tests: find_captures returns 1 capture, and its calculated open channel
    current lies strictly between the expected bounds.
    """
    signal = picoampere_signal_from_data_file(
        "src/tests/data/capture_windows/test_data_capture_window_5.txt.gz"
    )
    capture_window = Window(965_676, 1_059_216)
    length_filters = [filtering.LengthFilter(100, None)]

    found = segment.find_captures(
        signal,
        1,  # channel number
        capture_window,
        0.7,  # signal threshold fraction
        230,  # alternative open channel current (pA)
        terminal_capture_only=True,
        filters=length_filters,
        delay=0,
        end_tol=0,
    )
    assert len(found) == 1

    # Rough check: the value is expected to be about 229.05. The bounds are
    # deliberately loose so that future changes have some tolerance.
    low_expected_open_channel_pA = 228.5
    high_expected_open_channel_pA = 230
    calculated_pA = np.array([capture.open_channel_pA_calculated for capture in found])
    above_low = calculated_pA > low_expected_open_channel_pA
    below_high = calculated_pA < high_expected_open_channel_pA
    all_currents_within_bounds = np.all(above_low & below_high)

    assert (
        all_currents_within_bounds
    ), f"Expect all capture open channel currents to be between '{low_expected_open_channel_pA}' and '{high_expected_open_channel_pA}'."
示例#12
0
def find_captures_8_capture_no_open_channel_test():
    """Example capture window contains 2 captures: both long, 1 terminal.

    Exercises the non-terminal long capture.

    Tests: find_captures returns 2 captures, and each one matches an expected
    (start, end, ejected) tuple exactly with delay = 3.
    """
    signal = picoampere_signal_from_data_file(
        "src/tests/data/capture_windows/test_data_capture_window_8.txt.gz"
    )
    capture_window = Window(4_875_289, 4_969_337)
    length_filters = [filtering.LengthFilter(100, None)]

    found = segment.find_captures(
        signal,
        2,  # channel number
        capture_window,
        0.7,  # signal threshold fraction
        230,  # alternative open channel current (pA)
        terminal_capture_only=False,
        filters=length_filters,
        delay=3,
        end_tol=0,
    )
    assert len(found) == 2

    # Expected boundaries are written as offsets from the window start.
    origin = capture_window.start
    expected = [
        (11310 + origin, 22098 + origin, False),
        (26617 + origin, 94048 + origin, True),
    ]
    for capture in found:
        observed = (capture.window.start, capture.window.end, capture.ejected)
        assert observed in expected
示例#13
0
def find_captures_6_clog_no_open_channel_test():
    """Example capture window contains 1 long terminal capture whose open pore
    region is extremely short. The open pore region is cut off by dropping the
    first 100 samples of the signal.

    Tests: find_captures returns 1 capture; the calculated open channel
    current is close to the alternative value passed in.
    """
    full_signal = picoampere_signal_from_data_file(
        "src/tests/data/capture_windows/test_data_capture_window_6.txt.gz"
    )
    trimmed_signal = full_signal[100:]
    capture_window = Window(2_769_436, 2_863_265)
    length_filters = [filtering.LengthFilter(100, None)]

    found = segment.find_captures(
        trimmed_signal,
        1,  # channel number
        capture_window,
        0.7,  # signal threshold fraction
        230,  # alternative open channel current (pA)
        terminal_capture_only=False,
        filters=length_filters,
        delay=100,
        end_tol=0,
    )
    assert len(found) == 1

    # Every calculated open channel current must be within 0.5 of the expected value.
    expected_open_channel_pA = 230
    calculated_pA = np.array([capture.open_channel_pA_calculated for capture in found])
    within_tolerance = np.isclose(calculated_pA, expected_open_channel_pA, atol=0.5)
    all_currents_within_bounds = np.all(within_tolerance)

    assert (
        all_currents_within_bounds
    ), f"All calculated open channel currents should be close to {expected_open_channel_pA}"