Пример #1
0
def test_mixture_failure_modes():
    """Check that invalid mixture arguments to ``mhapi.seq`` are rejected.

    Three simulated profiles are sequenced with three different sets of
    inconsistent keyword arguments; each must produce a ``ValueError``
    whose message matches the paired pattern.
    """
    frequencies = microhapulator.load_marker_frequencies(
        data_file("freq/russ4-freq.tsv"))
    marker_defs = microhapulator.load_marker_definitions(
        data_file("def/russ4-offsets.tsv"))
    refr_seqs = microhapulator.load_marker_reference_sequences(
        data_file("refr/russ4-refr.fasta.gz"))
    trio = [mhapi.sim(frequencies) for _ in range(3)]

    # (expected error pattern, offending keyword arguments)
    failure_cases = (
        (
            r"number of profiles must match number of seeds",
            {"seeds": [42, 1776]},
        ),
        (
            r"mismatch between contributor number and proportions",
            {"proportions": [0.5, 0.3, 0.1, 0.1]},
        ),
        (
            r"specified proportions result in 0 reads for 1 or more individuals",
            {"totalreads": 500, "proportions": [1, 100, 10000]},
        ),
    )
    for pattern, bad_kwargs in failure_cases:
        with pytest.raises(ValueError, match=pattern):
            # Exhaust the iterator inside the context so the error is caught
            # whether it is raised at call time or on first iteration.
            for _ in mhapi.seq(trio, marker_defs, refr_seqs, **bad_kwargs):
                pass
Пример #2
0
def test_complex_genotype(capsys):
    """Sequence the profiles obtained by unmixing a stored mixture profile.

    After the sequencer has been exhausted, the captured stderr must
    contain the text ``Individual seed=`` exactly three times
    (presumably once per unmixed contributor -- confirm against
    ``mhapi.seq``).
    """
    mixture = Profile(fromfile=data_file("prof/mixture-genotype.json"))
    marker_defs = microhapulator.load_marker_definitions(
        data_file("def/russ4-offsets.tsv"))
    refr_seqs = microhapulator.load_marker_reference_sequences(
        data_file("refr/russ4-refr.fasta.gz"))
    contributors = list(mixture.unmix())
    reads = mhapi.seq(contributors, marker_defs, refr_seqs, totalreads=200)
    # Only the log output matters here; the reads themselves are discarded.
    for _ in reads:
        pass
    captured = capsys.readouterr()
    assert captured.err.count("Individual seed=") == 3
Пример #3
0
def test_uneven_mixture(capsys):
    """Sequence three simulated profiles with unequal proportions.

    With ``totalreads=500`` and proportions 0.5/0.3/0.2, the captured
    stderr is expected to report read counts of 250, 150 and 100.
    """
    frequencies = microhapulator.load_marker_frequencies(
        data_file("freq/russ4-freq.tsv"))
    marker_defs = microhapulator.load_marker_definitions(
        data_file("def/russ4-offsets.tsv"))
    refr_seqs = microhapulator.load_marker_reference_sequences(
        data_file("refr/russ4-refr.fasta.gz"))
    trio = [mhapi.sim(frequencies) for _ in range(3)]
    reads = mhapi.seq(
        trio,
        marker_defs,
        refr_seqs,
        totalreads=500,
        proportions=[0.5, 0.3, 0.2],
    )
    # Drain the sequencer; only the messages written to stderr are checked.
    for _ in reads:
        pass
    captured = capsys.readouterr()
    for token in ("numreads=250", "numreads=150", "numreads=100"):
        assert token in captured.err
Пример #4
0
def test_even_mixture():
    """Sequence a random-sized mixture without explicit proportions.

    A random seed is drawn, printed (so a failing run can be reproduced)
    and installed into NumPy's global generator. Between 2 and 5 profiles
    are then simulated and sequenced with ``totalreads=1000``. Twice the
    last counter value yielded by the sequencer must be within 50 of 1000.
    """
    # Ask for a 64-bit draw explicitly: the default integer dtype is 32-bit
    # on some platforms (e.g. Windows with NumPy < 2), where an upper bound
    # of 2**32 - 1 is out of range and randint raises ValueError.
    seed = int(numpy.random.randint(1, 2**32 - 1, dtype=numpy.int64))
    print("Seed:", seed)
    numpy.random.seed(seed)
    freqs = microhapulator.load_marker_frequencies(
        data_file("freq/acb-dozen-freq.tsv"))
    markers = microhapulator.load_marker_definitions(
        data_file("def/acb-dozen-offsets.tsv"))
    seqs = microhapulator.load_marker_reference_sequences(
        data_file("refr/acb-dozen-refr.fasta"))
    profiles = [mhapi.sim(freqs) for _ in range(numpy.random.randint(2, 6))]
    sequencer = mhapi.seq(profiles, markers, seqs, totalreads=1000)
    # Pre-bind the counter so an empty sequencer produces a clear assertion
    # failure below instead of a NameError.
    n = 0
    for n, _read1, _read2 in sequencer:
        pass
    # Each yielded item carries two reads, hence the factor of two.
    numfragments = n * 2
    assert numfragments == pytest.approx(1000, abs=50)
def test_load_marker_definitions_missing_column():
    """Loading a definition file that lacks a column must raise ValueError.

    The error message is expected to name ``Marker`` as the missing column.
    """
    expected = r"column\(s\) missing from marker definition file: Marker"
    with pytest.raises(ValueError, match=expected):
        broken_table = data_file("def/orange-offsets-missing.tsv")
        microhapulator.load_marker_definitions(broken_table)
def test_load_marker_definitions(tsv, nrows, markerid, offset):
    """Load a marker definition table and spot-check its contents.

    NOTE(review): nothing visible here supplies ``tsv``, ``nrows``,
    ``markerid`` and ``offset``; presumably a ``pytest.mark.parametrize``
    decorator provides them -- confirm.

    The loaded table must have exactly the columns ``Marker`` and
    ``Offset``, contain ``nrows`` rows, and hold ``markerid`` / ``offset``
    at positional row 10.
    """
    loaded = microhapulator.load_marker_definitions(data_file(tsv))
    column_names = list(loaded.columns)
    row_count = loaded.shape[0]
    assert column_names == ["Marker", "Offset"]
    assert row_count == nrows
    probe_row = 10  # zero-based position of the row that is spot-checked
    assert loaded.Marker.iloc[probe_row] == markerid
    assert loaded.Offset.iloc[probe_row] == offset