# Example no. 1
0
def test_find_unmapped_samples(tmp_path):
    """Check how an outcome row with no sample-sheet entry is reported.

    The sample sheet lists two samples (P1, P2), while the outcome summary
    also contains a third sample, E0003. The summary is expected to list
    that third sample in ``unmapped_samples`` and to count it under the
    key ``'E0003'`` in ``participant_counts``.
    """
    # Create the run folder under the runs root before loading samples.
    (tmp_path / '200101_M11111_0001_0000000-J1HRQ').mkdir()

    sample_sheet = StringIO(
        'sample,run,pid\n'
        'E0001-NFLHIVDNA_S1,200101_M11111,P1\n'
        'E0002-NFLHIVDNA_S2,200101_M11111,P2\n')
    outcomes = StringIO(
        'sample,run,passed,error\n'
        'E0001-NFLHIVDNA_S1,200101_M11111,True,\n'
        'E0002-NFLHIVDNA_S2,200101_M11111,False,primer error\n'
        'E0003-NFLHIVDNA_S3,200101_M11111,False,multiple contigs\n')

    wanted_counts = {
        'P1': {'samples': 1, 'passed': 1},
        'P2': {'samples': 1, 'errors': 1, 'no_primer': 1},
        # E0003 has no pid in the sample sheet.
        'E0003': {'samples': 1, 'errors': 1, 'multiple_contigs': 1},
    }
    wanted_unmapped = [('200101_M11111', 'E0003-NFLHIVDNA_S3')]

    study = StudySummary()
    study.load_samples(sample_sheet, tmp_path)
    study.load_outcome(outcomes)

    assert study.participant_counts == wanted_counts
    assert study.unmapped_samples == wanted_unmapped
# Example no. 2
0
def test_load_samples(tmp_path):
    """Check that loading a sample sheet fills in ``run_paths``.

    Two run folders exist under the runs root, and the sample sheet refers
    to both runs by their short names. After ``load_samples``, ``run_paths``
    is expected to be the tuple of the two run folders, in that order.
    """
    folder_names = ('200101_M11111_0001_0000000-J1HRQ',
                    '200115_M22222_0003_0000000-Y8E4T')
    run_folders = tuple(tmp_path / name for name in folder_names)
    for folder in run_folders:
        folder.mkdir()

    sample_sheet = StringIO(
        'sample,run,pid\n'
        'E0001_S1,200101_M11111,P1\n'
        'E0002_S2,200101_M11111,P2\n'
        'E0003_S1,200115_M22222,P2\n')

    study = StudySummary()
    study.load_samples(sample_sheet, tmp_path)

    assert study.run_paths == run_folders
# Example no. 3
0
def test_load_runs_not_in_samples(tmp_path):
    """Check ``run_paths`` when a requested run has no samples listed.

    Both run folders are passed to ``load_runs`` as strings, but the sample
    sheet only mentions the first run. After ``load_samples``, ``run_paths``
    is still expected to hold both run folders, in the requested order.
    """
    folder_names = ('200101_M11111_0001_0000000-J1HRQ',
                    '200115_M22222_0003_0000000-Y8E4T')
    run_folders = tuple(tmp_path / name for name in folder_names)
    for folder in run_folders:
        folder.mkdir()

    # Only the first run appears in the sample sheet.
    sample_sheet = StringIO(
        'sample,run,pid\n'
        'E0001-NFLHIVDNA_S1,200101_M11111,P1\n'
        'E0002-NFLHIVDNA_S2,200101_M11111,P2\n')

    study = StudySummary()
    study.load_runs([str(folder) for folder in run_folders])
    study.load_samples(sample_sheet, tmp_path)

    assert study.run_paths == run_folders