def test_find_unmapped_samples(tmp_path):
    """Check how outcome rows without a matching samples row are reported.

    The samples file lists E0001 and E0002 only, while the outcome summary
    also carries E0003. The test expects E0003's counts to be filed under
    the key 'E0003' and its (run, sample) pair to show up in
    ``unmapped_samples``.
    """
    run_dir = tmp_path / '200101_M11111_0001_0000000-J1HRQ'
    run_dir.mkdir()

    sample_rows = StringIO("""\
sample,run,pid
E0001-NFLHIVDNA_S1,200101_M11111,P1
E0002-NFLHIVDNA_S2,200101_M11111,P2
""")
    # E0003 appears here but has no row in the samples file above.
    outcome_rows = StringIO("""\
sample,run,passed,error
E0001-NFLHIVDNA_S1,200101_M11111,True,
E0002-NFLHIVDNA_S2,200101_M11111,False,primer error
E0003-NFLHIVDNA_S3,200101_M11111,False,multiple contigs
""")

    summary = StudySummary()
    summary.load_samples(sample_rows, tmp_path)
    summary.load_outcome(outcome_rows)

    assert summary.participant_counts == {
        'P1': {'samples': 1, 'passed': 1},
        'P2': {'samples': 1, 'errors': 1, 'no_primer': 1},
        'E0003': {'samples': 1, 'errors': 1, 'multiple_contigs': 1},
    }
    assert summary.unmapped_samples == [
        ('200101_M11111', 'E0003-NFLHIVDNA_S3'),
    ]
def test_load_samples(tmp_path):
    """Check that loading samples records the folder of every listed run.

    Two run folders exist under the runs root and the samples file refers
    to both runs; both folders are expected in ``run_paths``, in that order.
    """
    first_run = tmp_path / '200101_M11111_0001_0000000-J1HRQ'
    second_run = tmp_path / '200115_M22222_0003_0000000-Y8E4T'
    first_run.mkdir()
    second_run.mkdir()

    sample_rows = StringIO("""\
sample,run,pid
E0001_S1,200101_M11111,P1
E0002_S2,200101_M11111,P2
E0003_S1,200115_M22222,P2
""")

    summary = StudySummary()
    summary.load_samples(sample_rows, tmp_path)

    # The expected value is a tuple: a list holding the same two paths
    # would not compare equal to it.
    assert summary.run_paths == (first_run, second_run)
def test_load_runs_not_in_samples(tmp_path):
    """Check that a requested run is kept even if no sample refers to it.

    Both run folders are passed to ``load_runs``, but the samples file only
    mentions the first run; both folders are still expected in
    ``run_paths``.
    """
    first_run = tmp_path / '200101_M11111_0001_0000000-J1HRQ'
    second_run = tmp_path / '200115_M22222_0003_0000000-Y8E4T'
    first_run.mkdir()
    second_run.mkdir()

    # Only the first run (200101_M11111) is referenced by these rows.
    sample_rows = StringIO("""\
sample,run,pid
E0001-NFLHIVDNA_S1,200101_M11111,P1
E0002-NFLHIVDNA_S2,200101_M11111,P2
""")

    summary = StudySummary()
    # Runs are requested as plain strings, and before the samples are loaded.
    summary.load_runs([str(path) for path in (first_run, second_run)])
    summary.load_samples(sample_rows, tmp_path)

    # The expected value is a tuple: a list holding the same two paths
    # would not compare equal to it.
    assert summary.run_paths == (first_run, second_run)