def test_different_remap_counts(self):
    # The source files list one remap-count row and the target files list
    # two. With REMAP_COUNTS_CHANGED passed to compare_sample, the test
    # expects an empty report and the coverage change filed under that
    # scenario instead.
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(
        coverage_scores=[{'project': 'HIV',
                          'region': 'PR',
                          'seed': 'HIV1-seed',
                          'on.score': '3'}],
        remap_counts=[{'type': 'remap-1 HIV1-seed'}])
    target_files = SampleFiles(
        coverage_scores=[{'project': 'HIV',
                          'region': 'PR',
                          'seed': 'HIV1-seed',
                          'on.score': '4'}],
        remap_counts=[{'type': 'remap-1 HIV1-seed'},
                      {'type': 'remap-2 HIV1-seed'}])
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    # NOTE(review): the leading blank in the scenario entry is copied
    # exactly as found; confirm the intended indentation width.
    wanted_report = ''
    wanted_scenario_counts = {
        Scenarios.REMAP_COUNTS_CHANGED: [
            ' run1:sample42 coverage: HIV PR 3 => 4\n']}

    report, scenario_counts, _ = compare_sample(
        sample,
        Scenarios.REMAP_COUNTS_CHANGED)

    self.assertEqual(wanted_report, report)
    self.assertEqual(wanted_scenario_counts, scenario_counts)
def test_equivalent_remap_counts(self):
    # Both sides have the same single remap-count type and differ only in
    # the 'count' value. The test expects the coverage change to appear in
    # the report and no scenario entries.
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(
        coverage_scores=[{'project': 'HIV',
                          'region': 'PR',
                          'seed': 'HIV1-seed',
                          'on.score': '3'}],
        remap_counts=[{'type': 'remap-1 HIV1-seed', 'count': '20'}])
    target_files = SampleFiles(
        coverage_scores=[{'project': 'HIV',
                          'region': 'PR',
                          'seed': 'HIV1-seed',
                          'on.score': '4'}],
        remap_counts=[{'type': 'remap-1 HIV1-seed', 'count': '21'}])
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    wanted_report = 'run1:sample42 coverage: HIV PR 3 => 4\n'
    wanted_scenario_counts = {}

    report, scenario_counts, _ = compare_sample(sample)

    self.assertEqual(wanted_report, report)
    self.assertEqual(wanted_scenario_counts, scenario_counts)
def test_missing_coverage_different_remap_counts_no_scenarios(self):
    # The target files drop the HCV2 coverage row and the HCV2 remap-count
    # row. With Scenarios.NONE, the test expects the missing coverage to be
    # reported and no scenario entries.
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(
        coverage_scores=[{'project': 'HCV',
                          'region': 'HCV1-E1',
                          'seed': 'HCV1-seed',
                          'on.score': '2'},
                         {'project': 'HCV',
                          'region': 'HCV2-E1',
                          'seed': 'HCV2-seed',
                          'on.score': '3'}],
        remap_counts=[{'type': 'remap-1 HCV1-seed'},
                      {'type': 'remap-1 HCV2-seed'}])
    target_files = SampleFiles(
        coverage_scores=[{'project': 'HCV',
                          'region': 'HCV1-E1',
                          'seed': 'HCV1-seed',
                          'on.score': '2'}],
        remap_counts=[{'type': 'remap-1 HCV1-seed'}])
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    wanted_report = 'run1:sample42 coverage: HCV HCV2-E1 3 => -\n'
    wanted_scenario_counts = {}

    report, scenario_counts, _ = compare_sample(sample, Scenarios.NONE)

    self.assertEqual(wanted_report, report)
    self.assertEqual(wanted_scenario_counts, scenario_counts)
def test_multiple_coverage(self):
    # Two regions change score, and the target lists them in the opposite
    # order from the source. The test expects one report line per region,
    # PR before RT.
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(
        coverage_scores=[{'project': 'HIV', 'region': 'PR', 'on.score': '3'},
                         {'project': 'HIV', 'region': 'RT', 'on.score': '2'}])
    target_files = SampleFiles(
        coverage_scores=[{'project': 'HIV', 'region': 'RT', 'on.score': '3'},
                         {'project': 'HIV', 'region': 'PR', 'on.score': '4'}])
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    wanted_report = ('run1:sample42 coverage: HIV PR 3 => 4\n'
                     'run1:sample42 coverage: HIV RT 2 => 3\n')

    report, _, _ = compare_sample(sample)

    self.assertEqual(wanted_report, report)
def test_consensus_added(self):
    # The source has no region consensus and the target has one for
    # R1-seed/R1. The test expects a header line plus a '+' line in the
    # diffs, and no consensus distances.
    # NOTE(review): this file contains another method with this name that
    # passes explicit sequence dicts to compare_consensus; if both live in
    # the same class, the later definition shadows the earlier one.
    source_seqs = {}
    target_seqs = {('R1-seed', 'R1'): make_nuc_rows('ACTTAC')}
    # The same coverage list object is handed to both sides.
    coverage_scores = [{'seed': 'R1-seed',
                        'region': 'R1',
                        'project': 'R1',
                        'on.score': '4'}]
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(region_consensus=source_seqs,
                               coverage_scores=coverage_scores)
    target_files = SampleFiles(region_consensus=target_seqs,
                               coverage_scores=coverage_scores)
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    wanted_diffs = ['run1:sample42 consensus: R1-seed R1 MAX',
                    '+ ACTTAC']
    wanted_distances = []

    diffs = []
    scenarios = defaultdict(list)
    consensus_distances = compare_consensus(sample,
                                            diffs,
                                            Scenarios.NONE,
                                            scenarios)

    self.assertEqual(wanted_diffs, diffs)
    self.assertEqual(wanted_distances, consensus_distances)
def test_same_consensus(self):
    # Source and target carry the same R1 sequence. Even with both
    # consensus-changed scenarios passed in, the test expects no diffs and
    # no scenario entries.
    # NOTE(review): this file contains another method with this name that
    # passes explicit sequence dicts to compare_consensus; if both live in
    # the same class, the later definition shadows the earlier one.
    source_seqs = {('R1-seed', 'R1'): make_nuc_rows('ACACAC')}
    target_seqs = {('R1-seed', 'R1'): make_nuc_rows('ACACAC')}
    # The same coverage list object is handed to both sides.
    coverage_scores = [{'seed': 'R1-seed',
                        'region': 'R1',
                        'project': 'R1',
                        'on.score': '4'}]
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(region_consensus=source_seqs,
                               coverage_scores=coverage_scores)
    target_files = SampleFiles(region_consensus=target_seqs,
                               coverage_scores=coverage_scores)
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    wanted_diffs = []
    wanted_scenarios = {}

    diffs = []
    scenarios = defaultdict(list)
    compare_consensus(
        sample,
        diffs,
        Scenarios.MAIN_CONSENSUS_CHANGED | Scenarios.OTHER_CONSENSUS_CHANGED,
        scenarios)

    self.assertEqual(wanted_diffs, diffs)
    self.assertEqual(wanted_scenarios, scenarios)
def test_one_consensus_changes(self):
    # Two regions are compared: R1 is identical on both sides and R2
    # differs by one base. The test expects diffs for R2 only, and a
    # distance entry for each region.
    # NOTE(review): this file contains another method with this name that
    # calls compare_consensus without the sequence dicts; if both live in
    # the same class, the later definition shadows the earlier one.
    source_seqs = {('R1-seed', 'R1', 'MAX'): 'ACACACGT',
                   ('R2-seed', 'R2', 'MAX'): 'ACACACGT'}
    target_seqs = {('R1-seed', 'R1', 'MAX'): 'ACACACGT',
                   ('R2-seed', 'R2', 'MAX'): 'ACACAMGT'}
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    sample = Sample(miseq_run, 'sample42', SampleFiles(), SampleFiles())

    # NOTE(review): the '? ^' marker lines are copied exactly as found;
    # their inner spacing may have been collapsed - confirm against the
    # real diff output.
    wanted_diffs = ['run1:sample42 consensus: R2-seed R2 MAX',
                    '- ACACACGT',
                    '? ^',
                    '+ ACACAMGT',
                    '? ^']
    wanted_distances = [
        ConsensusDistance(region='R1',
                          cutoff='MAX',
                          distance=0,
                          pct_diff=0),
        ConsensusDistance(region='R2',
                          cutoff='MAX',
                          distance=1,
                          pct_diff=12.5)]

    diffs = []
    scenarios = defaultdict(list)
    consensus_distances = compare_consensus(sample,
                                            source_seqs,
                                            target_seqs,
                                            diffs,
                                            Scenarios.NONE,
                                            scenarios)

    self.assertEqual(wanted_diffs, diffs)
    self.assertEqual(wanted_distances, consensus_distances)
def test_consensus_change(self):
    # The R1 consensus differs in its last base between source and target.
    # The test expects compare_sample to report the diff and return one
    # distance entry.
    # NOTE(review): this file contains another method with this name built
    # from 'consensus' rows and nuc_limits; if both live in the same class,
    # the later definition shadows the earlier one.
    source_seqs = {('R1-seed', 'R1'): make_nuc_rows('ACACACGT')}
    target_seqs = {('R1-seed', 'R1'): make_nuc_rows('ACACACGG')}
    # The same coverage list object is handed to both sides.
    coverage_scores = [{'seed': 'R1-seed',
                        'region': 'R1',
                        'project': 'R1',
                        'on.score': '4'}]
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(region_consensus=source_seqs,
                               coverage_scores=coverage_scores)
    target_files = SampleFiles(region_consensus=target_seqs,
                               coverage_scores=coverage_scores)
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    # NOTE(review): the '? ^' marker lines are copied exactly as found;
    # their inner spacing may have been collapsed - confirm against the
    # real diff output.
    wanted_report = ('run1:sample42 consensus: R1-seed R1 MAX\n'
                     '- ACACACGT\n'
                     '? ^\n'
                     '+ ACACACGG\n'
                     '? ^\n')
    wanted_distances = [ConsensusDistance(region='R1',
                                          cutoff='MAX',
                                          distance=1,
                                          pct_diff=12.5)]

    report, _, consensus_distances = compare_sample(sample)

    self.assertEqual(wanted_report, report)
    self.assertEqual(wanted_distances, consensus_distances)
def test_empty(self):
    # Both sides are default-constructed SampleFiles; the test expects an
    # empty report.
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles()
    target_files = SampleFiles()
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    wanted_report = ''

    report, _, _ = compare_sample(sample)

    self.assertEqual(wanted_report, report)
def test_blank(self):
    # X4pct goes from '50.00' to an empty string. The test expects a G2P
    # report line with nothing after the arrow.
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(g2p_summary=[{'X4pct': '50.00'}])
    target_files = SampleFiles(g2p_summary=[{'X4pct': ''}])
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    wanted_report = 'run1:sample42 G2P: 50.00 => \n'

    report, _, _ = compare_sample(sample)

    self.assertEqual(wanted_report, report)
def test_other_difference_with_blanks(self):
    # X4pct is blank on both sides and only the 'other' field differs; the
    # test expects an empty report.
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(g2p_summary=[{'X4pct': '', 'other': 'x'}])
    target_files = SampleFiles(g2p_summary=[{'X4pct': '', 'other': 'y'}])
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    wanted_report = ''

    report, _, _ = compare_sample(sample)

    self.assertEqual(wanted_report, report)
def test_same_final(self):
    # X4pct moves from 2.99 to 3.01 while 'final' stays 'X4' on both
    # sides; the test expects an empty report.
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(
        g2p_summary=[{'X4pct': '2.99', 'final': 'X4'}])
    target_files = SampleFiles(
        g2p_summary=[{'X4pct': '3.01', 'final': 'X4'}])
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    wanted_report = ''

    report, _, _ = compare_sample(sample)

    self.assertEqual(wanted_report, report)
def test_different_final(self):
    # X4pct moves from 1.99 to 2.01 and 'final' changes from 'R5' to 'X4'.
    # The test expects a G2P report line showing both values on each side.
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(
        g2p_summary=[{'X4pct': '1.99', 'final': 'R5'}])
    target_files = SampleFiles(
        g2p_summary=[{'X4pct': '2.01', 'final': 'X4'}])
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    wanted_report = 'run1:sample42 G2P: R5 1.99 => X4 2.01\n'

    report, _, _ = compare_sample(sample)

    self.assertEqual(wanted_report, report)
def test_missing_coverage_to_low(self):
    # The source has coverage_scores=None and the target has a single row
    # with on.score '1'; the test expects an empty report.
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(coverage_scores=None)
    target_files = SampleFiles(
        coverage_scores=[{'project': 'HIV',
                          'region': 'PR',
                          'on.score': '1'}])
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    wanted_report = ''

    report, _, _ = compare_sample(sample)

    self.assertEqual(wanted_report, report)
def test_one_consensus_changes(self):
    # Two regions are compared: R1 is identical on both sides and R2
    # differs by one base. The test expects diffs for R2 only, and a
    # distance entry for each region.
    # NOTE(review): this file contains another method with this name that
    # passes explicit sequence dicts to compare_consensus; if both live in
    # the same class, the later definition shadows the earlier one.
    source_seqs = {('R1-seed', 'R1'): make_nuc_rows('ACACACGT'),
                   ('R2-seed', 'R2'): make_nuc_rows('ACACACGT')}
    target_seqs = {('R1-seed', 'R1'): make_nuc_rows('ACACACGT'),
                   ('R2-seed', 'R2'): make_nuc_rows('ACACAMGT')}
    # The same coverage list object is handed to both sides.
    coverage_scores = [{'seed': 'R1-seed',
                        'region': 'R1',
                        'project': 'R1',
                        'on.score': '4'},
                       {'seed': 'R2-seed',
                        'region': 'R2',
                        'project': 'R2',
                        'on.score': '4'}]
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(region_consensus=source_seqs,
                               coverage_scores=coverage_scores)
    target_files = SampleFiles(region_consensus=target_seqs,
                               coverage_scores=coverage_scores)
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    # NOTE(review): the '? ^' marker lines are copied exactly as found;
    # their inner spacing may have been collapsed - confirm against the
    # real diff output.
    wanted_diffs = ['run1:sample42 consensus: R2-seed R2 MAX',
                    '- ACACACGT',
                    '? ^',
                    '+ ACACAMGT',
                    '? ^']
    wanted_distances = [
        ConsensusDistance(region='R1',
                          cutoff='MAX',
                          distance=0,
                          pct_diff=0),
        ConsensusDistance(region='R2',
                          cutoff='MAX',
                          distance=1,
                          pct_diff=12.5)]

    diffs = []
    scenarios = defaultdict(list)
    consensus_distances = compare_consensus(sample,
                                            diffs,
                                            Scenarios.NONE,
                                            scenarios)

    self.assertEqual(wanted_diffs, diffs)
    self.assertEqual(wanted_distances, consensus_distances)
def test_same_consensus(self):
    # Source and target carry the same R1/MAX sequence. Even with both
    # consensus-changed scenarios passed in, the test expects no diffs and
    # no scenario entries.
    # NOTE(review): this file contains another method with this name that
    # calls compare_consensus without the sequence dicts; if both live in
    # the same class, the later definition shadows the earlier one.
    source_seqs = {('R1-seed', 'R1', 'MAX'): 'ACACAC'}
    target_seqs = {('R1-seed', 'R1', 'MAX'): 'ACACAC'}
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    sample = Sample(miseq_run, 'sample42', SampleFiles(), SampleFiles())

    wanted_diffs = []
    wanted_scenarios = {}

    diffs = []
    scenarios = defaultdict(list)
    compare_consensus(
        sample,
        source_seqs,
        target_seqs,
        diffs,
        Scenarios.MAIN_CONSENSUS_CHANGED | Scenarios.OTHER_CONSENSUS_CHANGED,
        scenarios)

    self.assertEqual(wanted_diffs, diffs)
    self.assertEqual(wanted_scenarios, scenarios)
def test_consensus_added(self):
    # The source has no sequences and the target has one for
    # R1-seed/R1/MAX. The test expects a header line plus a '+' line in the
    # diffs, and no consensus distances.
    # NOTE(review): this file contains another method with this name that
    # calls compare_consensus without the sequence dicts; if both live in
    # the same class, the later definition shadows the earlier one.
    source_seqs = {}
    target_seqs = {('R1-seed', 'R1', 'MAX'): 'ACTTAC'}
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    sample = Sample(miseq_run, 'sample42', SampleFiles(), SampleFiles())

    wanted_diffs = ['run1:sample42 consensus: R1-seed R1 MAX',
                    '+ ACTTAC']
    wanted_distances = []

    diffs = []
    scenarios = defaultdict(list)
    consensus_distances = compare_consensus(sample,
                                            source_seqs,
                                            target_seqs,
                                            diffs,
                                            Scenarios.NONE,
                                            scenarios)

    self.assertEqual(wanted_diffs, diffs)
    self.assertEqual(wanted_distances, consensus_distances)
def test_other_consensus_change(self):
    # A sequence keyed with cutoff '0.250' differs in its last base. With
    # Scenarios.NONE, the test expects the diff lines to be recorded and no
    # scenario entries.
    source_seqs = {('R1-seed', 'R1', '0.250'): 'ACACAC'}
    target_seqs = {('R1-seed', 'R1', '0.250'): 'ACACAT'}
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    sample = Sample(miseq_run, 'sample42', SampleFiles(), SampleFiles())

    # NOTE(review): the '? ^' marker lines are copied exactly as found;
    # their inner spacing may have been collapsed - confirm against the
    # real diff output.
    wanted_diffs = ['run1:sample42 consensus: R1-seed R1 0.250',
                    '- ACACAC',
                    '? ^',
                    '+ ACACAT',
                    '? ^']
    wanted_scenarios = {}

    diffs = []
    scenarios = defaultdict(list)
    compare_consensus(sample,
                      source_seqs,
                      target_seqs,
                      diffs,
                      Scenarios.NONE,
                      scenarios)

    self.assertEqual(wanted_diffs, diffs)
    self.assertEqual(wanted_scenarios, scenarios)
def test_consensus_change(self):
    # Each side supplies a coverage row, nuc_limits for R1 within R1-seed,
    # and one MAX consensus row; the two sequences differ in the last base.
    # The test expects compare_sample to report the diff and return one
    # distance entry.
    # NOTE(review): this file contains another method with this name built
    # from region_consensus data; if both live in the same class, the later
    # definition shadows the earlier one.
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(
        coverage_scores=[{'region': 'R1',
                          'seed': 'R1-seed',
                          'on.score': '4'}],
        nuc_limits={'R1-seed': [('R1', 101, 108)]},
        consensus=[{'region': 'R1-seed',
                    'consensus-percent-cutoff': 'MAX',
                    'offset': '100',
                    'sequence': 'ACACACGT'}])
    target_files = SampleFiles(
        coverage_scores=[{'region': 'R1',
                          'seed': 'R1-seed',
                          'on.score': '4'}],
        nuc_limits={'R1-seed': [('R1', 101, 108)]},
        consensus=[{'region': 'R1-seed',
                    'consensus-percent-cutoff': 'MAX',
                    'offset': '100',
                    'sequence': 'ACACACGG'}])
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    # NOTE(review): the '? ^' marker lines are copied exactly as found;
    # their inner spacing may have been collapsed - confirm against the
    # real diff output.
    wanted_report = ('run1:sample42 consensus: R1-seed R1 MAX\n'
                     '- ACACACGT\n'
                     '? ^\n'
                     '+ ACACACGG\n'
                     '? ^\n')
    wanted_distances = [ConsensusDistance(region='R1',
                                          cutoff='MAX',
                                          distance=1,
                                          pct_diff=12.5)]

    report, _, consensus_distances = compare_sample(sample)

    self.assertEqual(wanted_report, report)
    self.assertEqual(wanted_distances, consensus_distances)
def test_hiv_seed_changed(self):
    # The consensus row's region changes from 'HIV1-X' to 'HIV1-Y' and the
    # offset from '100' to '200', with the same sequence on both sides; the
    # test expects an empty report.
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    source_files = SampleFiles(
        consensus=[{'region': 'HIV1-X',
                    'consensus-percent-cutoff': 'MAX',
                    'offset': '100',
                    'sequence': 'ACACAC'}])
    target_files = SampleFiles(
        consensus=[{'region': 'HIV1-Y',
                    'consensus-percent-cutoff': 'MAX',
                    'offset': '200',
                    'sequence': 'ACACAC'}])
    sample = Sample(miseq_run, 'sample42', source_files, target_files)

    wanted_report = ''

    report, _, _ = compare_sample(sample)

    self.assertEqual(wanted_report, report)
def test_consensus_trailing_change(self):
    # The source sequence has dashes followed by 'GTAC' after the shared
    # 'ACTTAC' prefix, and the target is just the prefix. The test expects
    # plain '-'/'+' diff lines and a distance of 4 with pct_diff 25.
    source_seqs = {('R1-seed', 'R1', 'MAX'): 'ACTTAC------GTAC'}
    target_seqs = {('R1-seed', 'R1', 'MAX'): 'ACTTAC'}
    miseq_run = MiseqRun(target_path='run1/Results/versionX')
    sample = Sample(miseq_run, 'sample42', SampleFiles(), SampleFiles())

    wanted_diffs = ['run1:sample42 consensus: R1-seed R1 MAX',
                    '- ACTTAC------GTAC',
                    '+ ACTTAC']
    wanted_distances = [ConsensusDistance(region='R1',
                                          cutoff='MAX',
                                          distance=4,
                                          pct_diff=25)]

    diffs = []
    scenarios = defaultdict(list)
    consensus_distances = compare_consensus(sample,
                                            source_seqs,
                                            target_seqs,
                                            diffs,
                                            Scenarios.NONE,
                                            scenarios)

    self.assertEqual(wanted_diffs, diffs)
    self.assertEqual(wanted_distances, consensus_distances)