def __init__(self, report_tsv, mlst_file, outprefix):
    '''Set up the object from a report file and an MLST profile file.

    report_tsv: path handed to summary_sample.SummarySample
    mlst_file: path handed to mlst_profile.MlstProfile, which is created
        with duplicate_warnings=False
    outprefix: stored unchanged as self.outprefix
    '''
    # Plain state first: nothing has been called yet, so the call table is
    # empty and the "unsure" flag starts off False.
    self.outprefix = outprefix
    self.allele_calls = {}
    self.any_allele_unsure = False

    # Input wrappers, built in the same relative order as before.
    self.summary_sample = summary_sample.SummarySample(report_tsv)
    self.mlst_profile = mlst_profile.MlstProfile(
        mlst_file,
        duplicate_warnings=False,
    )
def test_variant_column_names_tuples_and_het_snps(self):
    '''Test _variant_column_names_tuples_and_het_snps

    Fills in ``clusters`` and ``column_summary_data`` by calling the private
    helpers directly on the fixture file, then checks both values returned
    by the method: the per-cluster sets of column-name tuples and the
    per-cluster het SNP dictionaries.
    '''
    fixture = os.path.join(
        data_dir,
        'summary_sample_test_column_names_tuples_and_het_snps.tsv',
    )
    sample = summary_sample.SummarySample(fixture)
    sample.clusters = sample._load_file(fixture, 90)
    sample.column_summary_data = sample._column_summary_data()

    # Expected column-name tuples, keyed by cluster name.
    want_columns = {
        'cluster.v': {
            ('variants_only1', 'S5T', 'ungrouped', None),
        },
        'cluster.n': {
            ('noncoding1', 'A6G', 'grouped', 'id2'),
            ('noncoding1', 'A14T', 'ungrouped', None),
            ('noncoding1', 'G15T', 'novel', None),
        },
        'cluster.p': {
            ('presence_absence1', 'A10V', 'grouped', 'id3'),
        },
    }
    # Expected het SNPs: only cluster.n has an entry.
    want_het_snps = {
        'cluster.v': {},
        'cluster.n': {'.': {'A14T': 80.0}},
        'cluster.p': {},
    }

    columns, het_snps = sample._variant_column_names_tuples_and_het_snps()
    self.assertEqual(want_columns, columns)
    self.assertEqual(want_het_snps, het_snps)
def test_column_summary_data(self):
    '''Test _column_summary_data

    Loads the fixture file into ``clusters`` via _load_file and checks that
    _column_summary_data returns the expected per-cluster dictionary of
    summary fields (all values are strings).
    '''
    fixture = os.path.join(
        data_dir, 'summary_sample_test_column_summary_data.tsv')
    sample = summary_sample.SummarySample(fixture)
    sample.clusters = sample._load_file(fixture, 90)

    want = {
        'cluster.n': {
            'assembled': 'yes',
            'match': 'yes',
            'ref_seq': 'noncoding1',
            'known_var': 'yes',
            'novel_var': 'yes',
            'pct_id': '98.33',
        },
        'cluster.p': {
            'assembled': 'yes',
            'match': 'yes',
            'ref_seq': 'presence_absence1',
            'known_var': 'yes',
            'novel_var': 'no',
            'pct_id': '98.96',
        },
        'cluster.v': {
            'assembled': 'yes',
            'match': 'yes',
            'ref_seq': 'variants_only1',
            'known_var': 'yes',
            'novel_var': 'no',
            'pct_id': '100.0',
        },
    }

    # Show the complete diff if the nested dictionaries do not match.
    self.maxDiff = None
    actual = sample._column_summary_data()
    self.assertEqual(want, actual)
def _load_input_files(cls, filenames, min_id, verbose=False, only_clusters=None):
    '''Build and run one SummarySample per input file.

    filenames: iterable of file names; each is used both as the argument to
        SummarySample and as the key in the returned dictionary
    min_id: forwarded to SummarySample as min_pc_id
    verbose: if true, print a 'Loaded file' line (flushed) after each file
    only_clusters: forwarded unchanged to SummarySample

    Returns a dictionary mapping file name -> SummarySample on which run()
    has been called. A file name that appears more than once keeps only the
    sample made for its last occurrence.
    '''
    loaded = {}

    for path in filenames:
        sample = summary_sample.SummarySample(
            path,
            min_pc_id=min_id,
            only_clusters=only_clusters,
        )
        # Store before running, as the original did.
        loaded[path] = sample
        sample.run()

        if verbose:
            print('Loaded file', path, flush=True)

    return loaded
def test_load_input_files(self):
    '''Test _load_input_files

    Two rounds over the same pair of fixture files: first with no cluster
    filter, then with only_clusters={'noncoding1'}. In each round the
    expected samples are built and run by hand and compared with the
    dictionary returned by summary.Summary._load_input_files.
    '''
    path_a = os.path.join(data_dir, 'summary_test_load_input_files.1.tsv')
    path_b = os.path.join(data_dir, 'summary_test_load_input_files.2.tsv')

    # Round 1: default arguments.
    first = summary_sample.SummarySample(path_a)
    second = summary_sample.SummarySample(path_b)
    for sample in (first, second):
        sample.run()
    loaded = summary.Summary._load_input_files([path_a, path_b], 90)
    self.assertEqual({path_a: first, path_b: second}, loaded)

    # Round 2: restricted to a single cluster. A fresh set literal is
    # passed to every call so that no set object is shared between them.
    first = summary_sample.SummarySample(
        path_a, only_clusters={'noncoding1'})
    second = summary_sample.SummarySample(
        path_b, only_clusters={'noncoding1'})
    for sample in (first, second):
        sample.run()
    want = {path_a: first, path_b: second}
    loaded = summary.Summary._load_input_files(
        [path_a, path_b], 90, only_clusters={'noncoding1'})
    self.assertEqual(want, loaded)
def test_var_groups(self):
    '''test _var_groups

    Loads the fixture file into ``clusters`` via _load_file and checks that
    _var_groups returns the expected set of group ids for each cluster.
    '''
    fixture = os.path.join(data_dir, 'summary_sample_test_var_groups.tsv')
    sample = summary_sample.SummarySample(fixture)
    sample.clusters = sample._load_file(fixture, 90)

    actual = sample._var_groups()

    want = {
        'cluster.n': {'id1', 'id2'},
        'cluster.p': {'id3'},
        'cluster.v': {'id4'},
    }
    self.assertEqual(want, actual)