Python Summary示例，ariba.summary.Summary Python示例

示例#1

0

显示文件

文件： summary_test.py 项目： aslett1/ariba

 def test_init(self):
     '''Check Summary.filenames when built from a fofn, a list, or both'''
     names_file = os.path.join(data_dir, 'summary_test_init.fofn')

     # File-of-filenames only
     from_fofn = summary.Summary('out', fofn=names_file)
     self.assertEqual(from_fofn.filenames, ['file1', 'file2'])

     # Explicit list only
     from_list = summary.Summary('out', filenames=['file42'])
     self.assertEqual(from_list.filenames, ['file42'])

     # Both given: the explicit name is expected ahead of the fofn entries
     combined = summary.Summary('out', fofn=names_file, filenames=['file42'])
     self.assertEqual(combined.filenames, ['file42', 'file1', 'file2'])

示例#2

0

显示文件

文件： summary_test.py 项目： aslett1/ariba

    def test_load_file(self):
        '''Test _load_file: expect a dict of gene name -> list of parsed rows'''
        loader = summary.Summary('out', filenames=['spam', 'eggs'])
        tsv_path = os.path.join(data_dir, 'summary_test_load_file.in.tsv')

        gene1_row = [
            'gene1', '27', '42', '1', '822', '822', '100.0',
            '.', '.', '.', '.', '.', '.',
            'gene1.scaffold.1', '1490',
            '.', '.', '.', '.', '.', '.',
        ]
        gene2_first_row = [
            'gene2', '15', '44', '2', '780', '780', '100.0',
            '.', '.', '.', '.', '.', '.',
            'gene2.scaffold.2', '1124',
            '.', '.', '.', '.', '.', '.',
        ]
        gene2_second_row = [
            'gene2', '15', '46', '2', '780', '770', '99.0',
            '.', '.', '.', '.', '.', '.',
            'gene2.scaffold.3', '1097',
            '.', '.', '.', '.', '.', '.',
        ]
        gene3_row = [
            'gene3', '187', '48', '3', '750', '750', '98.93',
            'SNP', 'SYN', '.', '318', '318', 'C',
            'gene3.scaffold.1', '1047',
            '319', '319', 'G', '.', '.', '.',
        ]

        # Parse each row the same way the loader is expected to
        parsed = [
            loader._line2dict('\t'.join(fields))
            for fields in (gene1_row, gene2_first_row, gene2_second_row, gene3_row)
        ]
        expected = {
            'gene1': [parsed[0]],
            'gene2': [parsed[1], parsed[2]],
            'gene3': [parsed[3]],
        }

        self.assertEqual(expected, loader._load_file(tsv_path))

示例#3

0

显示文件

文件： summary_test.py 项目： aslett1/ariba

 def test_line2dict(self):
     '''Test _line2dict turns one tab-separated line into the expected dict'''
     parser = summary.Summary('out', filenames=['spam', 'eggs'])

     fields = [
         'gene1', '187', '42', '3', '750', '750', '98.93',
         'SNP', 'SYN', '.', '66', '66', 'A',
         'gene1.scaffold.1', '1047', '67', '67', 'C',
         '42', 'A', '22,20',
     ]

     # Keys listed in the same order as the columns above
     wanted = {
         'gene': 'gene1',
         'flag': flag.Flag(187),
         'reads': 42,
         'cluster': '3',
         'gene_len': 750,
         'assembled': 750,
         'pc_ident': 98.93,
         'var_type': 'SNP',
         'var_effect': 'SYN',
         'new_aa': '.',
         'gene_start': 66,
         'gene_end': 66,
         'gene_nt': 'A',
         'scaffold': 'gene1.scaffold.1',
         'scaff_len': 1047,
         'scaff_start': 67,
         'scaff_end': 67,
         'scaff_nt': 'C',
         'read_depth': 42,
         'alt_bases': 'A',
         'ref_alt_depth': '22,20',
     }

     got = parser._line2dict('\t'.join(fields))
     self.assertEqual(got, wanted)

示例#4

0

显示文件

文件： summary_test.py 项目： ys4/ariba

    def test_whole_run(self):
        '''Test whole run to check csv ok (skip making tree).

        Runs Summary on two input TSV files and compares the CSV it writes
        with the expected CSV, ignoring the first column of every row.
        The output files are removed even when a check fails, so a failing
        run does not leave stale files in the working directory.
        '''
        tmp_out = 'tmp.summary_test_whole_run.out'
        out_csv = tmp_out + '.csv'
        phandango_csv = tmp_out + '.phandango.csv'
        infiles = [
            os.path.join(data_dir, 'summary_test_whole_run.in.1.tsv'),
            os.path.join(data_dir, 'summary_test_whole_run.in.2.tsv'),
        ]

        s = summary.Summary(
            tmp_out,
            filenames=infiles,
            make_phandango_tree=False,
            show_var_groups=True,
            show_known_vars=True,
            show_novel_vars=True
        )

        expected_file = os.path.join(data_dir, 'summary_test_whole_run.out.csv')
        try:
            s.run()
            # we don't know the full path of the input files, so check all the other columns
            with open(expected_file) as f:
                expected = [line.rstrip().split(',', maxsplit=1)[1] for line in f]
            with open(out_csv) as f:
                got = [line.rstrip().split(',', maxsplit=1)[1] for line in f]

            self.assertEqual(expected, got)
            # The phandango CSV is expected as well; check it explicitly
            # rather than relying on the cleanup step to notice it is missing.
            self.assertTrue(os.path.exists(phandango_csv))
        finally:
            # Only remove what exists so a cleanup error cannot hide the real failure
            for path in (out_csv, phandango_csv):
                if os.path.exists(path):
                    os.unlink(path)

示例#5

0

显示文件

文件： summary_test.py 项目： ys4/ariba

    def test_to_matrix_all_cols(self):
        '''Test _to_matrix all columns.

        The inputs are supplied through a temporary file of filenames in
        which the first entry carries the extra name 'sample1'; the expected
        matrix uses 'sample1' as the name of that row. Variant groups, known
        variants and novel variants are all switched on.
        '''
        infiles = [
            os.path.join(data_dir, 'summary_to_matrix.1.tsv'),
            os.path.join(data_dir, 'summary_to_matrix.2.tsv')
        ]

        fofn = 'tmp.summary_to_matrix_all_cols'
        with open(fofn, 'w') as f:
            print(infiles[0], 'sample1', file=f)
            print(infiles[1], file=f)

        try:
            s = summary.Summary('out', fofn=fofn, show_var_groups=True, show_known_vars=True, show_novel_vars=True)
        finally:
            # Remove the temporary fofn even if the constructor raises
            os.unlink(fofn)

        s.samples = summary.Summary._load_input_files(s.filenames, 90)
        s._gather_unfiltered_output_data()
        got_phandango_header, got_csv_header, got_matrix = summary.Summary._to_matrix(s.filenames, s.all_data, s.all_potential_columns, s.cluster_columns)

        expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.id1:o1', 'noncoding1.id1.%:c2', 'noncoding1.id3:o1', 'noncoding1.id3.%:c2', 'noncoding1.14GT:o1', 'noncoding1.14GT.%:c2', 'noncoding1.14T:o1', 'noncoding1.14T.%:c2', 'noncoding1.6G:o1', 'noncoding1.6G.%:c2', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.id2:o1', 'noncoding2.id2.%:c2', 'noncoding2.42T:o1', 'noncoding2.42T.%:c2', 'noncoding2.52GT:o1', 'noncoding2.52GT.%:c2', 'presence_absence1.assembled:o1', 'presence_absence1.match:o1', 'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1', 'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1', 'presence_absence1.A10V:o1']
        expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.id1', 'noncoding1.id1.%', 'noncoding1.id3', 'noncoding1.id3.%', 'noncoding1.14GT', 'noncoding1.14GT.%', 'noncoding1.14T', 'noncoding1.14T.%', 'noncoding1.6G', 'noncoding1.6G.%', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.id2', 'noncoding2.id2.%', 'noncoding2.42T', 'noncoding2.42T.%', 'noncoding2.52GT', 'noncoding2.52GT.%', 'presence_absence1.assembled', 'presence_absence1.match', 'presence_absence1.ref_seq', 'presence_absence1.pct_id', 'presence_absence1.known_var', 'presence_absence1.novel_var', 'presence_absence1.A10V']
        expected_matrix = [
            ['sample1', 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 100.0, 'no', 'NA', 'no', 'NA', 'yes', 100.0, 'no', 'NA', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes_multi_het', 'NA', 'yes', 100.0, 'het', 40.0, 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes', 'yes'],
            [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 100.0, 'het', 80.0, 'no', 'NA', 'yes', 100.0, 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'het', 40.0, 'no', 'NA', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes', 'yes']
        ]

        self.assertEqual(expected_phandango_header, got_phandango_header)
        self.assertEqual(expected_csv_header, got_csv_header)
        self.assertEqual(expected_matrix, got_matrix)

示例#6

0

显示文件

    def test_to_matrix_assembled_only(self):
        '''Test _to_matrix when cluster_cols is just "assembled"'''
        first_tsv = os.path.join(data_dir, 'summary_to_matrix.1.tsv')
        second_tsv = os.path.join(data_dir, 'summary_to_matrix.2.tsv')
        infiles = [first_tsv, second_tsv]

        summariser = summary.Summary(
            'out', filenames=infiles, cluster_cols='assembled')
        summariser.samples = summary.Summary._load_input_files(
            summariser.filenames, 90)
        summariser._gather_unfiltered_output_data()
        matrix_output = summary.Summary._to_matrix(
            summariser.filenames,
            summariser.all_data,
            summariser.all_potential_columns,
            summariser.cluster_columns,
        )
        got_phandango_header, got_csv_header, got_matrix = matrix_output

        # One "assembled" column per cluster, nothing else
        expected_phandango_header = [
            'name',
            'noncoding1.assembled:o1',
            'noncoding2.assembled:o1',
            'presence_absence1.assembled:o1',
        ]
        expected_csv_header = [
            'name',
            'noncoding1.assembled',
            'noncoding2.assembled',
            'presence_absence1.assembled',
        ]
        expected_matrix = [
            [first_tsv, 'yes', 'yes', 'yes'],
            [second_tsv, 'yes', 'yes', 'yes'],
        ]

        self.assertEqual(expected_phandango_header, got_phandango_header)
        self.assertEqual(expected_csv_header, got_csv_header)
        self.assertEqual(expected_matrix, got_matrix)

示例#7

0

显示文件

    def test_to_matrix_with_groups(self):
        '''Test _to_matrix with show_var_groups switched on'''
        first_tsv = os.path.join(data_dir, 'summary_to_matrix.1.tsv')
        second_tsv = os.path.join(data_dir, 'summary_to_matrix.2.tsv')
        infiles = [first_tsv, second_tsv]

        summariser = summary.Summary(
            'out', filenames=infiles, show_var_groups=True)
        summariser.samples = summary.Summary._load_input_files(
            summariser.filenames, 90)
        summariser._gather_unfiltered_output_data()
        matrix_output = summary.Summary._to_matrix(
            summariser.filenames,
            summariser.all_data,
            summariser.all_potential_columns,
            summariser.cluster_columns,
        )
        got_phandango_header, got_csv_header, got_matrix = matrix_output

        # Headers are laid out one cluster per paragraph
        expected_phandango_header = [
            'name',

            'noncoding1.assembled:o1', 'noncoding1.match:o1',
            'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1',
            'noncoding1.ctg_cov:c3', 'noncoding1.known_var:o1',
            'noncoding1.novel_var:o1',
            'noncoding1.id1:o1', 'noncoding1.id1.%:c2',
            'noncoding1.id3:o1', 'noncoding1.id3.%:c2',

            'noncoding2.assembled:o1', 'noncoding2.match:o1',
            'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1',
            'noncoding2.ctg_cov:c3', 'noncoding2.known_var:o1',
            'noncoding2.novel_var:o1',
            'noncoding2.id2:o1', 'noncoding2.id2.%:c2',

            'presence_absence1.assembled:o1', 'presence_absence1.match:o1',
            'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1',
            'presence_absence1.ctg_cov:c3', 'presence_absence1.known_var:o1',
            'presence_absence1.novel_var:o1',
        ]
        expected_csv_header = [
            'name',

            'noncoding1.assembled', 'noncoding1.match',
            'noncoding1.ref_seq', 'noncoding1.pct_id',
            'noncoding1.ctg_cov', 'noncoding1.known_var',
            'noncoding1.novel_var',
            'noncoding1.id1', 'noncoding1.id1.%',
            'noncoding1.id3', 'noncoding1.id3.%',

            'noncoding2.assembled', 'noncoding2.match',
            'noncoding2.ref_seq', 'noncoding2.pct_id',
            'noncoding2.ctg_cov', 'noncoding2.known_var',
            'noncoding2.novel_var',
            'noncoding2.id2', 'noncoding2.id2.%',

            'presence_absence1.assembled', 'presence_absence1.match',
            'presence_absence1.ref_seq', 'presence_absence1.pct_id',
            'presence_absence1.ctg_cov', 'presence_absence1.known_var',
            'presence_absence1.novel_var',
        ]

        # Each row: name, then the values for noncoding1, noncoding2, presence_absence1
        first_row = [
            first_tsv,
            'yes', 'yes', 'noncoding_ref1', '98.33', '10.0', 'yes', 'no',
            'yes', 100.0, 'no', 'NA',
            'yes', 'yes', 'noncoding_ref2', '98.33', '10.0', 'yes', 'no',
            'yes_multi_het', 'NA',
            'yes', 'yes', 'presence_absence_ref1', '98.96', '20.1', 'no', 'yes',
        ]
        second_row = [
            second_tsv,
            'yes', 'yes', 'noncoding_ref1', '98.33', '50.1', 'yes', 'no',
            'het', 80.0, 'yes', 100.0,
            'yes', 'yes', 'noncoding_ref2', '98.33', '10.0', 'yes', 'no',
            'het', 40.0,
            'yes', 'yes', 'presence_absence1', '98.96', '51.1', 'no', 'yes',
        ]
        expected_matrix = [first_row, second_row]

        self.assertEqual(expected_phandango_header, got_phandango_header)
        self.assertEqual(expected_csv_header, got_csv_header)
        self.assertEqual(expected_matrix, got_matrix)

示例#8

0

显示文件

文件： summary_test.py 项目： aslett1/ariba

 def test_write_tsv(self):
     '''Test _write_tsv.

     Sets ``rows_out`` to a small table, calls ``_write_tsv`` and compares
     the contents of the temporary output file with the expected TSV.
     The temporary file is removed even if the write or the comparison
     fails, so a failing run leaves nothing behind.
     '''
     tmp_out = 'tmp.out.tsv'
     s = summary.Summary(tmp_out, filenames=['spam', 'eggs'])
     s.rows_out = [
         ['filename', 'gene1', 'gene3'],
         ['file2', 1, 3],
         ['file3', 2, 4],
     ]
     expected = os.path.join(data_dir, 'summary_test_write_tsv.out.tsv')
     try:
         s._write_tsv()
         # shallow=False compares file contents, not just os.stat() signatures
         self.assertTrue(filecmp.cmp(tmp_out, expected, shallow=False))
     finally:
         # Guard the unlink so a missing file cannot mask the real failure
         if os.path.exists(tmp_out):
             os.unlink(tmp_out)

示例#9

0

显示文件

文件： summary_test.py 项目： aslett1/ariba

 def test_gather_output_rows(self):
     '''Test _gather_output_rows fills rows_out from two input files'''
     first_tsv = os.path.join(
         data_dir, 'summary_test_gather_output_rows.in.1.tsv')
     second_tsv = os.path.join(
         data_dir, 'summary_test_gather_output_rows.in.2.tsv')

     summariser = summary.Summary('out', filenames=[first_tsv, second_tsv])
     summariser._gather_output_rows()

     # Header row, then one row per input file
     header = ['filename', 'gene1', 'gene2', 'gene3']
     wanted = [
         header,
         [first_tsv, 3, 2, 0],
         [second_tsv, 3, 0, 3],
     ]
     self.assertEqual(wanted, summariser.rows_out)

示例#10

0

显示文件

文件： summary_test.py 项目： aslett1/ariba

    def test_to_summary_number(self):
        '''Test _to_summary_number for several flag values and a low pc_ident'''
        summariser = summary.Summary('out', filenames=['spam', 'eggs'])

        # (flag value, expected summary number), all with pc_ident 99
        cases = (
            (0, 0),
            (64, 0),
            (7, 1),
            (259, 1),
            (15, 2),
            (27, 3),
        )
        for flag_value, wanted in cases:
            record = {
                'flag': flag.Flag(flag_value),
                'assembled': 42,
                'pc_ident': 99,
            }
            self.assertEqual(summariser._to_summary_number([record]), wanted)

        # Same flag as the last case, but pc_ident 89: expected number is 0
        low_identity = {'flag': flag.Flag(27), 'assembled': 42, 'pc_ident': 89}
        self.assertEqual(summariser._to_summary_number([low_identity]), 0)

示例#11

0

显示文件

文件： summary_test.py 项目： aslett1/ariba

    def test_filter_output_rows(self):
        '''Test _filter_output_rows on a table with an all-zero row and column'''
        summariser = summary.Summary('out', filenames=['spam', 'eggs'])

        # Input: file1 is zero everywhere, gene2 is zero for every file
        summariser.rows_out = [
            ['filename', 'gene1', 'gene2', 'gene3'],
            ['file1', 0, 0, 0],
            ['file2', 1, 0, 3],
            ['file3', 2, 0, 4],
        ]
        summariser._filter_output_rows()

        # Expected: neither the file1 row nor the gene2 column remains
        wanted = [
            ['filename', 'gene1', 'gene3'],
            ['file2', 1, 3],
            ['file3', 2, 4],
        ]
        self.assertEqual(summariser.rows_out, wanted)

示例#12

0

显示文件

文件： summary_test.py 项目： ys4/ariba

    def test_to_matrix_cluster_only(self):
        '''Test _to_matrix with default options (cluster columns only)'''
        first_tsv = os.path.join(data_dir, 'summary_to_matrix.1.tsv')
        second_tsv = os.path.join(data_dir, 'summary_to_matrix.2.tsv')
        infiles = [first_tsv, second_tsv]

        summariser = summary.Summary('out', filenames=infiles)
        summariser.samples = summary.Summary._load_input_files(
            summariser.filenames, 90)
        summariser._gather_unfiltered_output_data()
        matrix_output = summary.Summary._to_matrix(
            summariser.filenames,
            summariser.all_data,
            summariser.all_potential_columns,
            summariser.cluster_columns,
        )
        got_phandango_header, got_csv_header, got_matrix = matrix_output

        # Six columns per cluster, one cluster per paragraph
        expected_phandango_header = [
            'name',

            'noncoding1.assembled:o1', 'noncoding1.match:o1',
            'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1',
            'noncoding1.known_var:o1', 'noncoding1.novel_var:o1',

            'noncoding2.assembled:o1', 'noncoding2.match:o1',
            'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1',
            'noncoding2.known_var:o1', 'noncoding2.novel_var:o1',

            'presence_absence1.assembled:o1', 'presence_absence1.match:o1',
            'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1',
            'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1',
        ]
        expected_csv_header = [
            'name',

            'noncoding1.assembled', 'noncoding1.match',
            'noncoding1.ref_seq', 'noncoding1.pct_id',
            'noncoding1.known_var', 'noncoding1.novel_var',

            'noncoding2.assembled', 'noncoding2.match',
            'noncoding2.ref_seq', 'noncoding2.pct_id',
            'noncoding2.known_var', 'noncoding2.novel_var',

            'presence_absence1.assembled', 'presence_absence1.match',
            'presence_absence1.ref_seq', 'presence_absence1.pct_id',
            'presence_absence1.known_var', 'presence_absence1.novel_var',
        ]

        # Each row: name, then six values for each of the three clusters
        first_row = [
            first_tsv,
            'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no',
            'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no',
            'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes',
        ]
        second_row = [
            second_tsv,
            'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no',
            'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no',
            'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes',
        ]
        expected_matrix = [first_row, second_row]

        self.assertEqual(expected_phandango_header, got_phandango_header)
        self.assertEqual(expected_csv_header, got_csv_header)
        self.assertEqual(expected_matrix, got_matrix)

示例#13

0

显示文件

文件： summary_test.py 项目： ys4/ariba

    def test_to_matrix_with_vars(self):
        '''Test _to_matrix with known and novel variants switched on'''
        first_tsv = os.path.join(data_dir, 'summary_to_matrix.1.tsv')
        second_tsv = os.path.join(data_dir, 'summary_to_matrix.2.tsv')
        infiles = [first_tsv, second_tsv]

        summariser = summary.Summary(
            'out',
            filenames=infiles,
            show_known_vars=True,
            show_novel_vars=True,
        )
        summariser.samples = summary.Summary._load_input_files(
            summariser.filenames, 90)
        summariser._gather_unfiltered_output_data()
        matrix_output = summary.Summary._to_matrix(
            summariser.filenames,
            summariser.all_data,
            summariser.all_potential_columns,
            summariser.cluster_columns,
        )
        got_phandango_header, got_csv_header, got_matrix = matrix_output

        # Per cluster: the six summary columns followed by its variant columns
        expected_phandango_header = [
            'name',

            'noncoding1.assembled:o1', 'noncoding1.match:o1',
            'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1',
            'noncoding1.known_var:o1', 'noncoding1.novel_var:o1',
            'noncoding1.14GT:o1', 'noncoding1.14GT.%:c2',
            'noncoding1.14T:o1', 'noncoding1.14T.%:c2',
            'noncoding1.6G:o1', 'noncoding1.6G.%:c2',

            'noncoding2.assembled:o1', 'noncoding2.match:o1',
            'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1',
            'noncoding2.known_var:o1', 'noncoding2.novel_var:o1',
            'noncoding2.42T:o1', 'noncoding2.42T.%:c2',
            'noncoding2.52GT:o1', 'noncoding2.52GT.%:c2',

            'presence_absence1.assembled:o1', 'presence_absence1.match:o1',
            'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1',
            'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1',
            'presence_absence1.A10V:o1',
        ]
        expected_csv_header = [
            'name',

            'noncoding1.assembled', 'noncoding1.match',
            'noncoding1.ref_seq', 'noncoding1.pct_id',
            'noncoding1.known_var', 'noncoding1.novel_var',
            'noncoding1.14GT', 'noncoding1.14GT.%',
            'noncoding1.14T', 'noncoding1.14T.%',
            'noncoding1.6G', 'noncoding1.6G.%',

            'noncoding2.assembled', 'noncoding2.match',
            'noncoding2.ref_seq', 'noncoding2.pct_id',
            'noncoding2.known_var', 'noncoding2.novel_var',
            'noncoding2.42T', 'noncoding2.42T.%',
            'noncoding2.52GT', 'noncoding2.52GT.%',

            'presence_absence1.assembled', 'presence_absence1.match',
            'presence_absence1.ref_seq', 'presence_absence1.pct_id',
            'presence_absence1.known_var', 'presence_absence1.novel_var',
            'presence_absence1.A10V',
        ]

        # Rows follow the header layout: summary values, then variant values
        first_row = [
            first_tsv,
            'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no',
            'no', 'NA', 'yes', 100.0, 'no', 'NA',
            'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no',
            'yes', 100.0, 'het', 40.0,
            'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes',
            'yes',
        ]
        second_row = [
            second_tsv,
            'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no',
            'het', 80.0, 'no', 'NA', 'yes', 100.0,
            'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no',
            'no', 'NA', 'het', 40.0,
            'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes',
            'yes',
        ]
        expected_matrix = [first_row, second_row]

        self.assertEqual(expected_phandango_header, got_phandango_header)
        self.assertEqual(expected_csv_header, got_csv_header)
        self.assertEqual(expected_matrix, got_matrix)

示例#14

0

显示文件

文件： summary_test.py 项目： ys4/ariba

    def test_gather_unfiltered_output_data(self):
        '''Test _gather_unfiltered_output_data in four stages.

        The same two input files are summarised four times: with default
        options, then with show_var_groups, then also show_known_vars, then
        also show_novel_vars. The expected dictionaries are updated in place
        before each stage, so every stage builds on the previous one.
        '''
        infiles = [
            os.path.join(data_dir, 'summary_gather_unfiltered_output_data.in.1.tsv'),
            os.path.join(data_dir, 'summary_gather_unfiltered_output_data.in.2.tsv')
        ]

        # Expected s.all_data for default options: input file -> cluster ->
        # 'summary' values, with 'groups' and 'vars' left empty.
        expected_all = {
            infiles[0]: {
                'noncoding1': {
                    'summary': {
                        'assembled': 'yes',
                        'known_var': 'yes',
                        'match': 'yes',
                        'novel_var': 'no',
                        'pct_id': '98.33',
                        'ref_seq': 'noncoding_ref1'
                    },
                    'groups': {},
                    'vars': {},
                },
                'noncoding2': {
                    'summary': {
                        'assembled': 'yes',
                        'known_var': 'yes',
                        'match': 'yes',
                        'novel_var': 'no',
                        'pct_id': '98.33',
                        'ref_seq': 'noncoding_ref2'
                    },
                    'groups': {},
                    'vars': {},
                },
                'presence_absence1': {
                    'summary': {
                        'assembled': 'yes',
                        'known_var': 'no',
                        'match': 'yes',
                        'novel_var': 'yes',
                        'pct_id': '98.96',
                        'ref_seq': 'presence_absence_ref1'
                    },
                    'groups': {},
                    'vars': {},
                },
                'presence_absence2': {
                    'summary': {
                            'assembled': 'partial',
                            'known_var': 'no',
                            'match': 'no',
                            'novel_var': 'yes',
                            'pct_id': '99.1',
                            'ref_seq': 'presence_absence_ref2'
                    },
                    'groups': {},
                    'vars': {}
                }
            },
            infiles[1]: {
                'noncoding1': {
                    'summary': {'assembled': 'yes',
                        'known_var': 'yes',
                        'match': 'yes',
                        'novel_var': 'no',
                        'pct_id': '98.33',
                        'ref_seq': 'noncoding_ref1'
                     },
                    'groups': {},
                    'vars': {},
                },
                'noncoding2': {
                    'summary': {
                        'assembled': 'yes',
                        'known_var': 'yes',
                        'match': 'yes',
                        'novel_var': 'no',
                        'pct_id': '98.33',
                        'ref_seq': 'noncoding_ref2'
                    },
                    'groups': {},
                    'vars': {},
                },
                'presence_absence1': {
                    'summary': {
                            'assembled': 'yes',
                            'known_var': 'no',
                            'match': 'yes',
                            'novel_var': 'yes',
                            'pct_id': '98.96',
                            'ref_seq': 'presence_absence1'
                    },
                    'groups': {},
                    'vars': {}
                },
            }
        }

        # Expected s.all_potential_columns for default options: cluster ->
        # set of column names per category ('summary', 'groups', 'vars').
        expected_potential_cols = {
            'noncoding1': {
                'summary': {
                    'assembled',
                    'known_var',
                    'match',
                    'novel_var',
                    'pct_id',
                    'ref_seq'
                },
                'groups': set(),
                'vars': set()
            },
            'noncoding2': {
                'summary': {
                    'assembled',
                    'known_var',
                    'match',
                    'novel_var',
                    'pct_id',
                    'ref_seq'
                },
                'groups': set(),
                'vars': set()
            },
            'presence_absence1': {
                'summary': {
                    'assembled',
                    'known_var',
                    'match',
                    'novel_var',
                    'pct_id',
                    'ref_seq'
                },
                'groups': set(),
                'vars': set()
            },
            'presence_absence2': {
                'summary': {
                    'assembled',
                    'known_var',
                    'match',
                    'novel_var',
                    'pct_id',
                    'ref_seq'
                },
                'groups': set(),
                'vars': set()
            }
        }

        # Stage 1: default options. maxDiff = None lifts unittest's limit on
        # the length of the diff shown when an assertion fails.
        self.maxDiff = None
        s = summary.Summary('out', filenames=infiles)
        s.samples = summary.Summary._load_input_files(s.filenames, 90)
        s._gather_unfiltered_output_data()
        self.assertEqual(expected_potential_cols, s.all_potential_columns)
        self.assertEqual(expected_all, s.all_data)

        # Stage 2: show_var_groups=True, so 'groups' entries are now expected.
        expected_potential_cols['noncoding1']['groups'] = {'id3', 'id1', 'id1.%', 'id3.%'}
        expected_potential_cols['noncoding2']['groups'] = {'id2.%', 'id2'}
        expected_all[infiles[0]]['noncoding1']['groups'] = {'id1': 'yes', 'id1.%': 100.0}
        expected_all[infiles[0]]['noncoding2']['groups'] = {'id2': 'yes_multi_het', 'id2.%': 'NA'}
        expected_all[infiles[1]]['noncoding1']['groups'] = {'id1': 'het', 'id1.%': 80.0, 'id3': 'yes', 'id3.%': 100.0}
        expected_all[infiles[1]]['noncoding2']['groups'] = {'id2': 'het', 'id2.%': 40.0}
        s = summary.Summary('out', filenames=infiles, show_var_groups=True)
        s.samples = summary.Summary._load_input_files(s.filenames, 90)
        s._gather_unfiltered_output_data()
        self.assertEqual(expected_potential_cols, s.all_potential_columns)
        self.assertEqual(expected_all, s.all_data)

        # Stage 3: show_known_vars=True as well, so 'vars' entries are now
        # expected for the two noncoding clusters.
        expected_potential_cols['noncoding1']['vars'] = {'14T', '14T.%', '14GT', '14GT.%', '6G', '6G.%'}
        expected_potential_cols['noncoding2']['vars'] = {'52GT', '52GT.%', '42T', '42T.%'}

        expected_all[infiles[0]]['noncoding1']['vars'] = {'14T': 'yes', '14T.%': 100.0}
        expected_all[infiles[0]]['noncoding2']['vars'] = {'42T': 'yes', '42T.%': 100.0, '52GT': 'het', '52GT.%': 40.0}
        expected_all[infiles[1]]['noncoding1']['vars'] = {'14GT': 'het', '14GT.%': 80.0, '6G': 'yes', '6G.%': 100.0}
        expected_all[infiles[1]]['noncoding2']['vars'] = {'52GT': 'het', '52GT.%': 40.0}
        s = summary.Summary('out', filenames=infiles, show_var_groups=True, show_known_vars=True)
        s.samples = summary.Summary._load_input_files(s.filenames, 90)
        s._gather_unfiltered_output_data()
        self.assertEqual(expected_potential_cols, s.all_potential_columns)
        self.assertEqual(expected_all, s.all_data)

        # Stage 4: show_novel_vars=True as well, so 'vars' entries are now
        # expected for the presence_absence clusters too.
        expected_potential_cols['presence_absence1']['vars'] = {'A10V'}
        expected_potential_cols['presence_absence2']['vars'] = {'V175L'}
        expected_all[infiles[0]]['presence_absence1']['vars'] = {'A10V': 'yes'}
        expected_all[infiles[0]]['presence_absence2']['vars'] = {'V175L': 'yes'}
        expected_all[infiles[1]]['presence_absence1']['vars'] = {'A10V': 'yes'}
        s = summary.Summary('out', filenames=infiles, show_var_groups=True, show_known_vars=True, show_novel_vars=True)
        s.samples = summary.Summary._load_input_files(s.filenames, 90)
        s._gather_unfiltered_output_data()
        self.assertEqual(expected_potential_cols, s.all_potential_columns)
        self.assertEqual(expected_all, s.all_data)