예제 #1
0
    def test_filter_with_just_field_name_excludes_missing_rows(self, standard_gwas_parser_basic):
        """A filter given only a field name is registered and leaves one of the two input rows."""
        source_lines = ["1\t100\tnull\tNone\t0.05", "2\t200\tA\tC\t5e-8"]
        gwas_reader = readers.IterableReader(source_lines, parser=standard_gwas_parser_basic)
        gwas_reader.add_filter('ref')
        registered = gwas_reader._filters
        assert len(registered) == 1

        # Iterating the reader is what actually applies the registered filter
        surviving_rows = list(gwas_reader)
        assert len(surviving_rows) == 1, "output was restricted to the expected rows"
예제 #2
0
    def test_writer_defaults_to_parser_columns(self, tmpdir, standard_gwas_parser_basic):
        """Writing without an explicit `columns` argument emits the full nine-column header line."""
        source_lines = ['1\t100\tA\tC\t0.05', '2\t200\tA\tC\t5e-8']
        gwas_reader = readers.IterableReader(source_lines, parser=standard_gwas_parser_basic)
        target_path = tmpdir / 'test.txt'
        written_path = gwas_reader.write(target_path)

        # Only the first line (the header) of the written file is checked
        with open(written_path, 'r') as handle:
            header_line = handle.readline()
            assert header_line == '#chrom\tpos\trsid\tref\talt\tneg_log_pvalue\tbeta\tstderr_beta\talt_allele_freq\n'
예제 #3
0
    def test_can_write_output(self, tmpdir, standard_gwas_parser_basic):
        """Writing a single column without tabix returns the requested path and produces header + data lines."""
        source_lines = ["1\t100\tA\tC\t0.05", "2\t200\tA\tC\t5e-8"]
        gwas_reader = readers.IterableReader(source_lines, parser=standard_gwas_parser_basic)
        target_path = tmpdir / 'test.txt'
        written_path = gwas_reader.write(target_path, columns=['chrom'], make_tabix=False)

        # Without tabix, the writer hands back the same filename it was given
        assert target_path == written_path
        assert os.path.isfile(written_path), "Output filename exists"
        with open(written_path, 'r') as handle:
            file_lines = handle.readlines()
            assert file_lines == ["#chrom\n", "1\n", "2\n"]
예제 #4
0
    def test_writer_represents_missing_data_correctly(self, tmpdir, standard_gwas_parser_basic):
        """Input values of `None` and `.` in the last column are both written out as `.`."""
        source_lines = ["1\t100\tA\tC\tNone", "2\t200\tA\tC\t."]
        gwas_reader = readers.IterableReader(source_lines, parser=standard_gwas_parser_basic)
        target_path = tmpdir / 'test.txt'
        written_path = gwas_reader.write(target_path, columns=['neg_log_pvalue'], make_tabix=False)

        assert target_path == written_path
        assert os.path.isfile(written_path), "Output filename exists"
        with open(written_path, 'r') as handle:
            file_lines = handle.readlines()
            # Header line, then one `.` placeholder per input row
            assert file_lines == ["#neg_log_pvalue\n", ".\n", ".\n"]
예제 #5
0
    def test_can_write_tabixed_output(self, tmpdir, standard_gwas_parser_basic):
        """Writing with `make_tabix=True` yields a `.gz` file plus `.tbi` file that `TabixReader` can query."""
        source_lines = ["1\t100\tA\tC\t0.05", "2\t200\tA\tC\t5e-8"]
        gwas_reader = readers.IterableReader(source_lines, parser=standard_gwas_parser_basic)
        requested_path = tmpdir / 'test.gz'
        written_path = gwas_reader.write(str(requested_path), columns=['chrom', 'pos'], make_tabix=True)

        # The returned filename is expected to differ from the one that was requested
        assert requested_path != written_path
        assert written_path.endswith('.gz')
        index_path = '{}.tbi'.format(written_path)
        assert os.path.exists(index_path), "Tabix index exists"

        assert os.path.isfile(written_path), "Output filename exists"

        # Round trip: query the freshly written file by region
        tabix_reader = readers.TabixReader(written_path)
        region_rows = list(tabix_reader.fetch('1', 1, 300))
        assert len(region_rows) == 1, 'Output file can be read with tabix features'
예제 #6
0
 def test_can_optionally_iterate_sans_parsing(self):
     """With `parser=None`, iterating the reader yields the source strings unchanged."""
     raw_lines = ["walrus", "carpenter"]
     unparsed_reader = readers.IterableReader(raw_lines, parser=None)
     yielded = list(unparsed_reader)
     assert yielded == ["walrus", "carpenter"], "Returns unparsed data"
예제 #7
0
 def test_skips_empty_rows_padding_file(self):
     """A source made up solely of empty strings produces no rows."""
     blank_lines = ["", ""]
     padded_reader = readers.IterableReader(blank_lines)
     yielded = list(padded_reader)
     assert not yielded, "Skipped empty lines"
예제 #8
0
 def test_can_specify_iterable_as_source(self):
     """A plain list of tab-delimited lines works as a source; the first parsed field is checked."""
     # Any row-based iterable is acceptable here, not only file-like objects
     list_reader = readers.IterableReader(["X\t1\tA\tG"])
     row_iterator = iter(list_reader)
     first_row = next(row_iterator)
     assert first_row[0] == "X"
예제 #9
0
 def test_transforms_can_work_with_any_parser(self):
     """A transform can be added to a reader with `parser=None`, and rows still come through as lists.

     Transforms bypass the mechanisms for data validation, so they don't perform sanity checks on
     field names. A side effect is that they can be used even if the parser doesn't support field names.
     """
     reader = readers.IterableReader([[1, 2, 3]], parser=None)
     reader.add_transform(lambda row: row)
     rows = list(reader)
     # all() over an empty sequence is True, so first make sure the reader produced something;
     #  otherwise a reader that silently dropped every row would still pass this test
     assert rows, 'Reader yielded at least one row'
     assert all(isinstance(row, list) for row in rows), 'Transforms work even without named fields'
예제 #10
0
 def test_named_fields_require_named_field_parser(self):
     """`add_lookup` on a reader built with `parser=None` raises a ConfigurationException ('name-based')."""

     def always_y(parsed):
         return 'Y'

     unparsed_reader = readers.IterableReader([[1, 2, 3]], parser=None)
     with pytest.raises(exceptions.ConfigurationException, match='name-based'):
         unparsed_reader.add_lookup('chrom', always_y)
예제 #11
0
 def test_add_filter_fails_with_too_many_arguments(self):
     """Passing three positional arguments to `add_filter` raises a ConfigurationException."""
     too_many_args = ('afield', 42, 'superfluous argument')
     line_reader = readers.IterableReader(["X\t1\tA\tG"])
     with pytest.raises(exceptions.ConfigurationException, match='Invalid filter format'):
         line_reader.add_filter(*too_many_args)
예제 #12
0
 def test_no_headers_in_short_file(self):
     """`get_headers` raises a SnifferException ('entire file') for two lines with no tab delimiter."""
     raw_lines = ['walrus', 'carpenter']
     unparsed_reader = readers.IterableReader(raw_lines, parser=None)
     with pytest.raises(exceptions.SnifferException, match='entire file'):
         sniffers.get_headers(unparsed_reader, delimiter='\t')
예제 #13
0
 def test_warns_if_file_is_unreadable(self):
     """With `max_errors=2`, iteration raises TooManyBadLinesException and two errors stay recorded."""
     bad_lines = ['mwa', 'ha', 'ha']
     failing_reader = readers.IterableReader(bad_lines, parser=doomed_parser, skip_errors=True, max_errors=2)
     with pytest.raises(exceptions.TooManyBadLinesException):
         list(failing_reader)
     # The error log is still available after the reader has given up
     recorded = failing_reader.errors
     assert len(recorded) == 2, "Reader gave up after two lines, but tracked the errors"
예제 #14
0
 def test_can_track_errors(self):
     """With `skip_errors=True` and a high `max_errors`, no rows are yielded and three errors are recorded."""
     bad_lines = ['mwa', 'ha', 'ha']
     failing_reader = readers.IterableReader(bad_lines, parser=doomed_parser, skip_errors=True, max_errors=10)
     yielded = list(failing_reader)
     assert not yielded, "No data could actually be read!"
     recorded = failing_reader.errors
     assert len(recorded) == 3, "Three lines could not be parsed"
예제 #15
0
 def test_can_fail_on_first_error(self):
     """With `skip_errors=False`, iterating over unparseable lines raises LineParseException."""
     bad_lines = ['mwa', 'ha', 'ha']
     strict_reader = readers.IterableReader(bad_lines, parser=doomed_parser, skip_errors=False)
     with pytest.raises(exceptions.LineParseException):
         list(strict_reader)
예제 #16
0
 def test_writer_validates_options_when_sending_to_console(self, standard_gwas_parser_basic):
     """Calling `write` with `make_tabix=True` but no filename raises a ConfigurationException ('stream')."""
     source_lines = ['1\t100\tA\tC\t0.05', '2\t200\tA\tC\t5e-8']
     gwas_reader = readers.IterableReader(source_lines, parser=standard_gwas_parser_basic)
     with pytest.raises(exceptions.ConfigurationException, match='stream'):
         gwas_reader.write(make_tabix=True)
예제 #17
0
 def test_writer_can_send_to_console_stdout(self, capsys, standard_gwas_parser_basic):
     """Calling `write` with no arguments prints to stdout; the first captured line is the header."""
     source_lines = ['1\t100\tA\tC\t0.05', '2\t200\tA\tC\t5e-8']
     gwas_reader = readers.IterableReader(source_lines, parser=standard_gwas_parser_basic)
     gwas_reader.write()
     stdout_text, _stderr_text = capsys.readouterr()
     printed_lines = stdout_text.splitlines()
     assert printed_lines[0] == '#chrom\tpos\trsid\tref\talt\tneg_log_pvalue\tbeta\tstderr_beta\talt_allele_freq'
예제 #18
0
 def test_writer_needs_to_know_column_names(self, tmpdir):
     """`write` raises a ConfigurationException ('column names') when the parser returns plain tuples."""

     def tuple_parser(line):
         return ('A', 'B')

     source_lines = ['1\t100\tA\tC\t0.05', '2\t200\tA\tC\t5e-8']
     tuple_reader = readers.IterableReader(source_lines, parser=tuple_parser)
     target_path = tmpdir / 'test.txt'
     with pytest.raises(exceptions.ConfigurationException, match='column names'):
         tuple_reader.write(target_path)
예제 #19
0
 def test_handles_lack_of_headers(self):
     """`get_headers` returns `(0, None)` when the input is only tab-delimited data rows."""
     data_lines = ['X\t100', 'X\t101']
     unparsed_reader = readers.IterableReader(data_lines, parser=None)
     header_row_count, header_text = sniffers.get_headers(unparsed_reader, delimiter='\t')
     assert header_row_count == 0, 'File has no header rows'
     assert header_text is None, 'No header row, so headers are blank'
예제 #20
0
 def test_stops_header_search_after_limit(self):
     """With `max_check=1`, `get_headers` raises a SnifferException ('after limit') on this input."""
     raw_lines = ['walrus', 'carpenter']
     unparsed_reader = readers.IterableReader(raw_lines, parser=None)
     with pytest.raises(exceptions.SnifferException, match='after limit'):
         sniffers.get_headers(unparsed_reader, delimiter='\t', max_check=1)
예제 #21
0
 def test_add_filter_validates_one_argument_syntax(self):
     """A lone integer argument to `add_filter` raises a ConfigurationException."""
     not_a_field_or_callable = 42
     line_reader = readers.IterableReader(["X\t1\tA\tG"])
     with pytest.raises(exceptions.ConfigurationException, match='function or a field name'):
         line_reader.add_filter(not_a_field_or_callable)
예제 #22
0
 def test_can_find_headers(self):
     """`get_headers` reports two leading `#` rows and returns the second one as the header text."""
     file_lines = ["#Comment line", '#Header\tLabels', 'X\t100']
     unparsed_reader = readers.IterableReader(file_lines, parser=None)
     header_row_count, header_text = sniffers.get_headers(unparsed_reader, delimiter='\t')
     assert header_row_count == 2, 'Skipped two header rows'
     assert header_text == '#Header\tLabels', 'Found correct header row'