def test_pipeline_read_file_pattern(self):
    """Read headers from several VCF files matched by one glob pattern.

    Three temporary ``.vcf`` files are written, each holding a different
    subset of ``self.lines`` followed by the final line of ``self.lines``.
    ``ReadVcfHeaders`` is then pointed at ``*.vcf`` inside the temporary
    directory, and the resulting PCollection is checked with
    ``asserts.header_vars_equal`` against one expected header per file.
    """
    with temp_dir.TempDir() as tempdir:
        closing_line = self.lines[-1]
        header_groups = [
            [self.lines[1], closing_line],
            [self.lines[2], self.lines[3], closing_line],
            [self.lines[4], closing_line],
        ]
        # One temp file per group, created in the same order as the groups.
        vcf_paths = [
            tempdir.create_temp_file(suffix='.vcf', lines=group)
            for group in header_groups
        ]

        pipeline = TestPipeline()
        file_pattern = os.path.join(tempdir.get_path(), '*.vcf')
        pcoll = pipeline | 'ReadHeaders' >> ReadVcfHeaders(file_pattern)

        # Each expected header is built from the lines written to its file
        # and tagged with that file's name.
        expected = [
            _get_vcf_header_from_lines(group, file_name=path)
            for group, path in zip(header_groups, vcf_paths)
        ]
        assert_that(pcoll, asserts.header_vars_equal(expected))
        pipeline.run()
def test_pipeline_read_file_headers(self):
    """Read headers from a single sample VCF file through a pipeline.

    The file is written from ``testdata_util.get_sample_vcf_file_lines()``,
    while the expected header is built from the lines that ``self.lines``
    held on entry to this test.
    """
    # Capture the current lines for the expectation before rebinding
    # self.lines to the sample file contents.
    header_lines = self.lines
    self.lines = testdata_util.get_sample_vcf_file_lines()

    with temp_dir.TempDir() as tempdir:
        vcf_path = tempdir.create_temp_file(suffix='.vcf', lines=self.lines)

        pipeline = TestPipeline()
        pcoll = pipeline | 'ReadHeaders' >> ReadVcfHeaders(vcf_path)

        expected = [_get_vcf_header_from_lines(header_lines)]
        assert_that(pcoll, equal_to(expected))
        pipeline.run()