示例#1
0
    def test_pipeline_read_file_pattern(self):
        """Read headers from several VCF files selected by a glob pattern.

        Three temporary ``.vcf`` files are written, each holding a different
        subset of ``self.lines`` (every subset ends with ``self.lines[-1]``).
        ``ReadVcfHeaders`` is then pointed at ``<tempdir>/*.vcf`` and the
        resulting PCollection is compared, via ``asserts.header_vars_equal``,
        against one expected header per file.
        """
        with temp_dir.TempDir() as tempdir:
            # One entry per file to be written; order matters only for pairing
            # each group with the path of the file created from it.
            header_groups = [
                [self.lines[1], self.lines[-1]],
                [self.lines[2], self.lines[3], self.lines[-1]],
                [self.lines[4], self.lines[-1]],
            ]
            vcf_paths = [
                tempdir.create_temp_file(suffix='.vcf', lines=group)
                for group in header_groups
            ]

            pipeline = TestPipeline()
            glob_pattern = os.path.join(tempdir.get_path(), '*.vcf')
            pcoll = pipeline | 'ReadHeaders' >> ReadVcfHeaders(glob_pattern)

            # Each expected header is built from the same lines that were
            # written, tagged with the file they were written to.
            expected = [
                _get_vcf_header_from_lines(group, file_name=path)
                for group, path in zip(header_groups, vcf_paths)
            ]
            assert_that(pcoll, asserts.header_vars_equal(expected))
            pipeline.run()
示例#2
0
  def test_pipeline_read_file_headers(self):
    """Read the header of a single VCF file through ``ReadVcfHeaders``.

    A temporary ``.vcf`` file is written from
    ``testdata_util.get_sample_vcf_file_lines()``; the pipeline output is
    expected to equal the header built from ``self.lines`` (presumably the
    header-only lines prepared by the fixture -- confirm against ``setUp``).
    """
    # Lines the expected header is built from: the fixture value on entry.
    header_lines = self.lines
    # Full file contents are kept in a local instead of being written back
    # to ``self.lines``, so the fixture attribute keeps a single meaning and
    # is not clobbered by this test.
    file_lines = testdata_util.get_sample_vcf_file_lines()

    with temp_dir.TempDir() as tempdir:
      filename = tempdir.create_temp_file(suffix='.vcf', lines=file_lines)

      pipeline = TestPipeline()
      pcoll = pipeline | 'ReadHeaders' >> ReadVcfHeaders(filename)

      assert_that(pcoll, equal_to([_get_vcf_header_from_lines(header_lines)]))
      pipeline.run()