def test_header_fields_inferred_from_two_variants(self):
  """Checks the fields inferred from two variants with no defined headers.

  `InferUndefinedHeaderFields` is applied with `defined_headers=None` to the
  two sample variants, and the resulting header is compared, ignoring order,
  against the expected INFO and FORMAT definitions listed below.
  """
  expected_infos = {
      'IS': Info('IS', 1, 'String', '', '', ''),
      'ISI': Info('ISI', 1, 'Integer', '', '', ''),
      'ISF': Info('ISF', 1, 'Float', '', '', ''),
      'IF': Info('IF', 1, 'Float', '', '', ''),
      'IB': Info('IB', 0, 'Flag', '', '', ''),
      'IA': Info('IA', None, 'Float', '', '', ''),
      'IS_2': Info('IS_2', 1, 'String', '', '', ''),
  }
  expected_formats = {
      'FI': Format('FI', 1, 'Integer', ''),
      'FU': Format('FU', None, 'Float', ''),
      'FI_2': Format('FI_2', 1, 'Integer', ''),
  }
  expected = vcf_header_io.VcfHeader(infos=expected_infos,
                                     formats=expected_formats)

  # The pipeline is executed when the `with` block exits, so an explicit
  # `p.run()` here would execute the pipeline (and its assertions) twice.
  with TestPipeline() as p:
    variant_1 = self._get_sample_variant_1()
    variant_2 = self._get_sample_variant_2()
    inferred_headers = (
        p
        | Create([variant_1, variant_2])
        | 'InferUndefinedHeaderFields' >>
        infer_headers.InferUndefinedHeaderFields(defined_headers=None))
    assert_that(inferred_headers,
                asserts.header_fields_equal_ignore_order([expected]))
def _get_inferred_headers(
    variants,  # type: pvalue.PCollection
    merged_header  # type: pvalue.PCollection
    ):
  # type: (...) -> (pvalue.PCollection, pvalue.PCollection)
  """Infers header fields from `variants` and merges them into the header.

  Args:
    variants: PCollection passed through `FilterVariants` and then
      `InferUndefinedHeaderFields`.
    merged_header: PCollection used both as the singleton side input for
      inference and as the second input of the final merge.

  Returns:
    A tuple `(inferred_headers, merged_header)`: the output of the inference
    step, and the result of flattening it together with the incoming
    `merged_header` and applying `MergeHeaders` with
    `allow_incompatible_records=True`.
  """
  filtered_variants = (
      variants | 'FilterVariants' >> filter_variants.FilterVariants())
  defined_headers = pvalue.AsSingleton(merged_header)
  inferred_headers = (
      filtered_variants
      | ' InferUndefinedHeaderFields' >>
      infer_headers.InferUndefinedHeaderFields(defined_headers))

  flattened_headers = (inferred_headers, merged_header) | beam.Flatten()
  combined_header = (
      flattened_headers
      | 'MergeHeadersFromVcfAndVariants' >>
      merge_headers.MergeHeaders(allow_incompatible_records=True))
  return inferred_headers, combined_header
def test_defined_fields_filtered_one_variant(self):
  """Checks that nothing is inferred when all fields are already defined.

  All FORMATs and INFOs of the sample variant are already defined in the
  header section of the VCF files, so the inferred header is expected to
  equal an empty `VcfHeader`.
  """
  expected = vcf_header_io.VcfHeader()

  # The pipeline is executed when the `with` block exits, so an explicit
  # `p.run()` here would execute the pipeline (and its assertions) twice.
  with TestPipeline() as p:
    vcf_headers = self._get_sample_header_fields()
    vcf_headers_side_input = p | 'vcf_headers' >> Create([vcf_headers])
    variant = self._get_sample_variant_1()
    inferred_headers = (
        p
        | Create([variant])
        | 'InferUndefinedHeaderFields' >>
        infer_headers.InferUndefinedHeaderFields(
            pvalue.AsSingleton(vcf_headers_side_input)))
    assert_that(inferred_headers, equal_to([expected]))
def _add_inferred_headers(
    pipeline,  # type: beam.Pipeline
    known_args,  # type: argparse.Namespace
    merged_header  # type: pvalue.PCollection
    ):
  # type: (...) -> pvalue.PCollection
  """Merges header fields inferred from the variants into `merged_header`.

  Args:
    pipeline: Pipeline handed to `_read_variants`.
    known_args: Parsed arguments. This function reads `reference_names`,
      `split_alternate_allele_info_fields` and `allow_incompatible_records`
      from it, and also forwards it to `_read_variants`.
    merged_header: PCollection used both as the singleton side input for
      inference and as the second input of the final merge.

  Returns:
    The PCollection produced by `MergeHeaders` over the flattened inferred
    headers and the incoming `merged_header`.
  """
  all_variants = _read_variants(pipeline, known_args)
  filtered_variants = (
      all_variants
      | 'FilterVariants' >>
      filter_variants.FilterVariants(
          reference_names=known_args.reference_names))
  defined_headers = pvalue.AsSingleton(merged_header)
  inferred_headers = (
      filtered_variants
      | ' InferUndefinedHeaderFields' >>
      infer_headers.InferUndefinedHeaderFields(defined_headers))

  flattened_headers = (inferred_headers, merged_header) | beam.Flatten()
  return (
      flattened_headers
      | 'MergeHeadersFromVcfAndVariants' >>
      merge_headers.MergeHeaders(
          known_args.split_alternate_allele_info_fields,
          known_args.allow_incompatible_records))
def test_defined_fields_filtered_two_variants(self):
  """Checks that only the undefined fields of two variants are inferred.

  Only the INFO and FORMAT fields in the first variant are already defined
  in the header section of the VCF files, so the inferred header is expected
  to contain just `IS_2` and `FI_2`.
  """
  expected_infos = {'IS_2': Info('IS_2', 1, 'String', '', '', '')}
  expected_formats = {'FI_2': Format('FI_2', 1, 'Integer', '')}
  expected = vcf_header_io.VcfHeader(infos=expected_infos,
                                     formats=expected_formats)

  # The pipeline is executed when the `with` block exits, so an explicit
  # `p.run()` here would execute the pipeline (and its assertions) twice.
  with TestPipeline() as p:
    vcf_headers = self._get_sample_header_fields()
    vcf_headers_side_input = p | 'vcf_header' >> Create([vcf_headers])
    variant_1 = self._get_sample_variant_1()
    variant_2 = self._get_sample_variant_2()
    inferred_headers = (
        p
        | Create([variant_1, variant_2])
        | 'InferUndefinedHeaderFields' >>
        infer_headers.InferUndefinedHeaderFields(
            pvalue.AsSingleton(vcf_headers_side_input)))
    assert_that(inferred_headers,
                asserts.header_fields_equal_ignore_order([expected]))