def __init__(
            self,
            info_keys_to_move_to_calls_regex,  # type: str
            copy_quality_to_calls,  # type: bool
            copy_filter_to_calls,  # type: bool
            window_size=DEFAULT_WINDOW_SIZE  # type: int
    ):
        # type: (...) -> None
        """Stores the window size and builds the wrapped MoveToCallsStrategy.

        Args:
          info_keys_to_move_to_calls_regex: Regular expression selecting the
            info fields that should be moved to calls.
          copy_quality_to_calls: Whether the quality field is copied to the
            associated calls in each record.
          copy_filter_to_calls: Whether the filter field is copied to the
            associated calls in each record.
          window_size: Size of the windows that variants are grouped in, based
            on the start position of the variant.
        """
        # The three call-related options are forwarded unchanged, by keyword.
        move_to_calls_options = dict(
            info_keys_to_move_to_calls_regex=info_keys_to_move_to_calls_regex,
            copy_quality_to_calls=copy_quality_to_calls,
            copy_filter_to_calls=copy_filter_to_calls)
        self._move_to_calls = move_to_calls_strategy.MoveToCallsStrategy(
            **move_to_calls_options)
        self._window_size = window_size
    def test_get_merge_keys(self):
        """Checks the first merge key as variant fields are filled in."""
        strategy = move_to_calls_strategy.MoveToCallsStrategy(None, None, None)

        def get_expected_key(reference_name, start, end, reference_bases,
                             alternate_bases):
            # Falsy fields contribute an empty string. Both base fields go
            # through the strategy's hash, alternates comma-joined first.
            joined_alternates = ','.join(alternate_bases or [])
            key_parts = (
                reference_name or '',
                str(start or ''),
                str(end or ''),
                strategy._get_hash(reference_bases or ''),
                strategy._get_hash(joined_alternates),
            )
            return '%s:%s:%s:%s:%s' % key_parts

        # Nothing set on the variant.
        variant = vcfio.Variant()
        expected_key = get_expected_key(None, None, None, None, None)
        self.assertEqual(expected_key, next(strategy.get_merge_keys(variant)))

        # Reference name only.
        variant.reference_name = '19'
        expected_key = get_expected_key(19, None, None, None, None)
        self.assertEqual(expected_key, next(strategy.get_merge_keys(variant)))

        # Position and reference bases added.
        variant.start = 123
        variant.end = 125
        variant.reference_bases = 'AT'
        expected_key = get_expected_key(19, 123, 125, 'AT', None)
        self.assertEqual(expected_key, next(strategy.get_merge_keys(variant)))

        # Alternate bases added.
        variant.alternate_bases = ['A', 'C']
        expected_key = get_expected_key(19, 123, 125, 'AT', ['A', 'C'])
        self.assertEqual(expected_key, next(strategy.get_merge_keys(variant)))
    def test_get_merged_variants_no_custom_options(self):
        """Merges the sample variants with no regex and no quality/filter copy.

        Asserts that merging a single variant returns it unchanged, and that
        merging all sample variants yields calls for Sample1..Sample4 while the
        merged variant's info holds the keys A1, A2 and A3.
        """
        strategy = move_to_calls_strategy.MoveToCallsStrategy(
            info_keys_to_move_to_calls_regex=None,
            copy_quality_to_calls=False,
            copy_filter_to_calls=False)
        variants = self._get_sample_variants()

        # Test single variant merge.
        self.assertEqual([variants[0]],
                         strategy.get_merged_variants([variants[0]]))

        # Test multiple variant merge.
        merged_variant = strategy.get_merged_variants(variants)[0]
        self._assert_common_expected_merged_fields(merged_variant)
        self.assertEqual([
            vcfio.VariantCall(name='Sample1',
                              genotype=[0, 1],
                              info={
                                  'GQ': 20,
                                  'HQ': [10, 20]
                              }),
            vcfio.VariantCall(name='Sample2',
                              genotype=[1, 0],
                              info={
                                  'GQ': 10,
                                  'FLAG1': True
                              }),
            vcfio.VariantCall(name='Sample3', genotype=[1, 1]),
            vcfio.VariantCall(name='Sample4', genotype=[1, 0], info={'GQ': 20})
        ], merged_variant.calls)
        self.assertItemsEqual(['A1', 'A2', 'A3'], merged_variant.info.keys())
        # Either input value is accepted for A1. assertIn reports the actual
        # value on failure, which assertTrue(x in ...) does not.
        self.assertIn(merged_variant.info['A1'], ('some data', 'some data2'))
        self.assertEqual(['data1', 'data2'], merged_variant.info['A2'])
        self.assertEqual(['data3', 'data4'], merged_variant.info['A3'])
    def test_get_merged_variants_move_info_to_calls(self):
        """Merges the sample variants with the info regex set to '^A1$'.

        The expected calls all carry an 'A1' entry in their info, and the
        merged variant's info is expected to hold only 'A2' and 'A3'.
        """
        strategy = move_to_calls_strategy.MoveToCallsStrategy(
            info_keys_to_move_to_calls_regex='^A1$',
            copy_quality_to_calls=False,
            copy_filter_to_calls=False)
        variants = self._get_sample_variants()

        def expected_call(sample_name, genotype, info):
            return vcfio.VariantCall(sample_id=hash_name(sample_name),
                                     genotype=genotype,
                                     info=info)

        def expected_first_variant_calls():
            # Built fresh on every call so the two assertions share no objects.
            return [
                expected_call('Sample1', [0, 1], {
                    'GQ': 20,
                    'HQ': [10, 20],
                    'A1': 'some data'
                }),
                expected_call('Sample2', [1, 0], {
                    'GQ': 10,
                    'FLAG1': True,
                    'A1': 'some data'
                }),
            ]

        # Test single variant merge.
        single_merged_variant = strategy.get_merged_variants([variants[0]])[0]
        self.assertEqual(expected_first_variant_calls(),
                         single_merged_variant.calls)

        # Test multiple variant merge.
        merged_variant = strategy.get_merged_variants(variants)[0]
        self._assert_common_expected_merged_fields(merged_variant)
        expected_merged_calls = expected_first_variant_calls() + [
            expected_call('Sample3', [1, 1], {'A1': 'some data2'}),
            expected_call('Sample4', [1, 0], {
                'GQ': 20,
                'A1': 'some data2'
            }),
        ]
        self.assertEqual(expected_merged_calls, merged_variant.calls)

        merged_info = merged_variant.info
        self.assertItemsEqual(['A2', 'A3'], merged_info.keys())
        self.assertEqual(['data1', 'data2'], merged_info['A2'])
        self.assertEqual(['data3', 'data4'], merged_info['A3'])
 def test_modify_bigquery_schema_duplicate_keys(self):
     """Expects a ValueError when an info key is the calls sample-id column."""
     strategy = move_to_calls_strategy.MoveToCallsStrategy(
         info_keys_to_move_to_calls_regex='.*',
         copy_quality_to_calls=True,
         copy_filter_to_calls=True)
     # The only info key reuses the CALLS_SAMPLE_ID column name.
     info_keys = [ColumnKeyConstants.CALLS_SAMPLE_ID]
     base_schema = self._get_base_schema(info_keys)
     try:
         strategy.modify_bigquery_schema(base_schema, info_keys)
     except ValueError:
         # Expected outcome.
         pass
     else:
         self.fail('Duplicate keys should throw error.')
 def test_modify_bigquery_schema_move_info_to_calls(self):
     """Checks the schema fields after modifying with regex 'INFO.*1'.

     INFO_KEY1 is expected under the calls record and INFO_KEY2 is expected
     to remain a top-level field.
     """
     strategy = move_to_calls_strategy.MoveToCallsStrategy(
         info_keys_to_move_to_calls_regex='INFO.*1',
         copy_quality_to_calls=False,
         copy_filter_to_calls=False)
     info_keys = ['INFO_KEY1', 'INFO_KEY2']
     base_schema = self._get_base_schema(info_keys)
     strategy.modify_bigquery_schema(base_schema, info_keys)

     calls = ColumnKeyConstants.CALLS
     expected_fields = [
         ColumnKeyConstants.REFERENCE_NAME,
         ColumnKeyConstants.QUALITY,
         ColumnKeyConstants.FILTER,
         calls,
         # Nested fields are spelled as '<calls>.<field>'.
         '.'.join([calls, ColumnKeyConstants.CALLS_SAMPLE_ID]),
         '.'.join([calls, 'INFO_KEY1']),
         'INFO_KEY2',
     ]
     self.assertEqual(expected_fields,
                      self._get_fields_from_schema(base_schema))
# Example #7
# 0
 def test_merge_variants(self):
   """Runs MergeVariants in a test pipeline with a MoveToCallsStrategy.

   The pipeline input is the sample merged-variant inputs plus the sample
   unmerged variants. The output is expected to equal, ignoring order, the
   single merged variant plus the unmerged variants.
   """
   strategy = move_to_calls_strategy.MoveToCallsStrategy(
       '^A1$', False, False)
   variants_to_merge, expected_merged = self._get_sample_merged_variants()
   untouched_variants = self._get_sample_unmerged_variants()

   pipeline = TestPipeline()
   pipeline_input = variants_to_merge + untouched_variants
   source = pipeline | Create(pipeline_input, reshuffle=False)
   merged_variants = (
       source | 'MergeVariants' >> merge_variants.MergeVariants(strategy))

   expected_output = [expected_merged] + untouched_variants
   assert_that(merged_variants,
               asserts.variants_equal_to_ignore_order(expected_output))
   pipeline.run()
# Example #8
# 0
def _get_variant_merge_strategy(known_args  # type: argparse.Namespace
                                ):
    # type: (...) -> Optional[variant_merge_strategy.VariantMergeStrategy]
    """Builds the variant merge strategy selected by the parsed arguments.

    Args:
      known_args: Parsed arguments. Reads `variant_merge_strategy`,
        `info_keys_to_move_to_calls_regex`, `copy_quality_to_calls` and
        `copy_filter_to_calls`.

    Returns:
      None when `variant_merge_strategy` is falsy or equals
      `MergeOptions.NONE`; otherwise a new strategy instance built from the
      three call-related arguments.

    Raises:
      ValueError: If `variant_merge_strategy` is set to anything other than
        the options handled here.
    """
    merge_options = variant_transform_options.MergeOptions
    requested = known_args.variant_merge_strategy

    # Guard: no merging requested.
    if not requested or requested == merge_options.NONE:
        return None

    if requested == merge_options.MOVE_TO_CALLS:
        strategy_class = move_to_calls_strategy.MoveToCallsStrategy
    elif requested == merge_options.MERGE_WITH_NON_VARIANTS:
        strategy_class = (
            merge_with_non_variants_strategy.MergeWithNonVariantsStrategy)
    else:
        raise ValueError('Merge strategy is not supported.')

    # Both strategies are constructed from the same three positional arguments.
    return strategy_class(known_args.info_keys_to_move_to_calls_regex,
                          known_args.copy_quality_to_calls,
                          known_args.copy_filter_to_calls)
    def test_get_merged_variants_move_everything_to_calls(self):
        """Merges with regex '.*' and quality/filter copying both enabled.

        The expected calls carry every info key plus the quality and filter
        columns, and the merged variant's info is expected to be empty.
        """
        strategy = move_to_calls_strategy.MoveToCallsStrategy(
            info_keys_to_move_to_calls_regex='.*',
            copy_quality_to_calls=True,
            copy_filter_to_calls=True)
        variants = self._get_sample_variants()

        # Test single variant merge.
        single_merged_variant = strategy.get_merged_variants([variants[0]])[0]
        self.assertEqual([
            vcfio.VariantCall(sample_id=hash_name('Sample1'),
                              genotype=[0, 1],
                              info={
                                  'GQ': 20,
                                  'HQ': [10, 20],
                                  'A1': 'some data',
                                  'A2': ['data1', 'data2'],
                                  ColumnKeyConstants.QUALITY: 2,
                                  ColumnKeyConstants.FILTER: ['PASS']
                              }),
            vcfio.VariantCall(sample_id=hash_name('Sample2'),
                              genotype=[1, 0],
                              info={
                                  'GQ': 10,
                                  'FLAG1': True,
                                  'A1': 'some data',
                                  'A2': ['data1', 'data2'],
                                  ColumnKeyConstants.QUALITY: 2,
                                  ColumnKeyConstants.FILTER: ['PASS']
                              })
        ], single_merged_variant.calls)

        # Test multiple variant merge.
        merged_variant = strategy.get_merged_variants(variants)[0]
        self._assert_common_expected_merged_fields(merged_variant)
        self.assertEqual([
            vcfio.VariantCall(sample_id=hash_name('Sample1'),
                              genotype=[0, 1],
                              info={
                                  'GQ': 20,
                                  'HQ': [10, 20],
                                  'A1': 'some data',
                                  'A2': ['data1', 'data2'],
                                  ColumnKeyConstants.QUALITY: 2,
                                  ColumnKeyConstants.FILTER: ['PASS']
                              }),
            vcfio.VariantCall(sample_id=hash_name('Sample2'),
                              genotype=[1, 0],
                              info={
                                  'GQ': 10,
                                  'FLAG1': True,
                                  'A1': 'some data',
                                  'A2': ['data1', 'data2'],
                                  ColumnKeyConstants.QUALITY: 2,
                                  ColumnKeyConstants.FILTER: ['PASS']
                              }),
            vcfio.VariantCall(sample_id=hash_name('Sample3'),
                              genotype=[1, 1],
                              info={
                                  'A1': 'some data2',
                                  'A3': ['data3', 'data4'],
                                  ColumnKeyConstants.QUALITY: 20,
                                  ColumnKeyConstants.FILTER: ['q10']
                              }),
            vcfio.VariantCall(sample_id=hash_name('Sample4'),
                              genotype=[1, 0],
                              info={
                                  'GQ': 20,
                                  'A1': 'some data2',
                                  'A3': ['data3', 'data4'],
                                  ColumnKeyConstants.QUALITY: 20,
                                  ColumnKeyConstants.FILTER: ['q10']
                              })
        ], merged_variant.calls)
        # keys() is a view on Python 3 and never equals a list, so materialise
        # it. On Python 2 this is a harmless copy.
        self.assertEqual([], list(merged_variant.info.keys()))
    def test_get_merged_variants_move_quality_and_filter_to_calls(self):
        """Merges with an empty info regex and quality/filter copying enabled.

        The expected calls gain the quality and filter columns in their info,
        while the merged variant's info is expected to keep A1, A2 and A3.
        """
        strategy = move_to_calls_strategy.MoveToCallsStrategy(
            info_keys_to_move_to_calls_regex='',
            copy_quality_to_calls=True,
            copy_filter_to_calls=True)
        variants = self._get_sample_variants()

        # Test single variant merge.
        single_merged_variant = strategy.get_merged_variants([variants[0]])[0]
        self.assertEqual([
            vcfio.VariantCall(name='Sample1',
                              genotype=[0, 1],
                              info={
                                  'GQ': 20,
                                  'HQ': [10, 20],
                                  ColumnKeyConstants.QUALITY: 2,
                                  ColumnKeyConstants.FILTER: ['PASS']
                              }),
            vcfio.VariantCall(name='Sample2',
                              genotype=[1, 0],
                              info={
                                  'GQ': 10,
                                  'FLAG1': True,
                                  ColumnKeyConstants.QUALITY: 2,
                                  ColumnKeyConstants.FILTER: ['PASS']
                              })
        ], single_merged_variant.calls)

        # Test multiple variant merge.
        merged_variant = strategy.get_merged_variants(variants)[0]
        self._assert_common_expected_merged_fields(merged_variant)
        self.assertEqual([
            vcfio.VariantCall(name='Sample1',
                              genotype=[0, 1],
                              info={
                                  'GQ': 20,
                                  'HQ': [10, 20],
                                  ColumnKeyConstants.QUALITY: 2,
                                  ColumnKeyConstants.FILTER: ['PASS']
                              }),
            vcfio.VariantCall(name='Sample2',
                              genotype=[1, 0],
                              info={
                                  'GQ': 10,
                                  'FLAG1': True,
                                  ColumnKeyConstants.QUALITY: 2,
                                  ColumnKeyConstants.FILTER: ['PASS']
                              }),
            vcfio.VariantCall(name='Sample3',
                              genotype=[1, 1],
                              info={
                                  ColumnKeyConstants.QUALITY: 20,
                                  ColumnKeyConstants.FILTER: ['q10']
                              }),
            vcfio.VariantCall(name='Sample4',
                              genotype=[1, 0],
                              info={
                                  'GQ': 20,
                                  ColumnKeyConstants.QUALITY: 20,
                                  ColumnKeyConstants.FILTER: ['q10']
                              })
        ], merged_variant.calls)
        self.assertItemsEqual(['A1', 'A2', 'A3'], merged_variant.info.keys())
        # Either input value is accepted for A1. assertIn reports the actual
        # value on failure, which assertTrue(x in ...) does not.
        self.assertIn(merged_variant.info['A1'].data,
                      ('some data', 'some data2'))
        self.assertEqual(vcfio.VariantInfo(['data1', 'data2'], '2'),
                         merged_variant.info['A2'])
        self.assertEqual(vcfio.VariantInfo(['data3', 'data4'], '2'),
                         merged_variant.info['A3'])