Exemplo n.º 1
0
 def setUp(self):
   """Builds reader options and opens the sites and samples test VCFs.

   The sites file is opened with default options, while the samples file is
   opened with `index_mode` set to INDEX_BASED_ON_FILENAME.
   """
   default_opts = variants_pb2.VcfReaderOptions()
   filename_index_opts = variants_pb2.VcfReaderOptions(
       index_mode=index_pb2.INDEX_BASED_ON_FILENAME)
   self.unindexed_options = default_opts
   self.indexed_options = filename_index_opts

   sites_path = test_utils.genomics_core_testdata('test_sites.vcf')
   samples_path = test_utils.genomics_core_testdata('test_samples.vcf.gz')
   self.sites_vcf = sites_path
   self.samples_vcf = samples_path

   # Both readers come from the same factory; only path and options differ.
   open_vcf = vcf_reader.VcfReader.from_file
   self.sites_reader = open_vcf(sites_path, default_opts)
   self.samples_reader = open_vcf(samples_path, filename_index_opts)
Exemplo n.º 2
0
    def __init__(self,
                 input_path,
                 use_index=True,
                 excluded_info_fields=None,
                 excluded_format_fields=None):
        """Creates a NativeVcfReader over the VCF file at `input_path`.

        Args:
          input_path: str. The path to the VCF file to read.
          use_index: bool. If True, the input is assumed to be bgzipped and
            tabix indexed, and the `query` functionality is supported.
          excluded_info_fields: list(str). INFO field IDs that should not be
            parsed into the Variants. If None, all INFO fields are included.
          excluded_format_fields: list(str). FORMAT field IDs that should not
            be parsed into the Variants. If None, all FORMAT fields are
            included.
        """
        super(NativeVcfReader, self).__init__()

        # Index lookup by filename is the default; it is disabled on request.
        if use_index:
            index_mode = index_pb2.INDEX_BASED_ON_FILENAME
        else:
            index_mode = index_pb2.DONT_USE_INDEX

        # The path is handed to the underlying reader as UTF-8 bytes.
        encoded_path = input_path.encode('utf8')
        reader_options = variants_pb2.VcfReaderOptions(
            index_mode=index_mode,
            excluded_info_fields=excluded_info_fields,
            excluded_format_fields=excluded_format_fields)
        reader = vcf_reader.VcfReader.from_file(encoded_path, reader_options)
        self._reader = reader

        self.header = reader.header
        self.field_access_cache = VcfHeaderCache(self.header)
Exemplo n.º 3
0
    def __init__(self,
                 input_path,
                 excluded_info_fields=None,
                 excluded_format_fields=None,
                 store_gl_and_pl_in_info_map=False):
        """Creates a NativeVcfReader over the VCF file at `input_path`.

        Args:
          input_path: str. The path to the VCF file to read.
          excluded_info_fields: list(str). INFO field IDs that should not be
            parsed into the Variants. If None, all INFO fields are included.
          excluded_format_fields: list(str). FORMAT field IDs that should not
            be parsed into the Variants. If None, all FORMAT fields are
            included.
          store_gl_and_pl_in_info_map: bool. If True, the "GL" and "PL" FORMAT
            fields are stored in the VariantCall.info map rather than as
            top-level values in the VariantCall.genotype_likelihood field.
        """
        super(NativeVcfReader, self).__init__()

        # The path is handed to the underlying reader as UTF-8 bytes.
        encoded_path = input_path.encode('utf8')
        reader_options = variants_pb2.VcfReaderOptions(
            excluded_info_fields=excluded_info_fields,
            excluded_format_fields=excluded_format_fields,
            store_gl_and_pl_in_info_map=store_gl_and_pl_in_info_map,
        )
        reader = vcf_reader.VcfReader.from_file(encoded_path, reader_options)
        self._reader = reader

        self.header = reader.header
        self.field_access_cache = VcfHeaderCache(self.header)
Exemplo n.º 4
0
 def setUp(self):
   """Opens the sites and samples test VCFs with one default options proto."""
   sites_path = test_utils.genomics_core_testdata('test_sites.vcf')
   samples_path = test_utils.genomics_core_testdata('test_samples.vcf.gz')
   self.sites_vcf = sites_path
   self.samples_vcf = samples_path

   # A single default-constructed options message is shared by both readers.
   shared_options = variants_pb2.VcfReaderOptions()
   self.options = shared_options

   open_vcf = vcf_reader.VcfReader.from_file
   self.sites_reader = open_vcf(sites_path, shared_options)
   self.samples_reader = open_vcf(samples_path, shared_options)
Exemplo n.º 5
0
  def _init_with_header(self, header_lines):
    """Builds the VCF reader from `header_lines` and extracts header fields.

    When `header_lines` is non-empty and its first entry does not start with
    the file-format header prefix, a version 4.0 file-format line is inserted
    at the front of the list. The list is modified in place; an empty list is
    left untouched.

    Args:
      header_lines: Mutable list of header line strings.

    Raises:
      ValueError: If storing the header or opening the reader raises a
        ValueError; the new error message includes `self._file_name`.
    """
    # The first header line must be similar to '##fileformat=VCFv.*'.
    if header_lines:
      format_prefix = FILE_FORMAT_HEADER_TEMPLATE.format(VERSION='')
      if not header_lines[0].startswith(format_prefix):
        default_format_line = FILE_FORMAT_HEADER_TEMPLATE.format(VERSION='4.0')
        header_lines.insert(0, default_format_line)

    try:
      # Both steps stay inside the try so a ValueError from either is wrapped.
      header_file = self._store_to_temp_local_file(header_lines)
      reader_options = variants_pb2.VcfReaderOptions()  # pylint: disable=c-extension-no-member
      self._vcf_reader = nucleus_vcf_reader.VcfReader.from_file(
          header_file, reader_options)
    except ValueError as e:
      message = 'Invalid VCF header in %s: %s' % (self._file_name, str(e))
      raise ValueError(message)
    self._extract_header_fields()