def setUp(self):
    """Open the two test VCFs: sites without an index, samples with one."""
    # Options with every field left at its default value.
    default_options = variants_pb2.VcfReaderOptions()
    # Options whose index mode is INDEX_BASED_ON_FILENAME.
    filename_indexed_options = variants_pb2.VcfReaderOptions(
        index_mode=index_pb2.INDEX_BASED_ON_FILENAME)
    self.unindexed_options = default_options
    self.indexed_options = filename_indexed_options

    sites_path = test_utils.genomics_core_testdata('test_sites.vcf')
    samples_path = test_utils.genomics_core_testdata('test_samples.vcf.gz')
    self.sites_vcf = sites_path
    self.samples_vcf = samples_path

    # The plain-text sites file is read with the default options; the
    # gzipped samples file is read with the index-enabled options.
    self.sites_reader = vcf_reader.VcfReader.from_file(
        sites_path, default_options)
    self.samples_reader = vcf_reader.VcfReader.from_file(
        samples_path, filename_indexed_options)
def __init__(self, input_path, use_index=True, excluded_info_fields=None,
             excluded_format_fields=None):
    """Creates a NativeVcfReader over the VCF file at `input_path`.

    Args:
      input_path: str. Location of the VCF file to open.
      use_index: bool. When True, the file is assumed to be bgzipped and
        tabix indexed, and the `query` functionality is supported. When
        False, the reader is opened with the DONT_USE_INDEX index mode.
      excluded_info_fields: list(str). INFO field IDs that should not be
        parsed into the Variants. None means every INFO field is included.
      excluded_format_fields: list(str). FORMAT field IDs that should not be
        parsed into the Variants. None means every FORMAT field is included.
    """
    super(NativeVcfReader, self).__init__()

    # Translate the boolean flag into the index mode passed to the reader.
    if use_index:
        index_mode = index_pb2.INDEX_BASED_ON_FILENAME
    else:
        index_mode = index_pb2.DONT_USE_INDEX

    # The path is handed to the underlying reader as UTF-8 encoded bytes.
    encoded_path = input_path.encode('utf8')
    reader_options = variants_pb2.VcfReaderOptions(
        index_mode=index_mode,
        excluded_info_fields=excluded_info_fields,
        excluded_format_fields=excluded_format_fields)
    self._reader = vcf_reader.VcfReader.from_file(encoded_path, reader_options)

    # Expose the reader's header and build the field-access cache from it.
    self.header = self._reader.header
    self.field_access_cache = VcfHeaderCache(self.header)
def __init__(self, input_path, excluded_info_fields=None,
             excluded_format_fields=None, store_gl_and_pl_in_info_map=False):
    """Creates a NativeVcfReader over the VCF file at `input_path`.

    Args:
      input_path: str. Location of the VCF file to open.
      excluded_info_fields: list(str). INFO field IDs that should not be
        parsed into the Variants. None means every INFO field is included.
      excluded_format_fields: list(str). FORMAT field IDs that should not be
        parsed into the Variants. None means every FORMAT field is included.
      store_gl_and_pl_in_info_map: bool. When True, the "GL" and "PL" FORMAT
        fields are stored in the VariantCall.info map rather than as
        top-level values in the VariantCall.genotype_likelihood field.
    """
    super(NativeVcfReader, self).__init__()

    # The path is handed to the underlying reader as UTF-8 encoded bytes.
    encoded_path = input_path.encode('utf8')
    reader_options = variants_pb2.VcfReaderOptions(
        excluded_info_fields=excluded_info_fields,
        excluded_format_fields=excluded_format_fields,
        store_gl_and_pl_in_info_map=store_gl_and_pl_in_info_map,
    )
    self._reader = vcf_reader.VcfReader.from_file(encoded_path, reader_options)

    # Expose the reader's header and build the field-access cache from it.
    self.header = self._reader.header
    self.field_access_cache = VcfHeaderCache(self.header)
def setUp(self):
    """Open both test VCFs with one shared, default VcfReaderOptions."""
    sites_path = test_utils.genomics_core_testdata('test_sites.vcf')
    samples_path = test_utils.genomics_core_testdata('test_samples.vcf.gz')
    self.sites_vcf = sites_path
    self.samples_vcf = samples_path

    # A single options object (all defaults) is reused for both readers.
    shared_options = variants_pb2.VcfReaderOptions()
    self.options = shared_options

    open_reader = vcf_reader.VcfReader.from_file
    self.sites_reader = open_reader(sites_path, shared_options)
    self.samples_reader = open_reader(samples_path, shared_options)
def _init_with_header(self, header_lines):
    """Builds the internal VCF reader from raw header lines.

    If `header_lines` is non-empty and its first entry does not start with
    the file-format prefix, a version 4.0 file-format line is inserted at the
    front. The list is modified in place. The lines are then passed to
    `self._store_to_temp_local_file`, and its result is opened with the
    Nucleus `VcfReader`. Finally `self._extract_header_fields()` is called.

    Args:
      header_lines: list of header line strings; may be mutated as described
        above.

    Raises:
      ValueError: if a ValueError is raised while storing the lines or
        opening the reader. The message names `self._file_name` and includes
        the original error text.
    """
    # The first header line must be similar to '##fileformat=VCFv.*'.
    if header_lines:
        format_prefix = FILE_FORMAT_HEADER_TEMPLATE.format(VERSION='')
        if not header_lines[0].startswith(format_prefix):
            default_format_line = FILE_FORMAT_HEADER_TEMPLATE.format(
                VERSION='4.0')
            header_lines.insert(0, default_format_line)

    try:
        open_reader = nucleus_vcf_reader.VcfReader.from_file
        header_path = self._store_to_temp_local_file(header_lines)
        reader_options = variants_pb2.VcfReaderOptions()  # pylint: disable=c-extension-no-member
        self._vcf_reader = open_reader(header_path, reader_options)
    except ValueError as err:
        # Re-raise with the file name so the failing input can be identified.
        raise ValueError(
            'Invalid VCF header in %s: %s' % (self._file_name, str(err)))

    self._extract_header_fields()