def _get_header_from_lines(self, lines):
    """Build a ``VcfHeader`` from every entry of ``lines`` except the last.

    Each leading entry is added to a fresh ``libcbcf.VariantHeader``; the
    final entry is skipped and never parsed.

    Args:
      lines: Sequence of header line strings.
        NOTE(review): the skipped last entry is presumably the ``#CHROM``
        column line -- confirm against callers.

    Returns:
      A ``vcf_header_io.VcfHeader`` carrying the parsed infos, filters,
      alts, formats and contigs.
    """
    parsed = libcbcf.VariantHeader()
    meta_lines = lines[:-1]
    for meta_line in meta_lines:
        parsed.add_line(meta_line)

    header_fields = dict(
        infos=parsed.info,
        filters=parsed.filters,
        alts=parsed.alts,
        formats=parsed.formats,
        contigs=parsed.contigs,
    )
    return vcf_header_io.VcfHeader(**header_fields)
def get_vcf_headers(input_file):
    """Parse the header section of ``input_file`` into a ``VcfHeader``.

    Lines produced by ``_header_line_generator(input_file)`` are consumed in
    order. Lines starting with ``##`` are added (stripped) to a
    ``libcbcf.VariantHeader``; a line starting with ``#`` but not ``##`` is
    remembered (stripped) as the sample line. A ``VCFv4.0`` file-format line
    is always seeded first, and one is added again when the first line read
    is non-empty and does not start with the file-format prefix.

    Args:
      input_file: Path of the VCF file to read.

    Returns:
      A ``vcf_header_io.VcfHeader`` built from the parsed infos, filters,
      alts, formats and contigs, plus the sample line and ``input_file``.

    Raises:
      ValueError: If ``input_file`` does not exist, if a non-empty
        non-header line is seen before any sample line, or if an empty line
        is reached before any header line was seen.
    """
    if not FileSystems.exists(input_file):
        raise ValueError('VCF header does not exist')

    parsed = libcbcf.VariantHeader()
    line_source = _header_line_generator(input_file)
    column_line = None
    parsed.add_line('##fileformat=VCFv4.0\n')
    saw_header_line = False

    for position, line in enumerate(line_source):
        # Only the very first line is checked for a file-format declaration.
        if position == 0 and line:
            bare_prefix = vcf_header_io.FILE_FORMAT_HEADER_TEMPLATE.format(
                VERSION='')
            if not line.startswith(bare_prefix):
                parsed.add_line(
                    vcf_header_io.FILE_FORMAT_HEADER_TEMPLATE.format(
                        VERSION='4.0'))

        if line.startswith('##'):
            parsed.add_line(line.strip())
            saw_header_line = True
            continue

        if line.startswith('#'):
            column_line = line.strip()
            saw_header_line = True
            continue

        if line:
            # If non-empty non-header line exists, #CHROM line has to be
            # supplied.
            if not column_line:
                raise ValueError('Header line is missing')
            continue

        # Empty line: nothing but blanks so far means there is no content.
        if not saw_header_line:
            raise ValueError('File is empty')
        # If no records were found, use dummy #CHROM line for sample
        # extraction.
        column_line = column_line or vcf_header_io.LAST_HEADER_LINE_PREFIX

    return vcf_header_io.VcfHeader(
        infos=parsed.info,
        filters=parsed.filters,
        alts=parsed.alts,
        formats=parsed.formats,
        contigs=parsed.contigs,
        samples=column_line,
        file_path=input_file,
    )
def _get_vcf_header_from_lines(lines, file_name=None):
    """Build a ``VcfHeader`` from the leading ``#`` lines of ``lines``.

    A ``VCFv4.0`` file-format line is seeded first. Iteration stops at the
    first line that does not start with ``#``, or at the first line that
    starts with ``LAST_HEADER_LINE_PREFIX``, which is kept (stripped) as the
    sample line instead of being added to the header.

    Args:
      lines: Iterable of line strings.
      file_name: Optional value passed through as the header's ``file_path``.

    Returns:
      A ``VcfHeader`` built from the parsed infos, filters, alts, formats
      and contigs. The sample line defaults to ``LAST_HEADER_LINE_PREFIX``
      when no matching line is encountered.
    """
    parsed = libcbcf.VariantHeader()
    column_line = LAST_HEADER_LINE_PREFIX
    parsed.add_line('##fileformat=VCFv4.0')

    for raw_line in lines:
        if not raw_line.startswith('#'):
            # First non-header line ends the header section.
            break
        stripped = raw_line.strip()
        if raw_line.startswith(LAST_HEADER_LINE_PREFIX):
            column_line = stripped
            break
        parsed.add_line(stripped)

    return VcfHeader(
        infos=parsed.info,
        filters=parsed.filters,
        alts=parsed.alts,
        formats=parsed.formats,
        contigs=parsed.contigs,
        samples=column_line,
        file_path=file_name,
    )
def read_records(
    self,
    file_path,  # type: str
    unused_range_tracker,  # type: range_trackers.UnsplittableRangeTracker
):
    # type: (...) -> Iterable[VcfHeader]
    """Yield one ``VcfHeader`` parsed from the header of ``file_path``.

    Lines come from ``self._read_headers_plus_one_record(file_path)``. Lines
    starting with ``##`` are added (stripped) to a ``libcbcf.VariantHeader``;
    a line starting with ``#`` but not ``##`` is stored as-is (not stripped)
    as the sample line. A ``VCFv4.0`` file-format line is added when the
    first line read is non-empty and does not start with the file-format
    prefix.

    Args:
      file_path: Path of the file whose header is read.
      unused_range_tracker: Not used by this method.

    Yields:
      A single ``VcfHeader`` built from the parsed infos, filters, alts,
      formats and contigs, plus the sample line and ``file_path``.

    Raises:
      ValueError: If a non-empty non-header line is seen before any sample
        line.
    """
    parsed = libcbcf.VariantHeader()
    line_source = self._read_headers_plus_one_record(file_path)
    column_line = None

    for position, line in enumerate(line_source):
        # Only the very first line is checked for a file-format declaration.
        if position == 0 and line:
            bare_prefix = FILE_FORMAT_HEADER_TEMPLATE.format(VERSION='')
            if not line.startswith(bare_prefix):
                parsed.add_line(
                    FILE_FORMAT_HEADER_TEMPLATE.format(VERSION='4.0'))

        if line.startswith('##'):
            parsed.add_line(line.strip())
            continue

        if line.startswith('#'):
            column_line = line
            continue

        if line:
            # If non-empty non-header line exists, #CHROM line has to be
            # supplied.
            if not column_line:
                raise ValueError('Header line is missing')
            continue

        # If no records were found, use dummy #CHROM line for sample
        # extraction.
        column_line = column_line or LAST_HEADER_LINE_PREFIX

    yield VcfHeader(
        infos=parsed.info,
        filters=parsed.filters,
        alts=parsed.alts,
        formats=parsed.formats,
        contigs=parsed.contigs,
        samples=column_line,
        file_path=file_path,
    )