示例#1
0
 def _get_header_from_lines(self, lines):
   """Build a VcfHeader from every entry of ``lines`` except the last.

   Args:
     lines: A sliceable sequence of header lines. Each entry but the final
       one is passed unchanged to ``VariantHeader.add_line``.

   Returns:
     A ``vcf_header_io.VcfHeader`` populated with the infos, filters, alts,
     formats and contigs of the parsed header.
   """
   parsed = libcbcf.VariantHeader()
   # NOTE(review): the final entry is skipped -- presumably it is the
   # '#CHROM' column line rather than a '##' meta line; confirm with callers.
   meta_lines = lines[:-1]
   for meta_line in meta_lines:
     parsed.add_line(meta_line)
   return vcf_header_io.VcfHeader(
       infos=parsed.info,
       filters=parsed.filters,
       alts=parsed.alts,
       formats=parsed.formats,
       contigs=parsed.contigs,
   )
示例#2
0
def get_vcf_headers(input_file):
    """Parse the header section of ``input_file`` into a VcfHeader.

    A default ``##fileformat=VCFv4.0`` line is always registered first. Lines
    produced by ``_header_line_generator`` are then classified as:

    * ``##...``  -> stripped and added to the header;
    * ``#...``   -> stripped and remembered as the sample line;
    * non-empty  -> treated as a record, which requires a sample line;
    * empty      -> falls back to the dummy sample line if none was seen.

    Args:
        input_file: Path handed to ``FileSystems.exists`` and to
            ``_header_line_generator``.

    Returns:
        A ``vcf_header_io.VcfHeader`` built from the parsed header, the sample
        line and ``input_file``.

    Raises:
        ValueError: If ``input_file`` does not exist, if a non-empty
            non-header line appears before any ``#`` line, or if an empty
            line is reached before any header line was seen.
    """
    if not FileSystems.exists(input_file):
        raise ValueError('VCF header does not exist')

    variant_header = libcbcf.VariantHeader()
    header_lines = _header_line_generator(input_file)
    samples = None
    variant_header.add_line('##fileformat=VCFv4.0\n')
    seen_header_content = False

    for position, raw_line in enumerate(header_lines):
        # Only the very first line is inspected for a file format
        # declaration; a default one is supplied when it is absent.
        if position == 0 and raw_line:
            format_prefix = vcf_header_io.FILE_FORMAT_HEADER_TEMPLATE.format(
                VERSION='')
            if not raw_line.startswith(format_prefix):
                variant_header.add_line(
                    vcf_header_io.FILE_FORMAT_HEADER_TEMPLATE.format(
                        VERSION='4.0'))

        if raw_line.startswith('##'):
            variant_header.add_line(raw_line.strip())
            seen_header_content = True
            continue

        if raw_line.startswith('#'):
            samples = raw_line.strip()
            seen_header_content = True
            continue

        if raw_line:
            # A non-empty non-header line must be preceded by a '#' line.
            if not samples:
                raise ValueError('Header line is missing')
            continue

        # Empty line from here on.
        if not seen_header_content:
            raise ValueError('File is empty')
        # No '#' line was found, so use the dummy one for sample extraction.
        if not samples:
            samples = vcf_header_io.LAST_HEADER_LINE_PREFIX

    return vcf_header_io.VcfHeader(
        infos=variant_header.info,
        filters=variant_header.filters,
        alts=variant_header.alts,
        formats=variant_header.formats,
        contigs=variant_header.contigs,
        samples=samples,
        file_path=input_file,
    )
示例#3
0
def _get_vcf_header_from_lines(lines, file_name=None):
  """Build a VcfHeader from the leading '#' lines of ``lines``.

  Args:
    lines: Iterable of text lines. Reading stops at the first line that does
      not start with '#', or at the first line starting with
      ``LAST_HEADER_LINE_PREFIX``.
    file_name: Value forwarded as the ``file_path`` of the result.

  Returns:
    A ``VcfHeader`` whose ``samples`` is the stripped line that starts with
    ``LAST_HEADER_LINE_PREFIX``, or the bare prefix when no such line is
    reached.
  """
  parsed = libcbcf.VariantHeader()
  samples = LAST_HEADER_LINE_PREFIX
  parsed.add_line('##fileformat=VCFv4.0')
  for raw_line in lines:
    if not raw_line.startswith('#'):
      # First non-header line: the header section is over.
      break
    if raw_line.startswith(LAST_HEADER_LINE_PREFIX):
      samples = raw_line.strip()
      break
    parsed.add_line(raw_line.strip())
  return VcfHeader(
      infos=parsed.info,
      filters=parsed.filters,
      alts=parsed.alts,
      formats=parsed.formats,
      contigs=parsed.contigs,
      samples=samples,
      file_path=file_name,
  )
    def read_records(
            self,
            file_path,  # type: str
            unused_range_tracker,  # type: range_trackers.UnsplittableRangeTracker
    ):
        # type: (...) -> Iterable[VcfHeader]
        """Yield one VcfHeader parsed from the file at ``file_path``.

        Lines come from ``self._read_headers_plus_one_record``. If the first
        line is non-empty and is not a file format declaration, a default
        ``VERSION='4.0'`` declaration is added. Each line is then handled as:

        * ``##...``  -> stripped and added to the header;
        * ``#...``   -> remembered (unstripped) as the sample line;
        * non-empty  -> treated as a record, which requires a sample line;
        * empty      -> falls back to the dummy sample line if none was seen.

        Args:
            file_path: Path of the file whose header is read.
            unused_range_tracker: Not used by this method.

        Yields:
            A single ``VcfHeader`` for the file.

        Raises:
            ValueError: If a non-empty non-header line appears before any
                ``#`` line.
        """
        variant_header = libcbcf.VariantHeader()
        header_lines = self._read_headers_plus_one_record(file_path)
        samples = None

        for position, raw_line in enumerate(header_lines):
            # Only the first line is checked for a file format declaration.
            if position == 0 and raw_line:
                format_prefix = FILE_FORMAT_HEADER_TEMPLATE.format(VERSION='')
                if not raw_line.startswith(format_prefix):
                    variant_header.add_line(
                        FILE_FORMAT_HEADER_TEMPLATE.format(VERSION='4.0'))

            if raw_line.startswith('##'):
                variant_header.add_line(raw_line.strip())
                continue

            if raw_line.startswith('#'):
                samples = raw_line
                continue

            if raw_line:
                # A non-empty non-header line must be preceded by a '#' line.
                if not samples:
                    raise ValueError('Header line is missing')
                continue

            # Empty line and no '#' line seen: use the dummy one for sample
            # extraction.
            if not samples:
                samples = LAST_HEADER_LINE_PREFIX

        yield VcfHeader(
            infos=variant_header.info,
            filters=variant_header.filters,
            alts=variant_header.alts,
            formats=variant_header.formats,
            contigs=variant_header.contigs,
            samples=samples,
            file_path=file_path,
        )