def augment_header(header: VariantHeader, contigs: List[str], formats: List[str], infos: List[str]):
    """
    Add contigs, formats and infos to a VariantHeader.

    formats and infos are given as a list of strings, where each item is the
    ID of the header line to add. The full header info (Number, Type,
    Description) is taken from the PREDEFINED_FORMATS and PREDEFINED_INFOS
    constants. Any other FORMATs or INFOs that are not predefined will raise
    a VcfError.

    All FORMAT and INFO IDs are looked up before the header is touched, so a
    VcfError caused by an unknown ID leaves the header unmodified. A FORMAT
    line that already exists in the header under a requested ID is removed
    and replaced by the predefined one.

    The header is modified in place.
    """
    # Resolve every requested ID up front. Doing the lookups lazily would let
    # an unknown ID abort the function after contigs were added or after an
    # existing FORMAT line had already been removed.
    predefined_formats = []
    for fmt in formats:
        try:
            predefined = PREDEFINED_FORMATS[fmt]
        except KeyError:
            raise VcfError("FORMAT {!r} not defined in VCF header".format(fmt)) from None
        predefined_formats.append((fmt, predefined))

    predefined_infos = []
    for info in infos:
        try:
            predefined = PREDEFINED_INFOS[info]
        except KeyError:
            raise VcfError("INFO {!r} not defined in VCF header".format(info)) from None
        predefined_infos.append(predefined)

    # Validation passed: apply the changes.
    for contig in contigs:
        header.contigs.add(contig)

    for fmt, predefined in predefined_formats:
        # Replace an existing definition rather than adding a second one.
        if fmt in header.formats:
            header.formats[fmt].remove_header()
        header.add_line(predefined.line())

    for predefined in predefined_infos:
        header.add_line(predefined.line())
def setup_header(self, header: VariantHeader):
    """Called by baseclass constructor.

    Removes the first header record whose key is "phasing" (if there is one)
    and then adds the predefined FORMAT line for ``self.tag``.
    """
    # FreeBayes adds phasing=none to its VCF output - remove that.
    phasing_record = next(
        (record for record in header.records if record.key == "phasing"),
        None,
    )
    if phasing_record is not None:
        phasing_record.remove()

    tag_format = PREDEFINED_FORMATS[self.tag]
    header.add_line(tag_format.line())
def format_header():
    """Return a new VariantHeader populated from a fixed set of header lines.

    The lines declare the VCFv4.2 file format, the hg19 assembly, the PASS
    filter and the AAChange_refGene INFO field, followed by the column
    header line.
    """
    header = VariantHeader()
    for header_line in (
        '##fileformat=VCFv4.2',
        '##assembly=hg19',
        '##FILTER=<ID=PASS,Description="All filters passed">',
        '##INFO=<ID=AAChange_refGene,Number=.,Type=String,Description="AAChange_refGene annotation">',
        # '##FORMAT=<ID=None,Number=R,Type=Integer,Description="None">',
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO',
    ):
        header.add_line(header_line)
    return header
def _get_vcf_header(contigs: List[str]) -> VariantHeader:
    """Build a VCF header with a source line, contig lines and INFO lines.

    The meta lines are added in this order: ``source=valiant``, one
    ``contig`` line per entry of *contigs* (ID only), then one ``INFO`` line
    per entry of ``VCF_HEADER_INFO_ITEMS``.
    """
    # Collect (key, keyword arguments) pairs first, then emit them in order.
    meta_entries = [('source', {'value': 'valiant'})]

    # TODO: add contig lengths?
    meta_entries.extend(
        ('contig', {'items': [('ID', contig_id)]}) for contig_id in contigs
    )
    meta_entries.extend(
        ('INFO', {'items': info_items}) for info_items in VCF_HEADER_INFO_ITEMS
    )

    vcf_header: VariantHeader = VariantHeader()
    for meta_key, meta_kwargs in meta_entries:
        vcf_header.add_meta(meta_key, **meta_kwargs)
    return vcf_header
def setup_header(self, header: VariantHeader):
    """Called by baseclass constructor.

    Adds the GT, GQ and GL FORMAT lines to *header*, in that order.
    """
    format_lines = (
        '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype computed by WhatsHap genotyping algorithm">',
        '##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Phred-scaled genotype quality computed by WhatsHap genotyping algorithm">',
        '##FORMAT=<ID=GL,Number=G,Type=Float,Description="Log10-scaled likelihoods for genotypes: 0/0, 0/1, 1/1, computed by WhatsHap genotyping algorithm">',
    )
    for format_line in format_lines:
        header.add_line(format_line)
def generate_header(reference_fa: str, tag: str) -> VariantHeader:
    """
    Generates the header for the minimal VCF.

    The header gets one FILTER entry named ``tag`` (description
    "Failed dToxoG") and one contig entry, with its length, for every
    reference sequence in the fasta file.

    :param reference_fa: Path to reference fasta file.
    :param tag: The filter tag to use.
    :return: The newly built header.
    """
    vcf_header = VariantHeader()
    vcf_header.filters.add(tag, None, None, "Failed dToxoG")

    # Read all (name, length) pairs while the fasta is open; the file is
    # closed even if reading fails.
    reference = FastaFile(reference_fa)
    try:
        contig_lengths = [
            (name, reference.get_reference_length(name))
            for name in reference.references
        ]
    finally:
        reference.close()

    for name, length in contig_lengths:
        vcf_header.contigs.add(name, length=length)
    return vcf_header
def get_vcf_header(self, sample_name, contigs):
    """Build the VCF header for one sample.

    The header receives, in order: five FILTER lines (PASS, refCall, lowGQ,
    lowQUAL, conflictPos), the GT and GQ FORMAT lines, one contig entry with
    its length for every chromosome reported by ``self.fasta_handler`` that
    is also in *contigs*, and finally the sample itself.
    """
    filter_descriptions = (
        ("PASS", "All filters passed"),
        ("refCall", "Call is homozygous"),
        ("lowGQ", "Low genotype quality"),
        ("lowQUAL", "Low variant call quality"),
        ("conflictPos", "Overlapping record"),
    )
    format_fields = (
        ("GT", 'String', "Genotype"),
        ("GQ", 'Float', "Genotype Quality"),
    )

    vcf_header = VariantHeader()

    for filter_id, description in filter_descriptions:
        vcf_header.add_meta(
            key='FILTER',
            items=[('ID', filter_id), ('Description', description)],
        )

    for field_id, field_type, description in format_fields:
        vcf_header.add_meta(
            key='FORMAT',
            items=[
                ('ID', field_id),
                ('Number', 1),
                ('Type', field_type),
                ('Description', description),
            ],
        )

    # Only chromosomes that the caller asked for become contig entries.
    for chromosome in self.fasta_handler.get_chromosome_names():
        if chromosome in contigs:
            length = self.fasta_handler.get_chromosome_sequence_length(chromosome)
            vcf_header.contigs.add(chromosome, length=length)

    vcf_header.add_sample(sample_name)
    return vcf_header
def get_vcf_header(self, sample_name):
    """Build the VCF header for one sample.

    The header receives, in order: five FILTER lines (PASS, refCall, lowGQ,
    lowQUAL, conflictPos), the GT and GQ FORMAT lines, one ``contig`` meta
    line (ID and length) for every entry returned by
    ``self.bam_handler.get_header_sq()``, and finally the sample itself.
    """
    filter_descriptions = (
        ("PASS", "All filters passed"),
        ("refCall", "Call is homozygous"),
        ("lowGQ", "Low genotype quality"),
        ("lowQUAL", "Low variant call quality"),
        ("conflictPos", "Overlapping record"),
    )
    format_fields = (
        ("GT", 'String', "Genotype"),
        ("GQ", 'Float', "Genotype Quality"),
    )

    vcf_header = VariantHeader()

    for filter_id, description in filter_descriptions:
        vcf_header.add_meta(
            key='FILTER',
            items=[('ID', filter_id), ('Description', description)],
        )

    for field_id, field_type, description in format_fields:
        vcf_header.add_meta(
            key='FORMAT',
            items=[
                ('ID', field_id),
                ('Number', 1),
                ('Type', field_type),
                ('Description', description),
            ],
        )

    # Each entry is indexed by 'SN' for the contig ID and 'LN' for its length.
    for sequence in self.bam_handler.get_header_sq():
        contig_id = sequence['SN']
        contig_length = sequence['LN']
        vcf_header.add_meta(
            key='contig',
            items=[('ID', contig_id), ('length', contig_length)],
        )

    vcf_header.add_sample(sample_name)
    return vcf_header