Exemplo n.º 1
0
def augment_header(header: VariantHeader, contigs: List[str], formats: List[str], infos: List[str]):
    """
    Add contigs, formats and infos to a VariantHeader.

    formats and infos are given as a list of strings, where each item is the ID of the header
    line to add. The full header line is taken from PREDEFINED_FORMATS / PREDEFINED_INFOS.
    Any other FORMATs or INFOs that are not predefined will raise a VcfError.

    A FORMAT that is already present in the header is removed and replaced by its predefined
    definition. The predefined definition is looked up *before* the existing line is removed,
    so requesting an unknown FORMAT raises without first deleting that FORMAT from the header.

    The header is modified in place. If a VcfError is raised, lines added for earlier
    contigs/IDs remain in the header.

    :param header: The header to extend.
    :param contigs: Contig names to add.
    :param formats: IDs of predefined FORMAT lines to add (replacing existing ones).
    :param infos: IDs of predefined INFO lines to add.
    :raises VcfError: If a FORMAT or INFO ID is not predefined.
    """
    for contig in contigs:
        header.contigs.add(contig)

    for fmt in formats:
        # Validate first: an unknown ID must not cost the header its existing FORMAT line.
        try:
            predefined = PREDEFINED_FORMATS[fmt]
        except KeyError:
            raise VcfError("FORMAT {!r} not defined in VCF header".format(fmt)) from None
        if fmt in header.formats:
            header.formats[fmt].remove_header()
        header.add_line(predefined.line())

    for info in infos:
        try:
            predefined = PREDEFINED_INFOS[info]
        except KeyError:
            raise VcfError("INFO {!r} not defined in VCF header".format(info)) from None
        header.add_line(predefined.line())
Exemplo n.º 2
0
    def setup_header(self, header: VariantHeader):
        """Drop the first ``phasing`` header record and add this object's FORMAT line.

        Called by baseclass constructor.
        """
        # FreeBayes adds phasing=none to its VCF output; only the first such
        # record is removed.
        phasing_record = next(
            (record for record in header.records if record.key == "phasing"), None
        )
        if phasing_record is not None:
            phasing_record.remove()

        tag_definition = PREDEFINED_FORMATS[self.tag]
        header.add_line(tag_definition.line())
Exemplo n.º 3
0
def format_header():
    """Return a new VariantHeader populated from a fixed list of header lines.

    The lines are handed to ``VariantHeader.add_line`` one by one, in the
    order listed below.
    """
    header = VariantHeader()
    fixed_lines = (
        '##fileformat=VCFv4.2',
        '##assembly=hg19',
        '##FILTER=<ID=PASS,Description="All filters passed">',
        '##INFO=<ID=AAChange_refGene,Number=.,Type=String,Description="AAChange_refGene annotation">',
        # NOTE(review): placeholder FORMAT line is left disabled:
        # '##FORMAT=<ID=None,Number=R,Type=Integer,Description="None">',
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO',
    )
    for header_line in fixed_lines:
        header.add_line(header_line)
    return header
Exemplo n.º 4
0
def _get_vcf_header(contigs: List[str]) -> VariantHeader:
    """Build a VCF header declaring the source, the contigs and the INFO fields.

    :param contigs: Contig IDs; one ``contig`` meta line is added per entry.
    :return: A new header with ``source=valiant``, the contig lines, and one
        ``INFO`` meta line per entry of ``VCF_HEADER_INFO_ITEMS``.
    """
    vcf_header = VariantHeader()
    vcf_header.add_meta('source', value='valiant')

    # Contigs are declared by ID only.
    # TODO: add contig lengths?
    for contig_id in contigs:
        vcf_header.add_meta('contig', items=[('ID', contig_id)])

    for info_field in VCF_HEADER_INFO_ITEMS:
        vcf_header.add_meta('INFO', items=info_field)

    return vcf_header
Exemplo n.º 5
0
 def setup_header(self, header: VariantHeader):
     """Add the GT, GQ and GL FORMAT lines to ``header``.

     Called by baseclass constructor.
     """
     genotyping_format_lines = (
         '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype computed by WhatsHap genotyping algorithm">',
         '##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Phred-scaled genotype quality computed by WhatsHap genotyping algorithm">',
         '##FORMAT=<ID=GL,Number=G,Type=Float,Description="Log10-scaled likelihoods for genotypes: 0/0, 0/1, 1/1, computed by WhatsHap genotyping algorithm">',
     )
     # Lines are added in the order GT, GQ, GL.
     for format_line in genotyping_format_lines:
         header.add_line(format_line)
def generate_header(reference_fa: str, tag: str) -> VariantHeader:
    """
    Build the minimal VCF header: one filter plus the reference contigs.

    :param reference_fa: Path to reference fasta file.
    :param tag: The filter tag to use; it is registered with the
        description "Failed dToxoG".
    :return: A new header holding the filter and one contig entry (with
        length) per reference sequence in the fasta file.
    """
    header = VariantHeader()
    header.filters.add(tag, None, None, "Failed dToxoG")

    # Read every (name, length) pair while the fasta is open; the handle is
    # closed even if reading fails.
    fasta = FastaFile(reference_fa)
    try:
        contig_lengths = [
            (name, fasta.get_reference_length(name)) for name in fasta.references
        ]
    finally:
        fasta.close()

    for name, length in contig_lengths:
        header.contigs.add(name, length=length)

    return header
Exemplo n.º 7
0
    def get_vcf_header(self, sample_name, contigs):
        """Build a VCF header with fixed FILTER/FORMAT lines, contigs and one sample.

        FILTER and FORMAT meta lines are added first. Then every chromosome
        name reported by ``self.fasta_handler`` that is also in ``contigs``
        is added as a contig together with its sequence length. Finally
        ``sample_name`` is added as a sample.

        Returns the populated VariantHeader.
        """
        # (ID, Description) of each FILTER line, in output order.
        filter_lines = (
            ("PASS", "All filters passed"),
            ("refCall", "Call is homozygous"),
            ("lowGQ", "Low genotype quality"),
            ("lowQUAL", "Low variant call quality"),
            ("conflictPos", "Overlapping record"),
        )
        # (ID, Number, Type, Description) of each FORMAT line, in output order.
        format_lines = (
            ("GT", 1, 'String', "Genotype"),
            ("GQ", 1, 'Float', "Genotype Quality"),
        )

        header = VariantHeader()
        for filter_id, description in filter_lines:
            header.add_meta(
                key='FILTER',
                items=[('ID', filter_id), ('Description', description)],
            )
        for format_id, number, value_type, description in format_lines:
            header.add_meta(
                key='FORMAT',
                items=[('ID', format_id), ('Number', number), ('Type', value_type),
                       ('Description', description)],
            )

        # Only chromosomes requested through `contigs` are declared.
        for chromosome in self.fasta_handler.get_chromosome_names():
            if chromosome in contigs:
                length = self.fasta_handler.get_chromosome_sequence_length(chromosome)
                header.contigs.add(chromosome, length=length)

        header.add_sample(sample_name)

        return header
Exemplo n.º 8
0
    def get_vcf_header(self, sample_name):
        """Build a VCF header with fixed FILTER/FORMAT lines, BAM contigs and one sample.

        FILTER and FORMAT meta lines are added first. Then one ``contig``
        meta line is added per entry returned by
        ``self.bam_handler.get_header_sq()``, using the entry's ``SN`` value
        as ID and its ``LN`` value as length. Finally ``sample_name`` is
        added as a sample.

        Returns the populated VariantHeader.
        """
        # (ID, Description) of each FILTER line, in output order.
        filter_lines = (
            ("PASS", "All filters passed"),
            ("refCall", "Call is homozygous"),
            ("lowGQ", "Low genotype quality"),
            ("lowQUAL", "Low variant call quality"),
            ("conflictPos", "Overlapping record"),
        )
        # (ID, Number, Type, Description) of each FORMAT line, in output order.
        format_lines = (
            ("GT", 1, 'String', "Genotype"),
            ("GQ", 1, 'Float', "Genotype Quality"),
        )

        header = VariantHeader()
        for filter_id, description in filter_lines:
            header.add_meta(
                key='FILTER',
                items=[('ID', filter_id), ('Description', description)],
            )
        for format_id, number, value_type, description in format_lines:
            header.add_meta(
                key='FORMAT',
                items=[('ID', format_id), ('Number', number), ('Type', value_type),
                       ('Description', description)],
            )

        for sq_entry in self.bam_handler.get_header_sq():
            contig_items = [('ID', sq_entry['SN']), ('length', sq_entry['LN'])]
            header.add_meta(key='contig', items=contig_items)

        header.add_sample(sample_name)

        return header