示例#1
0
    def get_vcf_header(self, sample_name, contigs):
        """Build a VCF header with one sample column.

        Registers the FILTER and FORMAT definitions, then one contig entry
        (with its length) for every chromosome reported by
        ``self.fasta_handler`` that is also found in ``contigs``, and
        finally adds the sample.

        :param sample_name: Name passed to ``add_sample`` on the header.
        :param contigs: Container of chromosome names to keep; chromosomes
            from the FASTA handler that are not in it are skipped.
        :return: The populated ``VariantHeader``.
        """
        # (ID, Description) pairs, registered in this order.
        filter_definitions = (
            ("PASS", "All filters passed"),
            ("refCall", "Call is homozygous"),
            ("lowGQ", "Low genotype quality"),
            ("lowQUAL", "Low variant call quality"),
            ("conflictPos", "Overlapping record"),
        )
        # (ID, Number, Type, Description) tuples, registered in this order.
        format_definitions = (
            ("GT", 1, 'String', "Genotype"),
            ("GQ", 1, 'Float', "Genotype Quality"),
        )

        vcf_header = VariantHeader()
        for filter_id, description in filter_definitions:
            vcf_header.add_meta(
                key='FILTER',
                items=[('ID', filter_id), ('Description', description)],
            )
        for format_id, number, value_type, description in format_definitions:
            vcf_header.add_meta(
                key='FORMAT',
                items=[('ID', format_id), ('Number', number),
                       ('Type', value_type), ('Description', description)],
            )

        # Contigs are emitted in the order the FASTA handler lists them.
        for chromosome in self.fasta_handler.get_chromosome_names():
            if chromosome in contigs:
                length = self.fasta_handler.get_chromosome_sequence_length(
                    chromosome)
                vcf_header.contigs.add(chromosome, length=length)

        vcf_header.add_sample(sample_name)
        return vcf_header
示例#2
0
    def get_vcf_header(self, sample_name):
        """Build a VCF header with one sample column.

        Registers the FILTER and FORMAT definitions, then one ``contig``
        meta line per sequence entry returned by
        ``self.bam_handler.get_header_sq()``, and finally adds the sample.

        :param sample_name: Name passed to ``add_sample`` on the header.
        :return: The populated ``VariantHeader``.
        """
        # (ID, Description) pairs, registered in this order.
        filter_definitions = (
            ("PASS", "All filters passed"),
            ("refCall", "Call is homozygous"),
            ("lowGQ", "Low genotype quality"),
            ("lowQUAL", "Low variant call quality"),
            ("conflictPos", "Overlapping record"),
        )
        # (ID, Number, Type, Description) tuples, registered in this order.
        format_definitions = (
            ("GT", 1, 'String', "Genotype"),
            ("GQ", 1, 'Float', "Genotype Quality"),
        )

        vcf_header = VariantHeader()
        for filter_id, description in filter_definitions:
            vcf_header.add_meta(
                key='FILTER',
                items=[('ID', filter_id), ('Description', description)],
            )
        for format_id, number, value_type, description in format_definitions:
            vcf_header.add_meta(
                key='FORMAT',
                items=[('ID', format_id), ('Number', number),
                       ('Type', value_type), ('Description', description)],
            )

        # Each sequence entry is indexed by 'SN' (used as the contig ID)
        # and 'LN' (used as its length).
        for sequence_entry in self.bam_handler.get_header_sq():
            contig_name = sequence_entry['SN']
            contig_length = sequence_entry['LN']
            vcf_header.add_meta(
                key='contig',
                items=[('ID', contig_name), ('length', contig_length)],
            )

        vcf_header.add_sample(sample_name)
        return vcf_header
示例#3
0
def _get_vcf_header(contigs: List[str]) -> VariantHeader:
    """Build a VCF header listing the given contigs and the INFO fields.

    The header gets a ``source=valiant`` line, one ``contig`` line per
    name in ``contigs`` (ID only), and one ``INFO`` line per entry of
    ``VCF_HEADER_INFO_ITEMS``.

    :param contigs: Contig names, written in the order given.
    :return: The populated header.
    """
    vcf_header = VariantHeader()
    vcf_header.add_meta('source', value='valiant')

    # TODO: add contig lengths?
    for contig_name in contigs:
        contig_items = [('ID', contig_name)]
        vcf_header.add_meta('contig', items=contig_items)

    for info_definition in VCF_HEADER_INFO_ITEMS:
        vcf_header.add_meta('INFO', items=info_definition)

    return vcf_header
示例#4
0
def format_header():
    """Return a ``VariantHeader`` filled from a fixed set of header lines.

    The lines declare the VCFv4.2 file format, the hg19 assembly, the PASS
    filter and the ``AAChange_refGene`` INFO field, followed by the column
    header line. Each one is handed to ``add_line`` in that order.
    """
    fixed_lines = (
        '##fileformat=VCFv4.2',
        '##assembly=hg19',
        '##FILTER=<ID=PASS,Description="All filters passed">',
        '##INFO=<ID=AAChange_refGene,Number=.,Type=String,Description="AAChange_refGene annotation">',
        # A placeholder FORMAT line (ID=None, Number=R, Type=Integer) was
        # disabled in the original table and is intentionally not emitted.
        # NOTE(review): the column line below is passed to add_line just
        # like the '##' meta lines - confirm the header class accepts it.
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO',
    )

    vcf_header = VariantHeader()
    for header_line in fixed_lines:
        vcf_header.add_line(header_line)
    return vcf_header
def generate_header(reference_fa: str, tag: str) -> VariantHeader:
    """
    Build the header for the minimal VCF.

    Registers ``tag`` as a filter described as "Failed dToxoG" and adds one
    contig entry, with its length, per sequence in the reference FASTA.

    :param reference_fa: Path to reference fasta file.
    :param tag: The filter tag to use.
    :return: The populated header.
    """
    vcf_header = VariantHeader()
    vcf_header.filters.add(tag, None, None, "Failed dToxoG")

    # The context manager closes the FASTA handle even if a lookup fails.
    with FastaFile(reference_fa) as reference:
        for contig_name in reference.references:
            contig_length = reference.get_reference_length(contig_name)
            vcf_header.contigs.add(contig_name, length=contig_length)

    return vcf_header