Exemplo n.º 1
0
        # write the chr\tpos\t... line
        combined_f.write(vcf_list[0].header)

        # Write the variants
        for v_key in combined_variants:
            callers_indexes = variant_to_vcf_dict[v_key]
            callers_names = [callers[i] for i in callers_indexes]
            info_dict = OrderedDict()
            for i in callers_indexes:
                # Combine the selected information in the dictionary
                info_dict.update(vcf_list[i].variants[v_key].info)
            i = callers_indexes[0]
            combined_variant = Variant.combine_info(
                vcf_list[i].variants[v_key], columns_to_keep, callers_names,
                info_dict)
            combined_f.write(Variant.write(combined_variant))

else:

    # Process each vcf and extract the information from the selected columns
    vcf_list = [
        NormalisedVcf(vcf).process_somatic_vcf(columns_to_keep, normal_id,
                                               tumor_id) for vcf in vcf_in
    ]

    # Combine the variants into a list
    combined_variants, variant_to_vcf_dict = [], dd(list)
    callers = [vcf.caller for vcf in vcf_list]

    if args.priority:
        if set(callers) != set(args.priority):
Exemplo n.º 2
0
            # Write the header lines
            if line.startswith('##'):
                f_vcf_out.write(line)
            elif line.startswith('#'):
                for new_line in BAM_STATS_LINES:
                    f_vcf_out.write(new_line)
                f_vcf_out.write(line)

            # Calculate bam stats for variants
            else:
                variant = Variant().read_variant(line)
                mpileup = mpileup_dict.get(
                    '\t'.join([variant.chr, variant.pos]), '')
                bam_stats = variant.cal_bam_stats(mpileup)
                variant = variant.add_bam_stats(bam_stats)
                f_vcf_out.write(variant.write())

else:

    normal_mpileup_dict = create_mpileup_dict(normal_mpileup)
    tumor_mpileup_dict = create_mpileup_dict(tumor_mpileup)

    with open(vcf_in, 'r') as vcf_i, open(vcf_out, 'w') as vcf_o:
        for line in vcf_i:
            if line.startswith('#'):
                if line.startswith('##'):
                    vcf_o.write(line)
                else:
                    for new_line in BAM_STATS_LINES:
                        vcf_o.write(new_line)
                    line = line.split()
# Copy the input VCF to the output, inserting AF_HEADER_LINE into the header
# and adding an AF entry (plus DP when only DPI is present) to every variant.
with open(args.input, "r") as f_vcf, open(args.output, "w") as f_vcf_out:
    for line in f_vcf:
        # Lines starting with "##" are copied through unchanged
        if line.startswith("##"):
            f_vcf_out.write(line)
        # The remaining "#" header line is preceded by the AF header line
        elif line.startswith("#"):
            f_vcf_out.write(AF_HEADER_LINE)
            f_vcf_out.write(line)

        # Calculate AF for strelka variants
        else:
            variant = Variant().read_variant(line)
            fmt = variant.format

            # Prefer DP as the depth; when it is absent fall back to DPI and
            # record that value as DP so every written record carries DP.
            if "DP" in fmt:
                depth = fmt["DP"]
            else:
                depth = fmt["DPI"]
                fmt["DP"] = depth

            # A zero depth would divide by zero and a missing depth (".")
            # cannot be parsed as a number; in both cases AF is written as
            # the missing value ".".
            if depth in ("0", "."):
                fmt["AF"] = "."
            else:
                total_depth = float(depth)
                # AF is each AD entry after the first divided by the depth,
                # rounded to two decimals and comma-joined
                # (presumably AD lists the reference count first -- confirm).
                fmt["AF"] = ",".join(
                    str(round(float(count) / total_depth, 2))
                    for count in fmt["AD"].split(",")[1:]
                )
            f_vcf_out.write(variant.write())