def main(debug=None):
    """Rename the samples of a VCF and print the resulting VCF to stdout.

    Command-line arguments are parsed by docopt from the module docstring.
    Only the ``#CHROM`` header line is modified; every other line is printed
    with its surrounding whitespace stripped.  Sample names (the columns from
    index 9 onward) are rewritten in this order:

    1. every ``--subst`` pair, written ``old:new``, ``old=new`` or
       ``old,new``, is applied in the order given to exact name matches;
    2. ``--prefix`` is prepended to every sample name;
    3. ``--suffix`` is appended to every sample name.

    Args:
        debug: Optional argument list forwarded to docopt as ``argv``.

    Raises:
        ValueError: If a ``--subst`` entry does not split into exactly one
            original name and one replacement.
    """
    args = docopt(__doc__,
                  argv=debug,
                  version=__version__)

    if args["<vcf>"] == "":
        # Nothing to read: show the usage text and stop instead of going on
        # to open an empty path.
        print(__doc__)
        return

    v = vcf(args["<vcf>"])
    for line in v.output_raw():
        if not line.startswith("#CHROM"):
            print(line.strip())
            continue

        # Drop any line terminator before splitting so the last sample name
        # is compared and extended without a trailing newline attached.
        fields = line.rstrip("\r\n").split("\t")

        if args["--subst"]:
            for pair in args["--subst"]:
                parts = re.split("[:=,]", pair)
                if len(parts) != 2:
                    raise ValueError(
                        "Invalid --subst value {!r}; expected a single "
                        "old:new pair".format(pair))
                orig, replacement = parts
                # Pairs are applied sequentially, so a later pair can match
                # a name produced by an earlier one.
                fields[9:] = [replacement if sample == orig else sample
                              for sample in fields[9:]]

        if args["--prefix"]:
            fields[9:] = [args["--prefix"] + sample for sample in fields[9:]]

        if args["--suffix"]:
            fields[9:] = [sample + args["--suffix"] for sample in fields[9:]]

        print('\t'.join(fields))
ANN_header = [ "allele", "effect", "impact", "gene_name", "gene_id", "feature_type", "feature_id", "transcript_biotype", "exon_intron_rank", "nt_change", "aa_change", "cDNA_position/cDNA_len", "protein_position", "distance_to_feature", "error" ] debug = None if len(sys.argv) == 1: debug = ['genome', "test.vcf.gz"] if __name__ == '__main__': args = docopt(__doc__, argv=debug, options_first=False) module_path = os.path.split(os.path.realpath(__file__))[0] v = vcf(args["<vcf>"]) field_set = [] if len(sys.argv) == 1: print("Specify a VCF File") sys.exit() else: info = [m.groupdict()["id"] for m in r_info.finditer(v.raw_header)] format = [m.groupdict()["id"] for m in r_format.finditer(v.raw_header)] # Construct Query String print_header = "" if args["--print-header"]: print_header = "--print-header" if args["long"]: query_start = repr("%CHROM\t%POS\t%ID\t%REF\t%ALT\t%QUAL\t%FILTER\t" + '\t'.join(['%' + x
def _parse_threshold(raw, filter_s):
    """Interpret a --min/--max value as either a count or a frequency.

    A value with a fractional part (e.g. "0.5") is a frequency and is compared
    against the rate stored under ``"r_" + filter_s``.  A whole number (e.g.
    "2" or "2.0") is a count and is compared against the tally stored under
    ``filter_s``.

    Returns:
        A ``(key, value, kind)`` tuple where ``kind`` is ``"FREQUENCY"`` or
        ``"COUNT"``.
    """
    value = float(raw)
    if int(value) != value:
        return "r_" + filter_s, value, "FREQUENCY"
    return filter_s, int(value), "COUNT"


def _filter_insert_index(header):
    """Return the index at which a new ##FILTER header line should be inserted.

    The line goes directly after the first existing ``##FILTER`` line.  When
    the header has none, it goes just before the ``#CHROM`` line, or at the
    end of the header if that line is missing too.
    """
    fallback = len(header)
    for n, text in enumerate(header):
        if text.startswith("##FILTER"):
            return n + 1
        if text.startswith("#CHROM"):
            fallback = n
    return fallback


def main(debug=None):
    """Filter VCF records by genotype count or frequency; write to stdout.

    Command-line arguments are parsed by docopt from the module docstring.
    One of REF, HET, ALT or MISSING selects which per-record tally is tested.
    A record is flagged when the tally (or its rate across all samples) is
    below ``--min`` or above ``--max``.

    Without ``--soft-filter`` flagged records are dropped.  With it, every
    record is written and the FILTER column (index 6) is updated according to
    ``--mode``:

    * ``+`` appends the soft-filter name to the existing FILTER value of
      flagged records, replacing a bare ``PASS``;
    * ``x`` overwrites FILTER with the soft-filter name for flagged records
      and with ``PASS`` for all others.

    Args:
        debug: Optional argument list forwarded to docopt as ``argv``.
    """
    args = docopt(__doc__,
                  version='VCF-Toolbox v0.1',
                  argv=debug,
                  options_first=False)

    if args["--soft-filter"] and not args["--mode"]:
        exit(message("Must Specify --mode with soft-filter"))

    v = vcf(args["<vcf>"])
    n_samples = len(v.samples) * 1.0  # float so the rates below are fractions
    f = {}
    # A list (not a set) is searched on purpose: argument values may be
    # unhashable.
    filter_s = [
        x for x in list(args.values())
        if x in ["REF", "HET", "ALT", "MISSING"]
    ][0]

    # Filter by rate or by number?  Each bound is parsed independently and
    # contributes its own clause to the header description.
    conditions = []
    if args["--min"]:
        filter_key_min, filter_val_min, min_type = _parse_threshold(
            args["--min"], filter_s)
        conditions.append(
            "{}({}) < {}".format(min_type, filter_s, filter_val_min))
    if args["--max"]:
        filter_key_max, filter_val_max, max_type = _parse_threshold(
            args["--max"], filter_s)
        conditions.append(
            "{}({}) > {}".format(max_type, filter_s, filter_val_max))

    # Output header.  The soft-filter definition is only written when at
    # least one threshold exists, since otherwise no record is ever flagged.
    header = v.raw_header.splitlines()
    if args["--soft-filter"] and conditions:
        filter_line = (
            '##FILTER=<ID={},Description="Apply filter if {}">'.format(
                args["--soft-filter"], " or ".join(conditions)))
        header.insert(_filter_insert_index(header), filter_line)
    sys.stdout.write('\n'.join(header) + "\n")

    for line in v:
        f["ALT"] = line.num_hom_alt
        f["HET"] = line.num_het
        f["REF"] = line.num_hom_ref
        f["MISSING"] = int(n_samples - line.num_called)
        f["r_ALT"] = f["ALT"] / n_samples
        f["r_HET"] = f["HET"] / n_samples
        f["r_REF"] = f["REF"] / n_samples
        f["r_MISSING"] = f["MISSING"] / n_samples

        filtered = False
        if args["--min"] and f[filter_key_min] < filter_val_min:
            filtered = True
        if args["--max"] and f[filter_key_max] > filter_val_max:
            filtered = True

        if args["--soft-filter"]:
            line = str(line).split("\t")
            if args["--mode"] == "x":
                # Overwrite mode: start from PASS; replaced below if flagged.
                line[6] = "PASS"
            if filtered:
                if args["--mode"] == "+":
                    if line[6] == "PASS":
                        line[6] = ""
                    line[6] = ';'.join(
                        [line[6]] + [args["--soft-filter"]]).strip(";")
                elif args["--mode"] == "x":
                    line[6] = args["--soft-filter"]
            sys.stdout.write('\t'.join(line))
        elif not filtered:
            sys.stdout.write(str(line))