def passthrough(args):
    """
    %prog passthrough chrY.vcf chrY.new.vcf

    Pass through Y and MT vcf.
    """
    # Header lines (starting with "#") are copied to the output unchanged.
    # Every record gets FILTER=PASS and FORMAT=GT:GP, its genotype separator
    # is switched from "/" to "|", and a three-value probability string is
    # appended with 1.000 at the position of the called genotype.
    p = OptionParser(passthrough.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    vcffile, newvcffile = args
    # Order matters: the index of a genotype in this list is the index of
    # its probability in the GP field.
    genotypes = ["0/0", "0/1", "1/1"]

    # Both handles are managed by `with` so they are closed even when a
    # record fails to parse or carries an unexpected genotype.
    with open(vcffile) as fp, open(newvcffile, "w") as fw:
        for row in fp:
            if row[0] == "#":
                print(row.strip(), file=fw)
                continue

            v = VcfLine(row)
            v.filter = "PASS"
            v.format = "GT:GP"

            # list.index raises ValueError when the genotype is not one of
            # the three entries above.
            probs = [0] * len(genotypes)
            probs[genotypes.index(v.genotype)] = 1
            gp = ",".join("{0:.3f}".format(x) for x in probs)
            v.genotype = "{0}:{1}".format(v.genotype.replace("/", "|"), gp)
            print(v, file=fw)
def validate(args):
    """
    %prog validate imputed.vcf withheld.vcf

    Validate imputation against withheld variants.
    """
    # Genotypes from the withheld file are indexed by (seqid, pos). Each
    # site in the imputed file is then compared against that index; the
    # concordance is logged and every discordant row is echoed to stderr.
    p = OptionParser(validate.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    imputed, withheld = args

    # (seqid, pos) -> genotype string taken from the withheld file. A later
    # record at the same site overwrites an earlier one.
    register = {}
    with open(withheld) as fp:
        for row in fp:
            if row[0] == "#":
                continue
            v = VcfLine(row)
            register[(v.seqid, v.pos)] = v.genotype

    logging.debug(
        "Imported {0} records from `{1}`".format(len(register), withheld)
    )

    # NOTE(review): the handle returned by must_open is not closed here;
    # confirm it can never be stdin before adding an explicit close.
    fp = must_open(imputed)
    hit = concordant = 0
    seen = set()
    for row in fp:
        if row[0] == "#":
            continue

        v = VcfLine(row)
        site = (v.seqid, v.pos)

        # Only the first record seen at a given site is evaluated.
        if site in seen:
            continue
        seen.add(site)

        if site not in register:
            continue

        truth = register[site]

        # Compare on the first ":"-separated field only. A "|"-separated
        # call is rewritten with sorted alleles joined by "/" so that
        # allele order does not affect the comparison.
        # Disabled alternative kept from the original code: pick the
        # genotype with the highest value in the last ":"-separated field,
        # i.e. max(zip(probs, ["0/0", "0/1", "1/1"]))[-1].
        call = v.genotype.split(":")[0]
        if "|" in call:
            call = "/".join(sorted(call.split("|")))

        hit += 1
        if truth == call:
            concordant += 1
        else:
            print(row.strip(), "truth={0}".format(truth), file=sys.stderr)

    logging.debug(
        "Total concordant: {0}".format(percentage(concordant, hit))
    )
def mitocompile(args):
    """
    %prog mitocompile *.vcf.gz

    Extract information about deletions in vcf file.
    """
    # For every VCF given, the matching depth file (same name with
    # ".sv.vcf.gz" replaced by ".depth") supplies one depth value, and each
    # VCF record is printed to stdout as one tab-separated row under the
    # header emitted below.
    from jcvi.formats.vcf import VcfLine

    p = OptionParser(mitocompile.__doc__)
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    vcfs = args
    print("\t".join("vcf samplekey depth seqid pos alt svlen pe sr".split()))
    for i, vcf in enumerate(vcfs):
        # Progress message once every 100 files.
        if (i + 1) % 100 == 0:
            logging.debug(
                "Process `{}` [{}]".format(vcf, percentage(i + 1, len(vcfs)))
            )

        # Only the first line of the depth file is read; it must split into
        # exactly two whitespace-separated fields.
        depthfile = vcf.replace(".sv.vcf.gz", ".depth")
        fp = must_open(depthfile)
        # next(fp) works on Python 2 and 3; file objects have no .next()
        # method on Python 3.
        chrm, depth = next(fp).split()
        depth = int(float(depth))

        # Sample key is the part of the file name before the first "_".
        samplekey = op.basename(vcf).split("_")[0]

        fp = must_open(vcf)
        for row in fp:
            if row[0] == "#":
                continue
            v = VcfLine(row)
            info = _parse_vcf_info(v.info)
            # SVLEN is optional (printed as "None" when absent); PE and SR
            # are required and raise KeyError when missing.
            fields = (
                vcf,
                samplekey,
                depth,
                v.seqid,
                v.pos,
                v.alt,
                info.get("SVLEN"),
                info["PE"],
                info["SR"],
            )
            print("\t".join(str(x) for x in fields))


def _parse_vcf_info(info):
    """Return the `key=value` pairs of a `;`-separated INFO string as a dict."""
    # Split explicitly instead of relying on urllib's parse_qsl: current
    # Python releases no longer treat ";" as a query-string separator, which
    # would collapse the whole INFO column into a single key.
    pairs = {}
    for field in info.split(";"):
        key, sep, value = field.partition("=")
        # Entries without "=" (flags) and entries with an empty value are
        # skipped, as parse_qsl does by default. Values are kept verbatim
        # (no URL-decoding).
        if not sep or not value:
            continue
        pairs[key] = value
    return pairs