Пример #1
0
def fieldDic(record, fields):
    """Collect the requested INFO values of one VCF record into a dict.

    Args:
        record: One VCF data line, already split into its columns; it is
            handed unchanged to ``commonTool.vcf``.
        fields: Iterable of INFO keys to look up.

    Returns:
        A dict mapping every key in ``fields`` to whatever
        ``parseInfo`` returns for it.
    """
    # Build the parser once and reuse it for every requested key.
    parser = commonTool.vcf(record)
    return {key: parser.parseInfo(key) for key in fields}
Пример #2
0
def splitVcf(infile, outfile, fields):
    """Write one output record per ALT allele, keeping only chosen INFO keys.

    Header lines (starting with ``#``) and blank lines of the input are
    skipped; a fixed eight-column header is written to the output instead.
    Records with a single ALT allele are copied (columns 0-6) with a rebuilt
    INFO column.  Records whose ALT column contains commas are expanded into
    one record per allele, and each expanded record is passed through
    ``commonTool.vcf(...).leftAlign()`` before being written.

    Args:
        infile: Path of the VCF file to read.
        outfile: Path of the file to write (truncated if it exists).
        fields: Comma-separated string of INFO keys to carry over.

    Raises:
        IndexError: If a comma-separated INFO value has fewer entries than
            the record has ALT alleles, or a data line has too few columns.
    """
    # Split the key list before touching the output file so a bad argument
    # does not leave a truncated file behind.
    wanted = fields.split(",")

    # The context managers guarantee both files are closed even when a
    # malformed record raises part-way through.
    with open(infile, 'r') as src, open(outfile, 'w') as dst:
        dst.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n")
        for line in src:
            if line.startswith("#") or not line.strip():
                continue
            record = line.split("\t")
            dic = fieldDic(record, wanted)

            if "," not in record[4]:
                # Single ALT allele: copy the fixed columns, rebuild INFO.
                info = ";".join(
                    "%s=%s" % (field, dic[field]) for field in wanted
                ).strip(";")
                dst.write("\t".join(record[0:7] + [info]))
                dst.write("\n")
                continue

            # Several ALT alleles: emit one record per allele.
            for n, allele in enumerate(record[4].split(",")):
                pairs = []
                for field in wanted:
                    value = dic[field]
                    # A comma-separated value is taken to hold one entry
                    # per allele; any other value is shared by all of them.
                    if "," in value:
                        value = value.split(",")[n]
                    pairs.append("%s=%s" % (field, value))
                info = ";".join(pairs).strip(";")
                output = record[0:4] + [allele] + record[5:7] + [info]
                outputLeft = commonTool.vcf(output).leftAlign()
                dst.write("\t".join(outputLeft))
                dst.write("\n")
Пример #3
0
def parseIlluminaNextseq(infile, outfile):
    """Flatten VEP-annotated VCF records into tab-separated rows.

    Header lines (starting with ``#``) and blank lines are skipped.  For
    every other line one row is written, built from fixed VCF columns, the
    ``DP`` INFO value, the ``VF`` and ``AD`` values obtained through
    ``parseDetail(..., 10)``, and selected positions of the ``|``-separated
    ``CSQ`` INFO value.

    Args:
        infile: Path of the annotated VCF file to read.
        outfile: Path of the tab-separated file to write (truncated).

    Raises:
        IndexError: If a data line or its CSQ value has fewer fields than
            the positions read here.
        ValueError: If the VF value is not numeric.
    """
    # The context managers close both files even if a record raises.
    with open(infile, 'r') as src, open(outfile, 'w') as dst:
        for line in src:
            if line.startswith("#") or not line.strip():
                continue
            record = line.split("\t")

            # REF and ALT of equal length are labelled "snv", else "indel".
            if len(record[3]) == len(record[4]):
                variant_type = "snv"
            else:
                variant_type = "indel"

            vcf = commonTool.vcf(record)
            readDepth = vcf.parseInfo("DP")
            # VF is stored as a fraction; report it as a percentage.
            altFreq = str(float(vcf.parseDetail("VF", 10)) * 100)
            alleleDepth = vcf.parseDetail("AD", 10)
            # Second AD entry; rstrip() removes the newline character.
            altDepth = alleleDepth.split(",")[1].rstrip()
            vep = vcf.parseInfo("CSQ").split('|')

            # Parse dbSNP and COSMIC identifiers from the VEP field.
            # Sample records:
            #CSQ=G|intron_variant|MODIFIER|CSF3R|ENSG00000119535|Transcript|ENST00000373103|protein_coding||15/16|ENST00000373103.1:c.1959-107T>C|||||||||-1|HGNC|2439|||||||||||||	GT:GQ:AD:VF:NL:SB:GQX	0/1:59:827,26:0.0304:20:-30.0099:59
            #CSQ=-|intron_variant|MODIFIER|UBE2N|ENSG00000177889|Transcript|ENST00000318066|protein_coding||1/3|ENST00000318066.2:c.30+3207delc|||||||||-1|HGNC|12492|||||||||||||	GT:VF:DP:AD	0/1:0.047619:42:40,2
            #CSQ=T|intergenic_variant|MODIFIER|||||||||||||||rs749868180||||||||T:0.0130|T:0.0047|T:0.0265|T:0.0000|T:0.0083|T:0.0000||||	GT:VF:DP:AD	0/1:1.000000:37:0,37
            #CSQ=C|intron_variant|MODIFIER|PTEN|ENSG00000171862|Transcript|ENST00000371953|protein_coding||5/8|ENST00000371953.3:c.492+14T>C|||||||COSM14247||1|HGNC|9588|||||||||||1|1|	GT:VF:DP:AD	0/1:0.010707:2802:2772,30
            IDs = vep[17].split("&")
            dbSNP = ",".join(ID for ID in IDs if ID.startswith("rs"))
            # Fixed: the original appended `cosmic` to itself instead of the
            # current ID.  NOTE(review): `cosmic` is collected but is not
            # part of the output row below -- confirm whether it should be.
            cosmic = ",".join(ID for ID in IDs if ID.startswith("COSM"))

            # Construct the output row.
            output = [
                vep[3], vep[8], record[0], record[1], record[3], record[4],
                vep[2], variant_type, record[5], altFreq, readDepth,
                altDepth, vep[1], vep[22], vep[23], vep[10], vep[11], dbSNP,
                vep[34],
            ]

            dst.write("\t".join(output))
            dst.write("\n")
Пример #4
0
def parseIlluminaNextseq(infile, outfile):
    """Flatten VEP-annotated VCF records into tab-separated rows.

    Header lines (starting with ``#``) and blank lines are skipped.  For
    every other line one row is written, built from fixed VCF columns, the
    ``DP`` INFO value, the ``VF`` and ``AD`` values obtained through
    ``parseDetail(..., 10)``, and selected positions of the ``|``-separated
    ``CSQ`` INFO value.

    Args:
        infile: Path of the annotated VCF file to read.
        outfile: Path of the tab-separated file to write (truncated).

    Raises:
        IndexError: If a data line or its CSQ value has fewer fields than
            the positions read here.
        ValueError: If the VF value is not numeric.
    """
    # The context managers close both files even if a record raises.
    with open(infile, 'r') as src, open(outfile, 'w') as dst:
        for line in src:
            if line.startswith("#") or not line.strip():
                continue
            record = line.split("\t")

            # REF and ALT of equal length are labelled "snv", else "indel".
            if len(record[3]) == len(record[4]):
                variant_type = "snv"
            else:
                variant_type = "indel"

            vcf = commonTool.vcf(record)
            readDepth = vcf.parseInfo("DP")
            # VF is stored as a fraction; report it as a percentage.
            altFreq = str(float(vcf.parseDetail("VF", 10)) * 100)
            alleleDepth = vcf.parseDetail("AD", 10)
            # Second AD entry; rstrip() removes the newline character.
            altDepth = alleleDepth.split(",")[1].rstrip()
            vep = vcf.parseInfo("CSQ").split('|')

            # CSQ position 17 holds "&"-joined known-variant identifiers;
            # sort them into dbSNP ("rs...") and COSMIC ("COSM...") lists.
            IDs = vep[17].split("&")
            dbSNP = ",".join(ID for ID in IDs if ID.startswith("rs"))
            # NOTE(review): `cosmic` is collected but is not part of the
            # output row below -- confirm whether it should be.
            cosmic = ",".join(ID for ID in IDs if ID.startswith("COSM"))

            # Construct the output row.
            output = [
                vep[3], vep[8], record[0], record[1], record[3], record[4],
                vep[2], variant_type, record[5], altFreq, readDepth,
                altDepth, vep[1], vep[23], vep[24], vep[10], vep[11], dbSNP,
                vep[35],
            ]

            dst.write("\t".join(output))
            dst.write("\n")
Пример #5
0
def parseIonNewVarView(infile, outfile):
    """Flatten VEP-annotated VCF records into tab-separated rows.

    Header lines (starting with ``#``) and blank lines are skipped.  For
    every other line one row is written, built from fixed VCF columns, the
    ``FDP``, ``AF`` and ``FAO`` INFO values, and selected positions of the
    ``|``-separated ``CSQ`` INFO value.

    Args:
        infile: Path of the annotated VCF file to read.
        outfile: Path of the tab-separated file to write (truncated).

    Raises:
        IndexError: If a data line or its CSQ value has fewer fields than
            the positions read for the output row.
        ValueError: If the AF value is not numeric.
    """
    # The context managers close both files even if a record raises.
    with open(infile, 'r') as src, open(outfile, 'w') as dst:
        for line in src:
            if line.startswith("#") or not line.strip():
                continue
            record = line.split("\t")

            # REF and ALT of equal length are labelled "snv", else "indel".
            if len(record[3]) == len(record[4]):
                variant_type = "snv"
            else:
                variant_type = "indel"

            vcf = commonTool.vcf(record)
            readDepth = vcf.parseInfo("FDP")
            # AF is stored as a fraction; report it as a percentage.
            altFreq = str(float(vcf.parseInfo("AF")) * 100)
            altDepth = vcf.parseInfo("FAO")
            vep = vcf.parseInfo("CSQ").split('|')

            # Parse dbSNP and COSMIC identifiers from the VEP field.
            IDs = vep[17].split("&")
            dbSNP = ",".join(ID for ID in IDs if ID.startswith("rs"))
            # NOTE(review): `cosmic` is collected but is not part of the
            # output row below -- confirm whether it should be.
            cosmic = ",".join(ID for ID in IDs if ID.startswith("COSM"))

            # Best-effort parse of "allele:frequency" annotations at CSQ
            # positions 25-29.  NOTE(review): none of these values reach the
            # output row -- confirm whether they should.
            globalMinorAllele = ""
            globalMinorAlleleFreq = ""
            alleleFreqAmr = ""
            alleleFreqAsn = ""
            alleleFreqAfr = ""
            alleleFreqEur = ""
            try:
                globalMinorAllele = vep[25].split(":")[0]
                globalMinorAlleleFreq = str(
                    float(vep[25].split(":")[1]) * 100)
                alleleFreqAmr = str(
                    float(vep[27].split("&")[0].split(":")[1]) * 100)
                alleleFreqAsn = str(
                    float(vep[28].split("&")[0].split(":")[1]) * 100)
                alleleFreqAfr = str(
                    float(vep[26].split("&")[0].split(":")[1]) * 100)
                alleleFreqEur = str(
                    float(vep[29].split("&")[0].split(":")[1]) * 100)
            except (IndexError, ValueError):
                # Missing or non-numeric annotations are expected for many
                # variants; the affected values simply stay empty.  Catching
                # ValueError too keeps an unparsable frequency from aborting
                # the run over data that is never written out.
                pass

            # Construct the output row.
            output = [
                vep[3], vep[8], record[0], record[1], record[3], record[4],
                vep[2], variant_type, record[5], altFreq, readDepth,
                altDepth, vep[1], vep[23], vep[24], vep[10], vep[11], dbSNP,
                vep[35],
            ]

            dst.write("\t".join(output))
            dst.write("\n")