Exemplo n.º 1
0
def main():
    """Stream a ClinVar XML file and process each current ClinVarSet record.

    Command-line arguments:
        clinVarXmlFilename: path of the XML file, read with the built-in
            open() one line at a time.
        -a / --assembly: value forwarded unchanged to processSubmission()
            for every record (default "GRCh38").

    Each ClinVarSet element is collected from the line containing
    "<ClinVarSet" through the line containing "</ClinVarSet>" and parsed on
    its own, so the whole document is never held in memory at once.  Records
    for which clinvar.isCurrent() is false are skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    parser.add_argument('-a', "--assembly", default="GRCh38")
    args = parser.parse_args()

    printHeader()

    # Lines of the ClinVarSet element currently being collected; joined once
    # per record instead of growing a string with repeated "+=".
    setLines = []
    inClinVarSet = False
    with open(args.clinVarXmlFilename) as inputFile:
        for line in inputFile:
            if "<ClinVarSet" in line:
                # Opening tag: start a fresh record.
                setLines = [line]
                inClinVarSet = True
            elif "</ClinVarSet>" in line:
                # Closing tag: the record is complete, parse it standalone.
                setLines.append(line)
                inClinVarSet = False
                cvs = ET.fromstring("".join(setLines))
                if clinvar.isCurrent(cvs):
                    submissionSet = clinvar.clinVarSet(cvs)
                    processSubmission(submissionSet, args.assembly)
                setLines = []
            elif inClinVarSet:
                setLines.append(line)
Exemplo n.º 2
0
def main():
    """Process every current ClinVarSet found in a ClinVar XML file.

    Command-line arguments:
        clinVarXmlFilename: path of the XML file to read line by line.
        -a / --assembly: passed as the second argument of
            processSubmission() (default "GRCh38").

    The text between a line containing "<ClinVarSet" and the next line
    containing "</ClinVarSet>" (both inclusive) is parsed as one XML
    fragment.  Fragments rejected by clinvar.isCurrent() are ignored; the
    others are wrapped with clinvar.clinVarSet() and handed to
    processSubmission().
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    parser.add_argument('-a', "--assembly", default="GRCh38")
    args = parser.parse_args()

    printHeader()

    # Accumulate the record as a list of lines and join once when the closing
    # tag is reached; this avoids repeated string concatenation.
    recordLines = []
    insideRecord = False
    with open(args.clinVarXmlFilename) as inputFile:
        for line in inputFile:
            if "<ClinVarSet" in line:
                recordLines = [line]
                insideRecord = True
            elif "</ClinVarSet>" in line:
                recordLines.append(line)
                insideRecord = False
                cvs = ET.fromstring("".join(recordLines))
                if clinvar.isCurrent(cvs):
                    processSubmission(clinvar.clinVarSet(cvs), args.assembly)
                recordLines = []
            elif insideRecord:
                recordLines.append(line)
Exemplo n.º 3
0
def main():
    """Filter a gzipped ClinVar XML file down to BRCA1/BRCA2 records.

    Command-line arguments:
        clinVarXmlFilename: path of the gzip-compressed XML file.

    Output goes to standard output:
      * every line outside a ClinVarSet element that is longer than one
        character is echoed with trailing whitespace stripped;
      * a ClinVarSet element is echoed verbatim when clinvar.isCurrent()
        accepts it and its reference assertion's variant has geneSymbol
        "BRCA1" or "BRCA2".

    NOTE(review): the substring tests compare str literals with the lines
    yielded by gzip.open(); on Python 3 those lines are bytes unless the file
    is opened in text mode -- confirm the target interpreter.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    args = parser.parse_args()

    # Lines of the ClinVarSet element currently being collected.
    setLines = []
    inClinVarSet = False
    with gzip.open(args.clinVarXmlFilename) as inputFile:
        for line in inputFile:
            if "<ClinVarSet" in line:
                setLines = [line]
                inClinVarSet = True
            elif "</ClinVarSet>" in line:
                setLines.append(line)
                inClinVarSet = False
                record = "".join(setLines)
                cvs = ET.fromstring(record)
                if clinvar.isCurrent(cvs):
                    submissionSet = clinvar.clinVarSet(cvs)
                    variant = submissionSet.referenceAssertion.variant
                    if (variant is not None
                            and variant.geneSymbol in ("BRCA1", "BRCA2")):
                        # Single-argument print() behaves the same on
                        # Python 2 and Python 3.
                        print(record)
                setLines = []
            elif inClinVarSet:
                setLines.append(line)
            elif len(line) > 1:
                # Outside any record: pass the line through.
                print(line.rstrip())
Exemplo n.º 4
0
def main():
    """Echo only the BRCA1/BRCA2 ClinVarSet records of a gzipped XML file.

    Command-line arguments:
        clinVarXmlFilename: path of the gzip-compressed ClinVar XML file.

    Lines that are not part of a ClinVarSet element are printed (right-
    stripped) when they are longer than one character.  Each ClinVarSet
    element is buffered from its "<ClinVarSet" line to its "</ClinVarSet>"
    line, parsed, and printed unchanged if clinvar.isCurrent() is true and
    the reference assertion's variant has geneSymbol "BRCA1" or "BRCA2".

    NOTE(review): gzip.open() yields bytes on Python 3 in its default mode,
    which would break the str substring tests below -- confirm the target
    interpreter.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    args = parser.parse_args()

    brcaGenes = ("BRCA1", "BRCA2")
    recordLines = []      # lines of the element being buffered
    insideRecord = False

    with gzip.open(args.clinVarXmlFilename) as inputFile:
        for line in inputFile:
            if "<ClinVarSet" in line:
                recordLines = [line]
                insideRecord = True
            elif "</ClinVarSet>" in line:
                recordLines.append(line)
                insideRecord = False
                recordText = "".join(recordLines)
                cvs = ET.fromstring(recordText)
                if clinvar.isCurrent(cvs):
                    variant = clinvar.clinVarSet(cvs).referenceAssertion.variant
                    if variant is not None and variant.geneSymbol in brcaGenes:
                        # print() with one argument is valid on Python 2 too.
                        print(recordText)
                recordLines = []
            elif insideRecord:
                recordLines.append(line)
            elif len(line) > 1:
                print(line.rstrip())
Exemplo n.º 5
0
def main():
    """Print one tab-separated line per qualifying ClinVar submission.

    Command-line arguments:
        clinVarXmlFilename: path of the ClinVar XML file (parsed in full with
            ET.parse).
        -a / --assembly: key looked up in each variant's coordinates
            (default "GRCh38").

    After printHeader(), every current ClinVarSet is examined and each of its
    otherAssertions is reported unless:
      * its method is "literature only" and its submitter is not "Counsyl";
      * its origin is "somatic";
      * its clinicalSignificance is "none provided" or "not provided";
      * the variant's HGVS string starts with "NP".

    Columns: hgvs, submitter, clinicalSignificance, dateLastUpdated,
    accession, origin, method, genomic coordinate, geneSymbol, protein change.
    The coordinate is "chrNone:None:None>None" when the variant has no entry
    for the requested assembly.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    parser.add_argument('-a', "--assembly", default="GRCh38")
    args = parser.parse_args()

    printHeader()

    # Matches the "(BRCA1)" / "(BRCA2)" gene tag inside a variant name.  Raw
    # string, and the class is [12]: the previous [1|2] also accepted "|".
    geneTag = re.compile(r"\(BRCA[12]\)")
    uninformative = ("none provided", "not provided")
    proteinKey = "HGVS, protein, RefSeq"

    root = ET.parse(args.clinVarXmlFilename).getroot()
    for cvs in root.findall("ClinVarSet"):
        if not clinvar.isCurrent(cvs):
            continue
        submissionSet = clinvar.clinVarSet(cvs)
        ra = submissionSet.referenceAssertion
        for oa in submissionSet.otherAssertions.values():
            # Literature-only submissions are kept only for Counsyl.
            if oa.method == "literature only" and oa.submitter != "Counsyl":
                continue
            if oa.origin == "somatic" or oa.clinicalSignificance in uninformative:
                continue

            variant = ra.variant
            # The first whitespace-delimited token of the name is the HGVS
            # string; drop the gene tag from it.
            hgvs = geneTag.sub("", variant.name.split()[0])

            # "in" replaces dict.has_key(), which no longer exists in Python 3.
            proteinChange = None
            if proteinKey in variant.attribute:
                proteinChange = variant.attribute[proteinKey]

            if hgvs.startswith("NP"):
                continue

            genomicCoordinate = "chrNone:None:None>None"
            if args.assembly in variant.coordinates:
                genomicData = variant.coordinates[args.assembly]
                genomicCoordinate = "chr%s:%s:%s>%s" % (
                    genomicData.chrom,
                    genomicData.start,
                    genomicData.referenceAllele,
                    genomicData.alternateAllele)

            print("\t".join((str(hgvs),
                             str(oa.submitter),
                             str(oa.clinicalSignificance),
                             str(oa.dateLastUpdated),
                             str(oa.accession),
                             str(oa.origin),
                             str(oa.method),
                             genomicCoordinate,
                             str(variant.geneSymbol),
                             str(proteinChange))))
Exemplo n.º 6
0
def main():
    """Print an XML document containing only current BRCA1/BRCA2 ClinVarSets.

    Command-line arguments:
        clinVarXmlFilename: path of the ClinVar XML file (parsed in full with
            ET.parse).

    A new root element with the same tag and attributes as the input root is
    created.  Every ClinVarSet child that clinvar.isCurrent() accepts and
    whose reference assertion's variant has geneSymbol "BRCA1" or "BRCA2" is
    appended to it.  The result is passed through prettify() (defined
    elsewhere in the file) and printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    args = parser.parse_args()

    root = ET.parse(args.clinVarXmlFilename).getroot()
    newRoot = ET.Element(root.tag, attrib=root.attrib)
    for cvs in root.findall("ClinVarSet"):
        if not clinvar.isCurrent(cvs):
            continue
        variant = clinvar.clinVarSet(cvs).referenceAssertion.variant
        # Identity test for None; the variant may be absent.
        if variant is not None and variant.geneSymbol in ("BRCA1", "BRCA2"):
            newRoot.append(cvs)
    print(prettify(newRoot))
Exemplo n.º 7
0
def main():
    """Copy the current BRCA1/BRCA2 ClinVarSet elements into a new document.

    Command-line arguments:
        clinVarXmlFilename: path of the ClinVar XML file to parse.

    The output root reuses the input root's tag and attributes.  A
    ClinVarSet is kept when clinvar.isCurrent() is true for it and the
    variant of its reference assertion is present with geneSymbol "BRCA1" or
    "BRCA2".  The filtered tree is rendered by prettify() (defined elsewhere
    in the file) and printed to standard output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    args = parser.parse_args()

    brcaGenes = ("BRCA1", "BRCA2")
    tree = ET.parse(args.clinVarXmlFilename)
    root = tree.getroot()
    newRoot = ET.Element(root.tag, attrib=root.attrib)
    for cvs in root.findall("ClinVarSet"):
        if clinvar.isCurrent(cvs):
            submissionSet = clinvar.clinVarSet(cvs)
            variant = submissionSet.referenceAssertion.variant
            # "is not None" rather than "!= None": identity is the intent.
            if variant is not None and variant.geneSymbol in brcaGenes:
                newRoot.append(cvs)
    print(prettify(newRoot))
Exemplo n.º 8
0
def main():
    """Filter a gzipped ClinVar XML file down to BRCA1/BRCA2 records, with logging.

    Command-line arguments:
        clinVarXmlFilename: path of the gzip-compressed XML file.
        -a / --artifacts_dir: directory that receives the debug log
            "clinvarbrcapy.log"; when omitted the log is written to the
            current working directory.

    Output goes to standard output:
      * lines outside any ClinVarSet element that are longer than one
        character are echoed with trailing whitespace stripped;
      * a ClinVarSet element is echoed verbatim when clinvar.isCurrent()
        accepts it and its reference assertion's variant has geneSymbol
        "BRCA1" or "BRCA2".
    Records for which clinvar.clinVarSet() raises AttributeError are logged
    at DEBUG level and skipped.

    NOTE(review): gzip.open() yields bytes on Python 3 in its default mode,
    which would break the str substring tests below -- confirm the target
    interpreter.
    """
    import os  # local import: only this entry point needs it

    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    parser.add_argument(
        '-a',
        "--artifacts_dir",
        help='Artifacts directory with pipeline artifact files.')
    args = parser.parse_args()

    # os.path.join supplies the separator when the directory lacks a trailing
    # slash; "or ''" avoids a TypeError when the option is not given.
    log_file_path = os.path.join(args.artifacts_dir or "",
                                 "clinvarbrcapy.log")
    logging.basicConfig(filename=log_file_path,
                        filemode="w",
                        level=logging.DEBUG)

    # Lines of the ClinVarSet element currently being collected.
    setLines = []
    inClinVarSet = False
    with gzip.open(args.clinVarXmlFilename) as inputFile:
        for line in inputFile:
            if "<ClinVarSet" in line:
                setLines = [line]
                inClinVarSet = True
            elif "</ClinVarSet>" in line:
                setLines.append(line)
                inClinVarSet = False
                record = "".join(setLines)
                setLines = []
                cvs = ET.fromstring(record)
                if not clinvar.isCurrent(cvs):
                    continue
                try:
                    submissionSet = clinvar.clinVarSet(cvs)
                except AttributeError:
                    # TODO: this is a hack to avoid errors, figure out how to
                    # correctly handle these reports.
                    logging.debug(
                        "AttributeError running clinvar.clinVarSet(cvs), inputBuffer: %s, cvs: %s",
                        record, cvs)
                    continue
                variant = submissionSet.referenceAssertion.variant
                if (variant is not None
                        and variant.geneSymbol in ("BRCA1", "BRCA2")):
                    # Single-argument print() behaves the same on Python 2
                    # and Python 3.
                    print(record)
            elif inClinVarSet:
                setLines.append(line)
            elif len(line) > 1:
                # Outside any record: pass the line through.
                print(line.rstrip())
Exemplo n.º 9
0
def main():
    """Print one tab-separated line for every submission of each current ClinVarSet.

    Command-line arguments:
        clinVarXmlFilename: path of the ClinVar XML file (parsed in full with
            ET.parse).
        -a / --assembly: key looked up in each variant's coordinates
            (default "GRCh38").

    Columns: hgvs, submitter (UTF-8 encoded), clinicalSignificance,
    dateLastUpdated, accession, id, origin, method, genomic coordinate,
    geneSymbol, protein change.  The coordinate is "chrNone:None:None>None"
    when the variant has no entry for the requested assembly.

    NOTE(review): joining the encoded submitter with str values only works
    where encode() returns str (Python 2) -- confirm the target interpreter.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("clinVarXmlFilename")
    parser.add_argument('-a', "--assembly", default="GRCh38")
    args = parser.parse_args()

    printHeader()

    # Matches the "(BRCA1)" / "(BRCA2)" gene tag inside a variant name.  Raw
    # string, and the class is [12]: the previous [1|2] also accepted "|".
    geneTag = re.compile(r"\(BRCA[12]\)")
    proteinKey = "HGVS, protein, RefSeq"

    root = ET.parse(args.clinVarXmlFilename).getroot()
    for cvs in root.findall("ClinVarSet"):
        if not clinvar.isCurrent(cvs):
            continue
        submissionSet = clinvar.clinVarSet(cvs)
        ra = submissionSet.referenceAssertion
        for oa in submissionSet.otherAssertions.values():
            variant = ra.variant
            # The first whitespace-delimited token of the name is the HGVS
            # string; drop the gene tag from it.
            hgvs = geneTag.sub("", variant.name.split()[0])

            # "in" replaces dict.has_key(), which no longer exists in Python 3.
            proteinChange = None
            if proteinKey in variant.attribute:
                proteinChange = variant.attribute[proteinKey]

            if args.assembly in variant.coordinates:
                genomicData = variant.coordinates[args.assembly]
                genomicCoordinate = "chr%s:%s:%s>%s" % (
                    genomicData.chrom, genomicData.start,
                    genomicData.referenceAllele, genomicData.alternateAllele)
            else:
                # Same text the format string yields for four None values.
                genomicCoordinate = "chrNone:None:None>None"

            print("\t".join(
                (str(hgvs), oa.submitter.encode('utf-8'),
                 str(oa.clinicalSignificance), str(oa.dateLastUpdated),
                 str(oa.accession), str(oa.id), str(oa.origin),
                 str(oa.method), genomicCoordinate,
                 str(variant.geneSymbol), str(proteinChange))))