def main():
    """Print the total number of allelic variants across all records.

    Records are read with ``omim.input`` from the source named by the first
    command-line argument. Records whose ``allelic_variants`` attribute is
    ``None`` contribute nothing to the total.
    """
    records = omim.input(sys.argv[1])
    total = sum(
        len(entry.allelic_variants)
        for entry in records
        if entry.allelic_variants is not None
    )
    # Single-argument parenthesised form prints the same under the
    # print statement as `print total` would.
    print(total)
# Substrings that mark a mutation as something other than a simple point
# substitution. They are tested in this order and the first match wins.
_NON_POINT_MARKERS = ("DEL", "DUP", "INS", "IVS", "FS")


def _normalise_amino_acid(code):
    """Return the title-cased residue code, "TERM" for "Ter", or None.

    None is returned when the title-cased code is neither "Ter" nor a key
    of ``one_letter_alphabet``.
    """
    name = code.title()
    if name == "Ter":
        return "TERM"
    # Direct mapping membership: avoids building and scanning a key list
    # on every lookup.
    if name in one_letter_alphabet:
        return name
    return None


def _parse_point_mutation(mutation):
    """Split a mutation string into (ref_aa, mut_aa, aa_pos), or None.

    The first three characters are taken as the reference residue, the last
    three as the mutant residue, and everything in between must parse as an
    integer position. None is returned if the position is not an integer or
    either residue is not recognised.
    """
    try:
        aa_pos = int(mutation[3:-3])
    except ValueError:
        return None
    ref_aa = _normalise_amino_acid(mutation[:3])
    if ref_aa is None:
        return None
    mut_aa = _normalise_amino_acid(mutation[-3:])
    if mut_aa is None:
        return None
    return ref_aa, mut_aa, aa_pos


def main():
    """Print one tab-separated line per phenotype of each point mutation.

    Records are read with ``omim.input`` from the source named by the first
    command-line argument. Every allelic variant with a non-None mutation is
    classified: mutations containing one of DEL/DUP/INS/IVS/FS are only
    counted; anything else is parsed as a point mutation and, if parsing
    fails, counted as unknown. For each parsed point mutation, one line is
    written to stdout per phenotype returned by ``_process_variant_title``:

        phenotype, gene, REF-MUT, position,
        (number of spaces in variant.text) + 1, omim:<record no.><variant no.>

    A one-line summary of all counters is written to stderr at the end.

    Raises:
        SystemExit: with the module docstring as usage text ("%prog"
            replaced by the script name) when no argument is given.
    """
    # Exit with the usage message if we don't have the correct arguments.
    if len(sys.argv) < 2:
        raise SystemExit(__doc__.replace("%prog", sys.argv[0]))

    av_count = unknown_count = 0
    marker_counts = dict.fromkeys(_NON_POINT_MARKERS, 0)

    for record in omim.input(sys.argv[1]):
        variants = record.allelic_variants
        if variants is None:
            continue
        av_count += len(variants)

        for variant in variants:
            mutation = variant.mutation
            # Variants without a mutation string are included in the total
            # but not in any per-category counter.
            if mutation is None:
                continue

            marker = next(
                (m for m in _NON_POINT_MARKERS if m in mutation), None)
            if marker is not None:
                marker_counts[marker] += 1
                continue

            # If we don't have a well-formed point mutation, class it as
            # unknown.
            parsed = _parse_point_mutation(mutation)
            if parsed is None:
                unknown_count += 1
                continue
            ref_aa, mut_aa, aa_pos = parsed

            # Process and print the phenotypes.
            for phenotype in _process_variant_title(variant.title):
                print("%s\t%s\t%s-%s\t%s\t%s\tomim:%s%s" % (
                    phenotype, variant.gene, ref_aa, mut_aa, aa_pos,
                    variant.text.count(" ") + 1,
                    record.number, variant.number))

    # Emit the summary as a single space-separated line on stderr.
    summary = [
        "TOTAL VARIANTS PARSED:", av_count,
        "DEL:", marker_counts["DEL"],
        "DUP:", marker_counts["DUP"],
        "INS:", marker_counts["INS"],
        "IVS:", marker_counts["IVS"],
        "FS:", marker_counts["FS"],
        "UNKNOWN:", unknown_count,
    ]
    sys.stderr.write(" ".join(str(part) for part in summary) + "\n")