def run_command():
    """Command-line entry point: expand variable sites into mtDNA sequence.

    Expects exactly one positional argument: the path of a CSV file whose
    rows each hold three columns -- a name, an integer count ``n`` and a
    sites string.  For every row the sites are passed to ``sites2seq`` for
    the selected region, and the ``entry2str`` rendering of the resulting
    name/sequence entry is printed ``n`` times.

    The region is the predefined one named by ``--region`` unless both
    ``--begin`` and ``--end`` are given, in which case that explicit range
    of positions is used (wrapping through the origin when end < begin).
    Supplying only one of ``--begin``/``--end`` has no effect.

    Exits with status 1, after printing a message, when the filename is
    missing, the file does not exist, a row does not have exactly three
    columns, or a row's count is not an integer.
    """
    # Set up the options parser
    usage = "usage: %prog [options] filename"
    parser = OptionParser(usage=usage)
    parser.add_option('-r', '--region', dest='region', default='hvr1',
                      help='which predefined sequence region to generate '
                           '(default hvr1) '
                           '(one of hvr1, hvr2, hvr1to2, coding, or all)')
    parser.add_option('-b', '--begin', dest='begin', type='int',
                      default=None,
                      help='define a region to generate (use with --end)')
    parser.add_option('-e', '--end', dest='end', type='int',
                      default=None,
                      help='define a region to generate (use with --begin)')

    # Parse the options
    options, args = parser.parse_args()

    # The filename is always required
    if len(args) != 1:
        print('You must provide a filename!')
        print("Type 'sites2seq -h' for help.")
        sys.exit(1)
    filename = args[0]

    # make sure the sites file exists
    if not os.path.exists(filename):
        print('ERROR: Could not find file: %s' % filename)
        sys.exit(1)

    # The region depends only on the options, so work it out once instead
    # of once per input row.
    region = options.region
    if options.begin is not None and options.end is not None:
        if options.end < options.begin:
            # wrap through the origin; presumably 16569 is the last
            # position of the reference numbering -- TODO confirm
            region = (list(range(options.begin, 16570)) +
                      list(range(1, options.end + 1)))
        else:
            region = list(range(options.begin, options.end + 1))

    # Python 2 needs the explicit 'U' flag for universal newlines; Python 3
    # text mode translates newlines by default and rejects 'U' from 3.11 on.
    mode = 'r' if sys.version_info[0] >= 3 else 'rU'
    with open(filename, mode) as handle:
        for row_number, entry in enumerate(csv.reader(handle), 1):
            if len(entry) != 3:
                print('ERROR: Problem on row %d of the input file'
                      % row_number)
                sys.exit(1)
            name, copies, sites = entry
            try:
                n = int(copies)
            except ValueError:
                # A non-numeric count is a malformed row, not a crash.
                print('ERROR: Problem on row %d of the input file'
                      % row_number)
                sys.exit(1)

            sequence = sites2seq(sites, region=region)
            record = {'name': name, 'sequence': sequence}
            for _ in range(n):
                print(entry2str(record))
2668, 3186, 3189, 3916, 3947, 4089, 4092, ] count = 0 of = open(ofn, "w") for entry in open(mtgs_fn, "U"): count += 1 if count not in expect_to_fail: entry = entry.strip() entry = entry.replace("-", "") try: sites = seq2sites(entry) seq = sites2seq(sites, what="all") seq = seq.replace("-", "") if entry != seq: print count, "FAILED" of.write("%d\n" % count) else: print count, "PASSED" except: print count, "FAILED" of.write("%d\n" % count) of.flush() of.close()
# Round-trip check for the genographic sites list: each line is parsed into
# sites, expanded to sequence over positions 16000..16569, converted back to
# sites, and compared with the (sorted) input.  Mismatches are written to
# fail_genographic.txt; matches are reported on stdout.
here = os.path.abspath(os.path.dirname(__file__))
module_dir = os.path.join(here, '..')
genographic_sites_filepath = os.path.join(here, 'genographic_sites.txt')
outfn = os.path.join(here, 'fail_genographic.txt')

# Put the parent directory first on the import path before importing the
# package under test.
import sys
sys.path = [module_dir] + sys.path

from oldowan.mitomotifs import sites2seq, seq2sites, str2sites

outf = open(outfn, 'w')

count = 0
for count, raw_line in enumerate(open(genographic_sites_filepath, 'rU'), 1):
    line = raw_line.upper()

    sites = str2sites(line)
    sites.sort()

    seq = sites2seq(sites, region=range(16000,16570))

    roundtrip_sites = seq2sites(seq)
    roundtrip_sites.sort()

    if sites == roundtrip_sites:
        print("%d,good" % count)
    else:
        # record the line number together with both site lists
        outf.write("%d,%s,%s\n" % (count, sites, roundtrip_sites))

outf.flush()
outf.close()
# Round-trip check for the d-loop FASTA set: every entry not listed in the
# expect-to-fail file is converted to sites and back to sequence over
# DLOOPplus.  An entry passes when its original sequence occurs inside the
# regenerated (gap-stripped) sequence; failures are written to fail_dloop.txt.

# Positions 15800..16569 followed by 1..1499.  list() keeps the concatenation
# working where range() is lazy.
DLOOPplus = list(range(15800, 16570)) + list(range(1, 1500))

dloops_fn = os.path.join(here, 'dloops.fasta')
ofn = os.path.join(here, 'fail_dloop.txt')
fail_fn = os.path.join(here, 'dloop_expect_to_fail.txt')

# Each non-blank line holds an entry number followed by one trailing
# character that is dropped before conversion (presumably a separator --
# verify against the data file).
expect_to_fail = []
with open(fail_fn, 'U') as fail_file:
    for line in fail_file:
        line = line.strip()
        if line:  # a blank line would otherwise crash int('')
            expect_to_fail.append(int(line[:-1]))
skipped = frozenset(expect_to_fail)  # O(1) membership inside the loop

count = 0
with open(ofn, 'w') as of:
    for entry in fasta(dloops_fn):
        count += 1
        if count in skipped:
            continue
        try:
            sites = seq2sites(entry["sequence"])
            seq = sites2seq(sites, region=DLOOPplus)
            seq = seq.replace('-', '')
            if entry["sequence"] in seq:
                print("%s %s %s" % (count, entry["name"], sites2str(sites)))
            else:
                print("FAILED %s %s" % (count, entry["name"]))
                of.write('%d, %s\n' % (count, entry["name"]))
        except Exception as e:
            # Any error during conversion is reported as a failed entry.
            print("FAILED %s %s %s" % (e, count, entry["name"]))
            of.write('%d, %s\n' % (count, entry["name"]))