# Write `options.numsamples` subsample result files for one genome.
#
# Each file lands in RESULTSDIR/sampling/<short_name>/ and is named
# <short_name>NNNNN.txt (1-based, zero-padded to five digits).  `genome` and
# `options` are supplied by the surrounding code, which is not shown here.
short_name = ALIASES[genome]
outdirect = os.path.join(RESULTSDIR, 'sampling', short_name)
# os.makedirs rather than os.mkdir: the intermediate 'sampling' directory may
# not exist yet, and os.mkdir only creates the final path component.
if not os.path.isdir(outdirect):
    os.makedirs(outdirect)
ifile = os.path.join(DATADIR, genome + '.fa')

for i in xrange(options.numsamples):
    outfile = os.path.join(outdirect, short_name + '%05d.txt' % (i+1,))
    # Keep results from a previous run unless a fresh run was requested.
    if os.path.exists(outfile) and not options.forcenew:
        continue

    logging.warning('Processing subsample %d', i + 1)
    # NOTE(review): presumably SeqGen yields the sequences of the FASTA file
    # and SampleIterator keeps `options.percentage` of them -- confirm
    # against their definitions.
    fgen = SeqGen(ifile)
    sub_gen = SampleIterator(fgen, options.percentage)
    d = DictFromGen(sub_gen, label = short_name, chunk_size = 10)

    logging.warning('Writting file: %s', outfile)
    with open(outfile, 'w') as handle:
        # One tab-separated row per (elm, spec) key, in sorted key order:
        # elm, spec, count, and frac printed with 10 decimal places.
        for key in sorted(d.keys()):
            count, frac = d[key]
            elm, spec = key
            handle.write('\t'.join(map(str, [elm, spec, count, '%.10f' % frac]))+'\n')
if __name__ == '__main__':
    # Script entry point: for every genome short name given on the command
    # line, build its result dictionary and dump it as a tab-separated file
    # named RESULTSDIR/flu_elmdict_<short_name>.
    parser = OptionParser()
    parser.add_option(
        '-c', '--use-cloud',
        dest='usecloud',
        action='store_true',
        default=False,
        help='Use PiCloud computing'
    )
    options, args = parser.parse_args()

    # Credentials are always registered; the simulator is started only when
    # the -c/--use-cloud flag was not given.
    cloud.setkey(CLOUD_KEY, CLOUD_SECRET)
    if not options.usecloud:
        cloud.start_simulator()

    for short_name in args:
        logging.warning('Processing Genome: %s', short_name)
        outdata = DictFromGen(
            GetFluSeqs(organism=FLU_NAMES[short_name]),
            label=short_name
        )

        logging.warning('writting data for %s', short_name)
        result_path = os.path.join(RESULTSDIR, 'flu_elmdict_' + short_name)
        with open(result_path, 'w') as handle:
            # Rows are emitted in sorted key order; each key unpacks into
            # (elm, spec) and each value into (count, frac).
            for key in sorted(outdata.keys()):
                count, frac = outdata[key]
                elm, spec = key
                fields = [str(elm), str(spec), str(count), '%.10f' % frac]
                handle.write('\t'.join(fields) + '\n')