示例#1
0
		
		# Write `options.numsamples` subsample tables for this genome, one
		# tab-separated text file per subsample, under
		# RESULTSDIR/sampling/<short_name>/.
		short_name = ALIASES[genome]
		outdirect = os.path.join(RESULTSDIR, 'sampling', short_name)
		if not os.path.exists(outdirect):
			# makedirs rather than mkdir: the intermediate 'sampling'
			# directory may not exist yet on a fresh results tree.
			os.makedirs(outdirect)
		ifile = os.path.join(DATADIR, genome + '.fa')
		# Output files are numbered from 1 (zero-padded to five digits).
		for sample_num in range(1, options.numsamples + 1):
			outfile = os.path.join(outdirect, short_name + '%05d.txt' % sample_num)
			# Keep an existing result unless regeneration is forced.
			if os.path.exists(outfile) and not options.forcenew:
				continue

			logging.warning('Processing subsample %d' % sample_num)

			# A new sequence generator is created for every subsample.
			# NOTE(review): SampleIterator presumably yields a
			# `options.percentage` share of the sequences -- confirm
			# against its definition.
			fgen = SeqGen(ifile)
			sub_gen = SampleIterator(fgen, options.percentage)
			d = DictFromGen(sub_gen, label = short_name, chunk_size = 10)

			logging.warning('Writting file: %s' % outfile)
			with open(outfile, 'w') as handle:
				# Keys are 2-tuples and values are (count, frac) pairs;
				# sorted() gives a deterministic row order and works on
				# both list-returning and view-returning dict.keys().
				for key in sorted(d):
					count, frac = d[key]
					elm, spec = key
					handle.write('\t'.join(map(str, [elm, spec, count, '%.10f' % frac]))+'\n')
				
				
	
	
	
示例#2
0
if __name__ == '__main__':
	# Script entry point: every positional argument is a genome short name.
	# For each one, build its dictionary with DictFromGen and dump it as a
	# tab-separated file named RESULTSDIR/flu_elmdict_<short_name>.

	parser = OptionParser()

	parser.add_option('-c', '--use-cloud', default = False, action = 'store_true',
						dest = 'usecloud', help = 'Use PiCloud computing')

	(options, args) = parser.parse_args()

	cloud.setkey(CLOUD_KEY, CLOUD_SECRET)
	if not options.usecloud:
		# Without --use-cloud the cloud library's simulator is started
		# (presumably so jobs run locally -- confirm against PiCloud docs).
		cloud.start_simulator()

	for short_name in args:
		logging.warning('Processing Genome: %s' % short_name)
		flu_gen = GetFluSeqs(organism = FLU_NAMES[short_name])
		outdata = DictFromGen(flu_gen, label = short_name)

		logging.warning('writting data for %s' % short_name)
		with open(os.path.join(RESULTSDIR, 'flu_elmdict_'+short_name), 'w') as handle:
			# Keys are 2-tuples and values are (count, frac) pairs.
			# sorted() replaces keys()+sort(), which only works when
			# keys() returns a list (Python 2).
			for key in sorted(outdata):
				count, frac = outdata[key]
				elm, spec = key
				handle.write('\t'.join(map(str, [elm, spec, count, '%.10f' % frac]))+'\n')