elif opt in ('-o','--outputdir'):
			outputdir = arg
			if outputdir[-1] != '/':
				outputdir += '/'
		elif opt in ('-z','--reversecomp'):
			do_reverse_compliment = False
	FP = glob.glob(os.path.join(inputdir,'*.fastq.*'))
	if len(FP) == 0:
		# single file per-sample
		FP = glob.glob(os.path.join(inputdir,'*.fastq'))
	file_prefix = FP[fr]
	file_split = file_prefix[file_prefix.index('.fastq')+6:]
	file_prefix = file_prefix[file_prefix.rfind('/')+1:file_prefix.index('.fastq')]
	hashobject = Fastq_Reader(inputdir,outputdir)
	f = open(hashobject.input_path+file_prefix+'.fastq'+file_split,'r')
	read_type = hashobject.id_type(f)
	g = gzip.open(hashobject.output_path+file_prefix+'.hashq'+file_split+'.gz','wb')
	hashobject.hpfx = hashobject.hpfx + str(hashobject.kmer_size)+','
	A = []
	reads_hashed = 0
	while A != None:
		try:
			A,B = hashobject.generator_to_bins(hashobject.read_generator(f,max_reads=25000,verbose_ids=True),rc=do_reverse_compliment)
			for b in range(len(B)):
				reads_hashed += kmer_bins(B[b],A,hashobject.hpfx,g,read_type)
		except Exception,err:
			pass
			#print str(err)
	f.close()
	g.close()
	print 'total reads hashed:',reads_hashed
Exemplo n.º 2
0
         do_reverse_compliment = False
 # Select one fastq file (index fr among the matches in inputdir), hash its
 # reads batch by batch, and write the hashes to a gzip-compressed '.hashq'
 # file under the reader's output path.
 #
 # '*.fastq.*' files are preferred; the plain '*.fastq' pattern is only
 # tried when the first pattern matches nothing.
 FP = glob.glob(os.path.join(inputdir, '*.fastq.*'))
 if not FP:
     # single file per-sample
     FP = glob.glob(os.path.join(inputdir, '*.fastq'))
 # Work on the base name only: searching the full path for '.fastq' would
 # pick up a match inside a directory name and break both slices below.
 base_name = os.path.basename(FP[fr])
 marker_pos = base_name.index('.fastq')
 # Text following the 6-character '.fastq' marker (may be empty).
 file_split = base_name[marker_pos + 6:]
 file_prefix = base_name[:marker_pos]
 hashobject = Fastq_Reader(inputdir, outputdir)
 f = open(hashobject.input_path + file_prefix + '.fastq' + file_split, 'r')
 try:
     read_type = hashobject.id_type(f)
     g = gzip.open(
         hashobject.output_path + file_prefix + '.hashq' + file_split + '.gz',
         'wb')
     try:
         hashobject.hpfx = hashobject.hpfx + str(hashobject.kmer_size) + ','
         A = []
         reads_hashed = 0
         # Up to 25000 reads are consumed per iteration; iteration stops
         # only when A has been rebound to None.
         # NOTE(review): assumes generator_to_bins returns a None first
         # element once the input is exhausted -- confirm.
         # 'is not None' avoids invoking A's own '!=' implementation.
         while A is not None:
             try:
                 A, B = hashobject.generator_to_bins(
                     hashobject.read_generator(
                         f, max_reads=25000, verbose_ids=True),
                     rc=do_reverse_compliment)
                 for b in range(len(B)):
                     reads_hashed += kmer_bins(B[b], A, hashobject.hpfx, g,
                                               read_type)
             except Exception:
                 # Intentionally ignored (best-effort): a failing batch is
                 # skipped and the loop condition is checked again. Left
                 # unchanged because the loop's exit may depend on it.
                 pass
     finally:
         # Always release the output file, even on unexpected errors.
         g.close()
 finally:
     # Always release the input file, even if setup above fails.
     f.close()