Exemplo n.º 1
0
 FP = glob.glob(os.path.join(inputdir, '*.fastq.*'))
 if len(FP) == 0:
     # single file per-sample
     FP = glob.glob(os.path.join(inputdir, '*.fastq'))
 file_prefix = FP[fr]
 file_split = file_prefix[file_prefix.index('.fastq') + 6:]
 file_prefix = file_prefix[file_prefix.rfind('/') +
                           1:file_prefix.index('.fastq')]
 hashobject = Fastq_Reader(inputdir, outputdir)
 f = open(hashobject.input_path + file_prefix + '.fastq' + file_split, 'r')
 read_type = hashobject.id_type(f)
 g = gzip.open(
     hashobject.output_path + file_prefix + '.hashq' + file_split + '.gz',
     'wb')
 hashobject.hpfx = hashobject.hpfx + str(hashobject.kmer_size) + ','
 A = []
 reads_hashed = 0
 while A != None:
     try:
         A, B = hashobject.generator_to_bins(hashobject.read_generator(
             f, max_reads=25000, verbose_ids=True),
                                             rc=do_reverse_compliment)
         for b in range(len(B)):
             reads_hashed += kmer_bins(B[b], A, hashobject.hpfx, g,
                                       read_type)
     except Exception, err:
         pass
         #print str(err)
 f.close()
 g.close()
 print 'total reads hashed:', reads_hashed
		elif opt in ('-o','--outputdir'):
			outputdir = arg
			if outputdir[-1] != '/':
				outputdir += '/'
		elif opt in ('-z','--reversecomp'):
			do_reverse_compliment = False
	FP = glob.glob(os.path.join(inputdir,'*.fastq.*'))
	if len(FP) == 0:
		# single file per-sample
		FP = glob.glob(os.path.join(inputdir,'*.fastq'))
	file_prefix = FP[fr]
	file_split = file_prefix[file_prefix.index('.fastq')+6:]
	file_prefix = file_prefix[file_prefix.rfind('/')+1:file_prefix.index('.fastq')]
	hashobject = Fastq_Reader(inputdir,outputdir)
	f = open(hashobject.input_path+file_prefix+'.fastq'+file_split,'r')
	read_type = hashobject.id_type(f)
	g = gzip.open(hashobject.output_path+file_prefix+'.hashq'+file_split+'.gz','wb')
	hashobject.hpfx = hashobject.hpfx + str(hashobject.kmer_size)+','
	A = []
	reads_hashed = 0
	while A != None:
		try:
			A,B = hashobject.generator_to_bins(hashobject.read_generator(f,max_reads=25000,verbose_ids=True),rc=do_reverse_compliment)
			for b in range(len(B)):
				reads_hashed += kmer_bins(B[b],A,hashobject.hpfx,g,read_type)
		except Exception,err:
			pass
			#print str(err)
	f.close()
	g.close()
	print 'total reads hashed:',reads_hashed
Exemplo n.º 3
0
    # Hash every read of one FASTQ file and write the result to a gzip text
    # file named '<sample>.hashq<split suffix>.gz' in the reader's output path.
    #
    # Reads from the enclosing scope: input_dir, output_dir, task_rank (index
    # into the list of matched files) and do_reverse_compliment.

    # Split files ('*.fastq.*') win; an empty match falls back to the
    # single-file-per-sample pattern.
    FP = (glob.glob(os.path.join(input_dir, '*.fastq.*'))
          or glob.glob(os.path.join(input_dir, '*.fastq')))

    selected_path = FP[task_rank]
    ext_start = selected_path.index('.fastq')
    # Text after '.fastq' (6 characters) is the split suffix, possibly empty.
    file_split = selected_path[ext_start + 6:]
    # Sample name: the path without its directory and without '.fastq...'.
    file_prefix = selected_path[selected_path.rfind('/') + 1:ext_start]

    hashobject = Fastq_Reader(input_dir, output_dir)
    reads_file_name = ''.join(
        (hashobject.input_path, file_prefix, '.fastq', file_split))
    hashq_file_name = ''.join(
        (hashobject.output_path, file_prefix, '.hashq', file_split, '.gz'))

    with Fq.open_gz(reads_file_name) as f:
        hashobject.quality_codes = Fq.set_quality_codes(reads_file_name)
        print(reads_file_name)
        with gzip.open(hashq_file_name, 'wt') as g:
            print("[HashFastqReads] Starting to hash the reads.")
            # The whole file is consumed by this single call.
            IDs, bins = hashobject.generator_to_bins(
                Fq.fastq_generator(f), rc=do_reverse_compliment)

            print("[HashFastqReads] All k-mers hashed.")
            print("[HashFastqReads] Writing hashed reads to file.")
            # kmer_bins writes one bin to g and returns the amount that is
            # accumulated into the running total.
            reads_hashed = sum(
                kmer_bins(bins[b], IDs, g) for b in range(len(bins)))

            print("[HashFastqReads] Total reads hashed: " + str(reads_hashed))