Exemplo n.º 1
0
 # Command-line driver: parse -i/-o/-k/-s, build a Fastq_Reader with a new
 # hash of the requested size, generate its random k-mers and wheel, then
 # write 'hashParts.txt' into the output directory.
 try:
     opts, args = getopt.getopt(
         sys.argv[1:], 'hi:o:k:s:',
         ["help", "inputdir=", "outputdir=", "kmersize=", "hashsize="])
 except getopt.GetoptError:
     # Only option-parsing errors are treated as usage errors; anything else
     # (including KeyboardInterrupt) propagates normally.
     print(help_message)
     sys.exit(2)
 inputdir = outputdir = k_size = h_size = None
 for opt, arg in opts:
     if opt in ('-h', '--help'):
         print(help_message)
         sys.exit()
     elif opt in ('-i', '--inputdir'):
         inputdir = arg
     elif opt in ('-o', '--outputdir'):
         outputdir = arg
     elif opt in ('-k', '--kmersize'):
         k_size = int(arg)
     elif opt in ('-s', '--hashsize'):
         h_size = int(arg)
 # All four options are required; a missing or empty one is a usage error
 # rather than a NameError/IndexError further down.
 if not inputdir or not outputdir or k_size is None or h_size is None:
     print(help_message)
     sys.exit(2)
 # Paths are concatenated with file names below, so force a trailing slash.
 if not inputdir.endswith('/'):
     inputdir += '/'
 if not outputdir.endswith('/'):
     outputdir += '/'
 hashobject = Fastq_Reader(inputdir, outputdir, new_hash=(h_size, k_size))
 total_rand_kmers = k_size * h_size * 2
 hashobject.rand_kmers_for_wheel(total_rand_kmers)
 hashobject.set_wheels(wheels=1)
 # Best-effort cleanup of the temporary k-mer file (presumably written by
 # rand_kmers_for_wheel -- confirm against Fastq_Reader). os.remove avoids
 # passing a user-supplied path through the shell; a failure is reported
 # but, as with the former `rm`, does not abort the run.
 random_kmers_path = os.path.join(inputdir, 'random_kmers.fastq')
 try:
     os.remove(random_kmers_path)
 except OSError as err:
     sys.stderr.write('could not remove %s: %s\n' % (random_kmers_path, err))
 # Floor division keeps the value an integer regardless of division mode.
 with open(outputdir + 'hashParts.txt', 'w') as f:
     f.write('%d\n' % (2**h_size // 10**6 + 1))
Exemplo n.º 2
0
help_message = 'usage example: python merge_hashq_files.py -r 3 -i /project/home/hashed_reads/ -o /project/home/hashed_reads/'
if __name__ == "__main__":
    # Select one sample prefix by 1-based rank among the '*hashq*' files in
    # the input directory, merge its count fractions through Fastq_Reader,
    # and save the indices of the nonzero entries as '<prefix>.nonzero.npy'.
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'hr:i:o:',
            ["help", "filerank=", "inputdir=", "outputdir="])
    except getopt.GetoptError:
        print(help_message)
        sys.exit(2)
    fr = inputdir = outputdir = None
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(help_message)
            sys.exit()
        elif opt in ('-r', '--filerank'):
            # Ranks are 1-based on the command line, 0-based internally.
            fr = int(arg) - 1
        elif opt in ('-i', '--inputdir'):
            inputdir = arg
        elif opt in ('-o', '--outputdir'):
            outputdir = arg
    # All three options are required; report usage instead of a NameError.
    if fr is None or not inputdir or not outputdir:
        print(help_message)
        sys.exit(2)
    if not inputdir.endswith('/'):
        inputdir += '/'
    if not outputdir.endswith('/'):
        outputdir += '/'
    # Unique sample prefixes: file name up to the first '.', or the whole
    # name when it has no dot. Sorted so that a given rank always maps to the
    # same prefix in every process; set iteration order is not guaranteed.
    FP = sorted(set(
        os.path.basename(path).split('.', 1)[0]
        for path in glob.glob(os.path.join(inputdir, '*hashq*'))))
    # Reject ranks outside 1..len(FP); a rank of 0 would otherwise silently
    # select the last prefix through negative indexing.
    if not 0 <= fr < len(FP):
        sys.stderr.write(
            'file rank %d is out of range: found %d sample prefixes in %s\n'
            % (fr + 1, len(FP), inputdir))
        sys.exit(2)
    file_prefix = FP[fr]
    hashobject = Fastq_Reader(inputdir, outputdir)
    H = hashobject.merge_count_fractions(file_prefix)
    # NOTE(review): the uint16 cast wraps any count above 65535 -- confirm
    # that merge_count_fractions cannot return larger values.
    H = np.array(H, dtype=np.uint16)
    nz = np.nonzero(H)[0]
    np.save(hashobject.output_path + file_prefix + '.nonzero.npy', nz)
    print('sample %s has %d nonzero elements and %d total observed kmers' % (
        file_prefix, len(nz), H.sum()))
Exemplo n.º 3
0
help_message = 'usage example: python assembly_summary.py -i /project/home/'
if __name__ == "__main__":
	# Parse -i, load the bacterial/viral name pickles and the sample ids,
	# then open the summary CSV under the input directory and write its
	# header row.
	try:
		opts, args = getopt.getopt(sys.argv[1:],'hi:',["help","inputdir="])
	except getopt.GetoptError:
		print(help_message)
		sys.exit(2)
	inputdir = None
	for opt, arg in opts:
		if opt in ('-h','--help'):
			print(help_message)
			sys.exit()
		elif opt in ('-i','--inputdir'):
			inputdir = arg
	# The input directory is required; report usage instead of a NameError.
	if not inputdir:
		print(help_message)
		sys.exit(2)
	# Paths are built by concatenation below, so force a trailing slash.
	if not inputdir.endswith('/'):
		inputdir += '/'
	hashobject = Fastq_Reader(inputdir+'read_partitions/',inputdir+'read_partitions/')
	# `with` closes each input file even if loading raises.
	with open(bact_names_path) as f:
		BNames = cPickle.load(f)
	with open(vir_names_path) as f:
		VNames = cPickle.load(f)
	# Flatten every row of the grouping CSV into one list of sample ids.
	with open(inputdir+'lib_estimates/samples_grouped_by_lib.csv') as f:
		reader = csv.reader(f)
		Sample_ids = [sample_id for row in reader for sample_id in row]
	# f_main is deliberately left open: presumably later code (not visible
	# here) writes further rows through writer_main and closes it -- confirm.
	f_main = open(inputdir+'assembly_alignment_summary.csv','w')
	writer_main = csv.writer(f_main)
	writer_main.writerow(['partition','N50','largest contig','total bp','scaffolds','top bacterial alignment','alignment length','top viral alignment','alignment length'])