Exemplo n.º 1
0
def count_unmapped(sam_file):
	''' Print number of unmapped reads in sam_file.
	'''
	mapped = 0
	unmapped = 0
	for data in sc_read_simple_tab_file(sam_file):
		read_id = data[0]
		match = data[2]
		if match == "*":
			unmapped += 1
		else:
			mapped += 1
		print unmapped, mapped, "\r",
	print 
	print "Unmapped %s from %s mapped" % (unmapped, mapped)
	return (mapped, unmapped)
Exemplo n.º 2
0
def check_adapters(settings):
	'''
	'''
	print "Load library for key=", settings["k"]
	with open(settings["pickle_libraries_file"]) as fh:
		library = cPickle.load(fh)
	library = library[settings["k"]]
	assert len(library.keys()[0]) == settings["k"]
	print "Library size:", len(library.keys())
	contaminated_kmers = {}
	print "Iter over kmers"
	for i, d in enumerate(sc_read_simple_tab_file(settings["fastq_file"])):
		(kmer, tf) = d
		tf = int(tf)
		kmer = kmer.lower()
		print i, kmer, tf, "\r",
		if settings["cutoff"] and tf < settings["cutoff"]:
			break
		rkmer = get_revcomp(kmer)
		if kmer in library or rkmer in library:
			print
			print kmer, tf, library[kmer]
			contaminated_kmers[kmer] = (tf, library[kmer])
	all_kmers = set(contaminated_kmers.keys())
	contaminated_kmers = contaminated_kmers.items()
	contaminated_kmers.sort(key=lambda x: x[1], reverse=True)
	print "Save data"
	with open(settings["output_file"], "w") as fh:
		for (k, v) in contaminated_kmers:
			rkey = get_revcomp(k)
			s = "%s\t%s\n" % (k, v)
			fh.write(s)
			if not rkey in all_kmers:
				s = "%s\t%s\n" % (rkey, v)
				fh.write(s)
	return contaminated_kmers