def count_unmapped(sam_file):
    '''
    Count mapped and unmapped records in a tab-delimited SAM file.

    A record is counted as unmapped when its third column is "*";
    every other record is counted as mapped. While reading, a running
    "unmapped mapped" tally is rewritten on a single console line
    (terminated by a carriage return), and a summary line is printed at
    the end.

    NOTE(review): header lines are not filtered here; presumably
    sc_read_simple_tab_file skips them or the input has none - confirm,
    otherwise they are counted as mapped records.

    @param sam_file: passed unchanged to sc_read_simple_tab_file, which
        must yield one indexable row (at least three fields) per record.
    @return: tuple (mapped, unmapped) - note the order differs from the
        order used in the printed messages.
    '''
    import sys

    mapped = 0
    unmapped = 0
    for data in sc_read_simple_tab_file(sam_file):
        # Third column holds the reference name; "*" marks no alignment.
        if data[2] == "*":
            unmapped += 1
        else:
            mapped += 1
        # Progress tally; "\r" keeps it on one line of the terminal.
        sys.stdout.write("%s %s \r" % (unmapped, mapped))
    # Move off the progress line before printing the summary.
    sys.stdout.write("\n")
    print("Unmapped %s from %s mapped" % (unmapped, mapped))
    return (mapped, unmapped)
def check_adapters(settings):
    '''
    Find k-mers that occur in a pickled k-mer library and save them.

    The pickled object is indexed by settings["k"] to obtain the library
    for that k-mer length. Every k-mer read from the input table is
    lower-cased and reported when either it or its reverse complement
    (as returned by get_revcomp) is a key of that library.

    Keys read from settings:
        "k"                     - k-mer length; selects the library and is
                                  checked against the length of one of its
                                  keys.
        "pickle_libraries_file" - path of the pickled libraries.
        "fastq_file"            - path handed to sc_read_simple_tab_file.
                                  Despite the name, each row is unpacked as
                                  exactly two fields: (kmer, count).
        "cutoff"                - if truthy, reading stops at the first row
                                  whose count is below it. Later rows are
                                  never examined, so this presumably
                                  expects the table to be sorted by
                                  descending count - confirm.
        "output_file"           - path of the tab-separated result file.

    Each output line is "<kmer>\\t<value>" where value is the string form
    of the tuple (count, library_entry). For every reported k-mer whose
    reverse complement was not itself reported, an extra line with the
    reverse complement and the same value is written.

    @param settings: dict-like object with the keys listed above.
    @return: list of (kmer, (count, library_entry)) pairs sorted in
        descending order of the (count, library_entry) tuple.
    '''
    import sys

    k = settings["k"]
    print("Load library for key= %s" % (k,))
    # NOTE(security): unpickling can execute arbitrary code; only load
    # library files from a trusted source.
    # NOTE(review): the file is opened in text mode, as before. Pickles
    # written with a binary protocol need "rb"; confirm how the library
    # file is produced before changing the mode.
    with open(settings["pickle_libraries_file"]) as fh:
        library = cPickle.load(fh)
    library = library[k]

    # Sanity-check the key length on one key without building a key list.
    # An empty library has nothing to check and simply yields no hits.
    sample_key = next(iter(library), None)
    assert sample_key is None or len(sample_key) == k
    print("Library size: %s" % len(library))

    contaminated_kmers = {}
    print("Iter over kmers")
    for i, d in enumerate(sc_read_simple_tab_file(settings["fastq_file"])):
        (kmer, tf) = d
        tf = int(tf)
        kmer = kmer.lower()
        # Progress indicator rewritten in place on one console line.
        sys.stdout.write("%s %s %s \r" % (i, kmer, tf))
        if settings["cutoff"] and tf < settings["cutoff"]:
            break
        rkmer = get_revcomp(kmer)
        # Use the entry of whichever orientation is actually present;
        # indexing with kmer alone raised KeyError when only the reverse
        # complement was in the library.
        if kmer in library:
            entry = library[kmer]
        elif rkmer in library:
            entry = library[rkmer]
        else:
            continue
        # Leave the progress line, then report the hit on its own line.
        sys.stdout.write("\n")
        print("%s %s %s" % (kmer, tf, entry))
        contaminated_kmers[kmer] = (tf, entry)

    all_kmers = set(contaminated_kmers)
    contaminated_kmers = sorted(
        contaminated_kmers.items(),
        key=lambda item: item[1],
        reverse=True,
    )

    print("Save data")
    with open(settings["output_file"], "w") as fh:
        for (kmer, value) in contaminated_kmers:
            rkey = get_revcomp(kmer)
            fh.write("%s\t%s\n" % (kmer, value))
            # Also emit the opposite strand unless it has its own line.
            if rkey not in all_kmers:
                fh.write("%s\t%s\n" % (rkey, value))
    return contaminated_kmers