def clean_se_run(settings):
    """Export the adapter k-mer library and clean one single-end FASTQ file.

    Steps:
      1. Load the pickled libraries from ``settings["pickle_libraries_file"]``
         and select the entry stored under ``settings["k"]``.
      2. Write every key of that library, plus ``get_revcomp(key)`` for each
         key, to ``settings["dat_libraries_file"]`` (one k-mer per line,
         followed by a TAB and ``-``).
      3. Call ``clean_single_read_data`` on ``<prefix>.fastq``, passing
         ``<prefix>.ok.fastq`` and ``<prefix>.bad.fastq`` as the other two
         file arguments and the freshly written file as ``adapters_file``.

    Settings keys read: ``k``, ``pickle_libraries_file``,
    ``dat_libraries_file``, ``prefix``, ``verbose``, ``cutoff``,
    ``polyGcutoff``.

    Returns None; the results are the files written on disk.
    """
    # Single formatted string: prints the same text under Python 2 as the
    # former two-item print statement, and is also valid Python 3 syntax.
    print("Load library for key= %s" % (settings["k"],))

    # Pickle data must be read in binary mode: text mode can corrupt binary
    # pickle protocols on platforms that translate line endings.
    # NOTE(review): unpickling can execute arbitrary code -- only point
    # ``pickle_libraries_file`` at trusted files.
    with open(settings["pickle_libraries_file"], "rb") as fh:
        library = cPickle.load(fh)
    library = library[settings["k"]]

    # Keep each k-mer together with its get_revcomp() counterpart
    # (presumably the reverse complement -- confirm against get_revcomp).
    kmers = set(library)
    kmers.update(get_revcomp(kmer) for kmer in library)

    adapters_file = settings["dat_libraries_file"]
    with open(adapters_file, "w") as fh:
        fh.writelines("%s\t-\n" % kmer for kmer in kmers)

    prefix = settings["prefix"]
    fastq1_file = "%s.fastq" % prefix
    fastq1ok_file = "%s.ok.fastq" % prefix
    fastq_bad_file = "%s.bad.fastq" % prefix

    clean_single_read_data(
        fastq1_file,
        fastq1ok_file,
        fastq_bad_file,
        verbose=settings["verbose"],
        adapters_file=adapters_file,
        cutoff=settings["cutoff"],
        polyG_cutoff=settings["polyGcutoff"],
    )
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#@created: 10.10.2013
#@author: Aleksey Komissarov
#@contact: [email protected]
#
# Command-line entry point: cleans the single-end FASTQ file "<prefix>.fastq"
# by calling clean_single_read_data, passing "<prefix>.ok.fastq" and
# "<prefix>.bad.fastq" as the other two file arguments.

import sys
from PyBioSnippets.hiseq.fastq_tools import clean_single_read_data
import argparse

if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='Check presence of adapter kmers.')
    parser.add_argument('-p', '--prefix', help='SE prefix', required=True)
    # nargs='?' lets "-v" work as a bare flag (-> True) while still accepting
    # the older "-v VALUE" form, which passes VALUE through unchanged.
    parser.add_argument('-v', '--verbose', help='Verbose', required=False,
                        nargs='?', const=True, default=False)
    # type=int makes argparse reject non-numeric values with a usage error
    # instead of failing later with a ValueError traceback.
    parser.add_argument('-G', '--polyG', help='Length of polyG', required=False,
                        type=int, default=23)
    parser.add_argument('-c', '--cutoff', help='Length cutoff', required=False,
                        type=int, default=50)
    parser.add_argument('-a', '--adapters', help='File with adapters',
                        required=False, default=None)
    args = vars(parser.parse_args())

    prefix = args["prefix"]
    verbose = args["verbose"]
    cutoff = args["cutoff"]
    polyG_cutoff = args["polyG"]
    adapters_file = args["adapters"]

    # All three file names are derived from the single-end prefix.
    fastq1_file = "%s.fastq" % prefix
    fastq1ok_file = "%s.ok.fastq" % prefix
    fastq_bad_file = "%s.bad.fastq" % prefix

    clean_single_read_data(
        fastq1_file,
        fastq1ok_file,
        fastq_bad_file,
        verbose=verbose,
        adapters_file=adapters_file,
        cutoff=cutoff,
        polyG_cutoff=polyG_cutoff,
    )