def krill_open(q_min, q_percent, p64, in1, in2, pe_1, pe_2, se_1, se_2, f_1, f_2):
    '''
    open the paired input files plus the six output files and run krill

    gzip_test(in1) decides how both inputs are read: a truthy result means
    gzip.open in text mode, anything else means the builtin open. in2 is
    not probed separately. for gzipped input the final extension of every
    output path is removed with os.path.splitext before the file is
    created; all outputs are opened with mode "w".
    '''
    gzipped = gzip_test(in1)
    reader = gzip.open if gzipped else open
    if gzipped:
        # drop the last suffix (presumably '.gz') from each output name
        pe_1, pe_2, se_1, se_2, f_1, f_2 = [
            os.path.splitext(path)[0]
            for path in (pe_1, pe_2, se_1, se_2, f_1, f_2)]
    with reader(in1, 'rt') as f1, reader(in2, 'rt') as f2:
        with open(pe_1, "w") as pe_o1, open(pe_2, "w") as pe_o2,\
                open(se_1, "w") as se_o1, open(se_2, "w") as se_o2,\
                open(f_1, "w") as fail_o1, open(f_2, "w") as fail_o2:
            krill(q_min, q_percent, p64, f1, f2, pe_o1, pe_o2,
                  se_o1, se_o2, fail_o1, fail_o2)
def porifera_open(in1, in2, subseqs1, subseqs2, se_1, pe_1, se_2, pe_2, k, rounds, match, min_l, tiny_ls1, tiny_ls2, tiny):
    '''
    open paired end files for adapter detection and hand them to porifera

    both inputs are read with gzip.open (text mode) when gzip_test(in1) is
    truthy and with the builtin open otherwise. in the gzipped case the
    last extension of pe_1, pe_2, se_1 and se_2 is stripped with
    os.path.splitext before the output files are created.
    '''
    gzipped = gzip_test(in1)
    reader = gzip.open if gzipped else open
    if gzipped:
        # output names lose their final suffix (presumably '.gz')
        pe_1, pe_2, se_1, se_2 = [
            os.path.splitext(path)[0] for path in (pe_1, pe_2, se_1, se_2)]
    with reader(in1, 'rt') as f1, reader(in2, 'rt') as f2:
        with open(pe_1, 'w') as pe_o1, open(pe_2, 'w') as pe_o2,\
                open(se_1, 'w') as se_o1, open(se_2, 'w') as se_o2:
            porifera(f1, f2, subseqs1, subseqs2, pe_o1, pe_o2, se_o1,
                     se_o2, k, rounds, match, min_l, tiny_ls1, tiny_ls2,
                     tiny)
def rotifer_open(R1_bases_ls, R2_bases_ls, in1, in2, pe_1, pe_2, se_1, se_2, trim):
    '''
    open paired-end inputs and outputs, then run rotifer or rotifer_trim

    gzip_test(in1) selects gzip.open (text mode) or the builtin open for
    both inputs; for gzipped input the final extension of each output
    path is removed with os.path.splitext. a truthy trim dispatches to
    rotifer_trim (which also receives trim), otherwise rotifer is called.
    '''
    gzipped = gzip_test(in1)
    reader = gzip.open if gzipped else open
    if gzipped:
        # output names lose their final suffix (presumably '.gz')
        pe_1, pe_2, se_1, se_2 = [
            os.path.splitext(path)[0] for path in (pe_1, pe_2, se_1, se_2)]
    with reader(in1, 'rt') as f1, reader(in2, 'rt') as f2:
        with open(pe_1, "w") as pe_o1, open(pe_2, "w") as pe_o2,\
                open(se_1, "w") as se_o1, open(se_2, "w") as se_o2:
            if trim:
                rotifer_trim(R1_bases_ls, R2_bases_ls, f1, f2, pe_o1,
                             pe_o2, se_o1, se_o2, trim)
            else:
                rotifer(R1_bases_ls, R2_bases_ls, f1, f2, pe_o1, pe_o2,
                        se_o1, se_o2)
def porifera_single_open(args):
    '''
    open the single-end input and output named on args, run porifera_single

    plain input (gzip_test(args.in1) falsy) is read with the builtin open
    and written to args.se_1 as given. gzipped input is read through
    gzip.open in text mode and written to args.se_1 with its final
    extension removed; args itself is left untouched.
    '''
    if not gzip_test(args.in1):
        with open(args.in1) as f, open(args.se_1, 'w') as o:
            porifera_single(args, f, o)
        return
    # gzipped input: the output name loses its last suffix
    stripped = os.path.splitext(args.se_1)[0]
    with gzip.open(args.in1, 'rt') as f, open(stripped, 'w') as o:
        porifera_single(args, f, o)
def krill_single_open(q_min, q_percent, p64, in1, se_1):
    '''
    open a single-end input and its output file, then run krill_single

    when gzip_test(in1) is falsy the input is read with the builtin open
    and se_1 is used unchanged. otherwise the input is read via gzip.open
    in text mode and the output is created at se_1 minus its final
    extension (os.path.splitext).
    '''
    if not gzip_test(in1):
        with open(in1) as f1, open(se_1, "w") as se_o1:
            krill_single(q_min, q_percent, p64, f1, se_o1)
        return
    # gzipped input: the output name loses its last suffix
    stripped = os.path.splitext(se_1)[0]
    with gzip.open(in1, 'rt') as f1, open(stripped, "w") as se_o1:
        krill_single(q_min, q_percent, p64, f1, se_o1)
def scallop_open(in1, front_trim, end_trim, out1):
    '''
    open a single-end input/output pair and pass the handles to scallop

    gzip_test(in1) picks gzip.open (text mode) or the builtin open for the
    input. for gzipped input the final extension of out1 is removed with
    os.path.splitext before the output file is created.
    '''
    gzipped = gzip_test(in1)
    reader = gzip.open if gzipped else open
    if gzipped:
        # output name loses its final suffix (presumably '.gz')
        out1 = os.path.splitext(out1)[0]
    with reader(in1, 'rt') as f:
        with open(out1, 'w') as o:
            scallop(front_trim, end_trim, f, o)
def scallop_single_end_open(in1, out1, front_trim, end_score, window, min_l):
    '''
    open a single-end input/output pair and run scallop_single_end_line

    plain input (gzip_test(in1) falsy) is read with the builtin open and
    written to out1 as given. gzipped input is read through gzip.open in
    text mode and written to out1 with its final extension removed.
    '''
    if not gzip_test(in1):
        with open(in1) as f, open(out1, 'w') as o:
            scallop_single_end_line(f, o, front_trim, end_score, window,
                                    min_l)
        return
    # gzipped input: the output name loses its last suffix
    stripped = os.path.splitext(out1)[0]
    with gzip.open(in1, 'rt') as f, open(stripped, 'w') as o:
        scallop_single_end_line(f, o, front_trim, end_score, window, min_l)
def anemone_single_open(in1, out1, of1_ls, mismatch, bcs, proj, round_one, front_trim):
    '''
    open the single-end input as a text handle and run anemone_single

    the input is read with gzip.open in text mode when gzip_test(in1) is
    truthy, otherwise with the builtin open. only the input is opened
    here; out1 is forwarded to anemone_single unchanged, and whatever
    anemone_single returns is discarded.
    '''
    reader = gzip.open if gzip_test(in1) else open
    with reader(in1, 'rt') as f1:
        anemone_single(f1, out1, of1_ls, mismatch, bcs, proj, round_one,
                       front_trim)
def crinoid_open(in1, out1, out2, procs, p64):
    '''
    read the input twice: once for kmer_test, once for crinoid

    gzip_test(in1) selects gzip.open (text mode) or the builtin open. the
    file is opened and closed for kmer_test, whose result is kept as
    k_score, then opened again so crinoid receives a fresh handle along
    with out1, out2, procs, p64 and k_score.
    '''
    reader = gzip.open if gzip_test(in1) else open
    with reader(in1, 'rt') as f:
        k_score = kmer_test(f)
    # second, independent open for the main pass
    with reader(in1, 'rt') as f:
        crinoid(f, out1, out2, procs, p64, k_score)
def porifera_single_open(in1, subseqs, se_1, k, rounds, match, min_l, tiny_ls, tiny):
    '''
    open single-end files for adapter detection and run porifera_single

    gzip_test(in1) picks gzip.open (text mode) or the builtin open for the
    input. for gzipped input the final extension of se_1 is removed with
    os.path.splitext before the output file is created.
    '''
    gzipped = gzip_test(in1)
    reader = gzip.open if gzipped else open
    if gzipped:
        # output name loses its final suffix (presumably '.gz')
        se_1 = os.path.splitext(se_1)[0]
    with reader(in1, 'rt') as f:
        with open(se_1, 'w') as o:
            porifera_single(f, subseqs, o, k, rounds, match, min_l,
                            tiny_ls, tiny)
def anemone_comp(in1_ls, in2_ls, mismatch, bcs_dict, curr, front_trim, in1):
    '''
    composer entry point to anemone

    in1 is skipped (returns None) when it is listed in in2_ls. its mate is
    looked up at the same position in in2_ls; when there is none, in2 and
    out2 are both False. out1/out2 are the basenames of in1/in2, with a
    trailing '.gz' removed when gzip_test(in1) is truthy.

    bcs_dict is keyed by output name. for gzipped input, an entry stored
    under out1 + '.gz' is moved to out1 (bcs_dict is modified in place).
    if out1 has no entry, in1 (and in2 when present) are copied into curr
    and the function returns. otherwise a subdirectory named after in1
    (minus a '.fastq' or '.fastq.gz' suffix) is created under curr with
    os.mkdir and anemone_init is called.
    '''
    if in1 in in2_ls:
        return
    try:
        in2 = in2_ls[in1_ls.index(in1)]
    except (IndexError, ValueError):
        # no mate for this file
        in2 = False
        out2 = False
    else:
        out2 = os.path.basename(in2)
    out1 = os.path.basename(in1)
    if gzip_test(in1):
        if out1.endswith('.gz'):
            out1 = out1[:-3]
        if out2 and out2.endswith('.gz'):
            out2 = out2[:-3]
        # an entry keyed by the gzipped name is re-keyed to the bare name
        gz_key = out1 + '.gz'
        if out1 not in bcs_dict and gz_key in bcs_dict:
            bcs_dict[out1] = bcs_dict.pop(gz_key)
    # the following copies files not found in index.txt
    try:
        bcs_file = bcs_dict[out1]
    except KeyError:
        shutil.copy(in1, curr)
        # explicit check rather than catching the TypeError that
        # shutil.copy(False, ...) raises, so unrelated errors surface
        if in2 is not False:
            shutil.copy(in2, curr)
        return
    base = os.path.basename(in1)
    if in1.endswith('.fastq'):
        tmp_dir = base[:-6]
    elif in1.endswith('.fastq.gz'):
        tmp_dir = base[:-9]
    else:
        tmp_dir = base
    subdir = os.path.join(curr, tmp_dir)
    os.mkdir(subdir)
    anemone_init(in1, in2, out1, out2, mismatch, bcs_file, subdir,
                 front_trim)
def anemone_open(in1, in2, out1, out2, of1_ls, of2_ls, mismatch, bcs, proj, round_one, front_trim):
    '''
    open both paired-end inputs as text handles and run anemone

    gzip_test(in1) selects gzip.open (text mode) or the builtin open for
    both inputs; in2 is not probed separately. out1 and out2 are forwarded
    unchanged. the two values returned by anemone replace of1_ls and
    of2_ls and are returned as a pair.
    '''
    reader = gzip.open if gzip_test(in1) else open
    with reader(in1, 'rt') as f1, reader(in2, 'rt') as f2:
        of1_ls, of2_ls = anemone(f1, f2, out1, out2, of1_ls, of2_ls,
                                 mismatch, bcs, proj, round_one, front_trim)
        return of1_ls, of2_ls
def rotifer_single_open(R1_bases_ls, in1, se_1, f_1, trim):
    '''
    open a single-end input and its two outputs, then run rotifer_single

    gzip_test(in1) selects gzip.open (text mode) or the builtin open for
    the input; for gzipped input the final extension of se_1 and f_1 is
    removed with os.path.splitext. trim is accepted but not used by this
    function.
    '''
    gzipped = gzip_test(in1)
    reader = gzip.open if gzipped else open
    if gzipped:
        # output names lose their final suffix (presumably '.gz')
        se_1, f_1 = [os.path.splitext(path)[0] for path in (se_1, f_1)]
    with reader(in1, 'rt') as f1:
        with open(se_1, "w") as se_o1, open(f_1, "w") as fail_o1:
            rotifer_single(R1_bases_ls, f1, se_o1, fail_o1)
def fastq_test(fastq_ls):
    '''
    inspect each file in fastq_ls and return matched input lists

    gzip_test(filename) is expected to give None, True or something else:
    None exits the program with a message built from the filename and
    msg.fastq_test1, True reads the file through gzip.open in text mode,
    any other value reads it with the builtin open. each open handle is
    passed to fastq_structure together with the filename and the dict
    accumulated so far; its return value becomes the new dict.

    every dict value that contains no None contributes its element 0 to
    the first returned list and its element 1 to the second.
    '''
    fastq_dt = {}
    for filename in fastq_ls:
        compressed = gzip_test(filename)
        if compressed is None:
            sys.exit('\n\n' + filename + msg.fastq_test1)
        reader = gzip.open if compressed is True else open
        with reader(filename, 'rt') as f:
            fastq_dt = fastq_structure(f, filename, fastq_dt)
    # keep only entries with no missing member
    complete = [entry for entry in fastq_dt.values() if None not in entry]
    in1_ls = [entry[0] for entry in complete]
    in2_ls = [entry[1] for entry in complete]
    return in1_ls, in2_ls
def porifera_open(args):
    '''
    open the paired end files named on args and hand them to porifera

    gzip_test(args.in1) selects gzip.open (text mode) or the builtin open
    for args.in1 and args.in2. outputs are created at args.pe_1, args.pe_2,
    args.se_1 and args.se_2; for gzipped input the final extension of each
    is removed first (os.path.splitext). only local copies of the paths
    are changed - args is passed to porifera unmodified.
    '''
    gzipped = gzip_test(args.in1)
    reader = gzip.open if gzipped else open
    out_paths = [args.pe_1, args.pe_2, args.se_1, args.se_2]
    if gzipped:
        # output names lose their final suffix (presumably '.gz')
        out_paths = [os.path.splitext(path)[0] for path in out_paths]
    pe_1, pe_2, se_1, se_2 = out_paths
    with reader(args.in1, 'rt') as f1, reader(args.in2, 'rt') as f2:
        with open(pe_1, 'w') as pe_o1, open(pe_2, 'w') as pe_o2,\
                open(se_1, 'w') as se_o1, open(se_2, 'w') as se_o2:
            porifera(args, f1, f2, pe_o1, pe_o2, se_o1, se_o2)
def scallop_end_open(in1, in2, pe_1, pe_2, se_1, se_2, front_trim, end_score, window, min_l):
    '''
    open paired-end inputs and four outputs, then run scallop_end_line

    gzip_test(in1) selects gzip.open (text mode) or the builtin open for
    both inputs; in2 is not probed separately. for gzipped input the final
    extension of pe_1, pe_2, se_1 and se_2 is removed with
    os.path.splitext before the output files are created.
    '''
    gzipped = gzip_test(in1)
    reader = gzip.open if gzipped else open
    if gzipped:
        # output names lose their final suffix (presumably '.gz')
        pe_1, pe_2, se_1, se_2 = [
            os.path.splitext(path)[0] for path in (pe_1, pe_2, se_1, se_2)]
    with reader(in1, 'rt') as f1, reader(in2, 'rt') as f2:
        with open(pe_1, "w") as pe_o1, open(pe_2, "w") as pe_o2,\
                open(se_1, "w") as se_o1, open(se_2, "w") as se_o2:
            scallop_end_line(f1, f2, pe_o1, pe_o2, se_o1, se_o2,
                             front_trim, end_score, window, min_l)