def test04(self):
    """
    Parse a bz2-compressed ssearch output file and check how many
    alignments and distinct query names it yields.
    """
    with BZ2File(self.data('rle_100_left.ssearch.bz2')) as handle:
        records = list(sequtils.parse_ssearch36(handle))

    query_names = {rec['q_name'] for rec in records}

    # 100 total sequences
    self.assertEqual(len(query_names), 100)
    # searched against 4 primers
    self.assertEqual(len(records), 400)
def action(args):
    """
    Write parsed ssearch36 alignments to ``args.out`` as CSV.

    Attributes read from ``args``:
        alignments: handed to ``parse_ssearch36`` to produce alignment dicts.
        limit: maximum number of parsed alignments to consume (``islice``).
        min_zscore: when truthy, keep only rows whose ``sw_zscore`` is at
            least this value.
        top_alignment: when truthy, keep only the first alignment of each
            run of consecutive rows sharing a ``q_name``.
        rlefile: optional iterable of mappings (name -> encoded run lengths)
            used to decode ``t_seq``/``q_seq`` via ``homodecodealignment``.
        print_one: when truthy, pretty-print the first alignment and exit.
        with_diff: when truthy, pass every row through ``add_diff``.
        fieldnames: output columns; when empty they are taken from the
            first row.
        extra_fields: parsed with ``parse_extras``; the resulting key/value
            pairs are appended to every row and to the column list.
        out, header: CSV destination and whether to write a header row.
    """
    extras = parse_extras(args.extra_fields) if args.extra_fields else {}

    aligns = islice(parse_ssearch36(args.alignments, False), args.limit)

    if args.min_zscore:
        aligns = (a for a in aligns
                  if float(a['sw_zscore']) >= args.min_zscore)

    # groupby only merges *consecutive* rows with the same q_name
    aligns = groupby(aligns, key=itemgetter('q_name'))

    if args.top_alignment:
        aligns = (next(a) for _, a in aligns)
    else:
        aligns = (a for _, i in aligns for a in i)  # flatten groupby iters

    if args.rlefile:
        decoding = {k: v for d in args.rlefile for k, v in d.items()}

        def decode(align):
            # replaces the sequences in place and hands the same dict back
            align['t_seq'], align['q_seq'] = homodecodealignment(
                align['t_seq'], from_ascii(decoding[align['t_name']]),
                align['q_seq'], from_ascii(decoding[align['q_name']]))
            return align

        aligns = (decode(a) for a in aligns)

    if args.print_one:
        # next() with a default instead of the Python-2-only .next(), so an
        # empty input exits quietly rather than raising StopIteration
        first = next(aligns, None)
        if first is not None:
            pprint.pprint(first)
        sys.exit()

    if args.with_diff:
        aligns = (add_diff(a) for a in aligns)

    if args.fieldnames:
        # copy so that appending the extras below never mutates args
        fieldnames = list(args.fieldnames)
    else:
        # peek at first row fieldnames
        top = next(aligns, {})
        fieldnames = list(top)
        # only put the peeked row back when there really was one;
        # otherwise an empty placeholder row would be written out
        if top:
            aligns = chain([top], aligns)

    if extras:
        fieldnames += list(extras)
        aligns = (dict(d, **extras) for d in aligns)

    writer = csv.DictWriter(args.out,
                            extrasaction='ignore',
                            fieldnames=fieldnames)

    if args.header:
        writer.writeheader()

    for a in aligns:
        writer.writerow(a)
def test03(self):
    """
    No output name is given; the output file is expected to be gone
    once the context manager has exited.
    """
    query = self.data('two.fasta')
    target = self.data('ten.fasta')

    with sequtils.run_ssearch(query, target) as aligns:
        q_names = {d['q_name'] for d in sequtils.parse_ssearch36(aligns)}
        self.assertEqual(set(['H59735', 'T70875']), q_names)

    # should not still exist
    still_there = path.exists(aligns.name)
    self.assertFalse(still_there)
def test02(self):
    """
    No output name is given, but ``cleanup=False`` is passed, so the
    output file is expected to remain after the context manager exits.
    """
    query = self.data('two.fasta')
    target = self.data('ten.fasta')

    with sequtils.run_ssearch(query, target, cleanup=False) as aligns:
        q_names = {d['q_name'] for d in sequtils.parse_ssearch36(aligns)}
        self.assertEqual(set(['H59735', 'T70875']), q_names)

    # should still exist since we specified 'cleanup=False'
    still_there = path.exists(aligns.name)
    self.assertTrue(still_there)
def test01(self):
    """
    An explicit output name is provided; the results must be written
    to that path and the file must remain afterwards.
    """
    query = self.data('two.fasta')
    target = self.data('ten.fasta')
    out = path.join(self.mkoutdir(), 'aligns.ssearch')

    with sequtils.run_ssearch(query, target, out) as aligns:
        self.assertEqual(out, aligns.name)
        q_names = {d['q_name'] for d in sequtils.parse_ssearch36(aligns)}
        self.assertEqual(set(['H59735', 'T70875']), q_names)

    # should still exist since we provided a name for the output
    self.assertTrue(path.exists(out))
def action(args):
    """
    Run ssearch36 with ``args.query`` against ``args.library`` and write
    the parsed alignments to ``args.out`` as CSV.

    If either input file is empty, ssearch36 is not run: a header is
    written when both ``args.fieldnames`` and ``args.header`` are set,
    and the function returns.

    Each output row gains a ``qcovs`` column computed as
    ``(q_al_stop - q_al_start) / q_sq_len``.

    Raises:
        CalledProcessError: when ssearch36 exits with a non-zero status.
            The collected stderr text is passed in the exception's
            command slot so that it shows up in the message.
    """
    # setup ssearch command and communicate
    command = ["ssearch36"]
    command += ["-m", "10"]  # 10 is parseable key:value output format
    command += ["-n"]  # DNA/RNA query
    command += ["-z", args.statistical_calculation]
    command += ["-g", args.gap_extension_penalty]
    command += ["-f", args.gap_open_penalty]
    command += ["-T", str(args.threads)]

    if args.strand == "forward":
        command += ["-3"]  # forward strand only

    if args.full_sequences:
        command += ["-a"]

    if not args.all_alignments:
        command += ["-b", "1"]
        command += ["-d", "1"]

    command += [args.query, args.library]

    # If query or library file is empty, don't bother executing ssearch.
    # Just print an empty file with a header and exit.
    if os.stat(args.query).st_size == 0 or os.stat(args.library).st_size == 0:
        # Bind unconditionally: assigning only when args.fieldnames was
        # truthy left the name undefined for the check that follows.
        fieldnames = args.fieldnames
        if fieldnames:
            writer = DictWriter(args.out, extrasaction="ignore", fieldnames=fieldnames)
            if args.header:
                writer.writeheader()
        return

    log.info(" ".join(command))

    # NOTE(review): stderr is only drained after stdout has been consumed;
    # a child that writes a lot to stderr could stall on a full pipe.
    ssearch = Popen(command, stdout=PIPE, stderr=PIPE)

    # parse alignments
    aligns = parse_ssearch36(ssearch.stdout)
    aligns = (a for a in aligns if float(a["sw_zscore"]) >= args.min_zscore)
    aligns = groupby(aligns, key=itemgetter("q_name"))
    aligns = (a for _, i in aligns for a in i)  # flatten groupby iters

    # decode if appropriate
    if args.decode:
        decoding = {k: v for d in args.decode for k, v in d.items()}

        def decode(align):
            # replaces the sequences in place and hands the same dict back
            align["t_seq"], align["q_seq"] = homodecodealignment(
                align["t_seq"],
                from_ascii(decoding[align["t_name"]]),
                align["q_seq"],
                from_ascii(decoding[align["q_name"]]),
            )
            return align

        aligns = (decode(a) for a in aligns)

    # calculate coverage for each item and repack into generator
    # coverage = |query alignment| / |query length|
    aligns = (
        dict(d, qcovs=str((float(d["q_al_stop"]) - float(d["q_al_start"])) / float(d["q_sq_len"])))
        for d in aligns
    )

    # write results
    if args.fieldnames:
        fieldnames = args.fieldnames
    else:
        # peek at first row fieldnames
        top = next(aligns, {})
        fieldnames = list(top)
        if top:
            aligns = chain([top], aligns)

    writer = DictWriter(args.out, extrasaction="ignore", fieldnames=fieldnames)

    if args.header:
        writer.writeheader()

    for a in aligns:
        writer.writerow(a)

    # de-duplicate the stderr lines before reporting them
    error = set(e.strip() for e in ssearch.stderr)
    error = ", ".join(error)

    if ssearch.wait() != 0:
        raise CalledProcessError(ssearch.returncode, error)

    if error:
        log.error(error)
def action(args):
    """
    Run fasta36 with ``args.query`` against ``args.library``, parse its
    output with ``parse_ssearch36`` and write the rows to ``args.out``
    as CSV.

    Rows whose ``fa_zscore`` is below ``args.min_zscore`` are dropped.
    When ``args.decode`` is set, ``t_seq`` and ``q_seq`` are decoded with
    ``homodecodealignment``. Columns come from ``args.fieldnames`` or,
    failing that, from the keys of the first row; nothing is written if
    neither yields any column.

    Raises:
        CalledProcessError: when fasta36 exits with a non-zero status.
    """
    # assemble the fasta36 command line
    command = ['fasta36', '-m', '10', '-3', '-n']
    command.extend(['-g', args.gap_extension_penalty])
    command.extend(['-f', args.gap_open_penalty])
    command.extend(['-T', str(args.threads)])

    if args.full_sequences:
        command.append('-a')

    if not args.all_alignments:
        command.extend(['-b', '1', '-d', '1'])

    command.extend([args.query, args.library])

    log.info(' '.join(command))

    pipe = Popen(command, stdout=PIPE, stderr=PIPE)

    # parse alignments and drop the low scoring ones
    parsed = parse_ssearch36(pipe.stdout)
    passing = (a for a in parsed if float(a['fa_zscore']) >= args.min_zscore)
    grouped = groupby(passing, key=itemgetter('q_name'))
    # flatten groupby iters
    aligns = (a for _, members in grouped for a in members)

    # decode if appropriate
    if args.decode:
        decoding = {}
        for mapping in args.decode:
            for name, encoded in mapping.items():
                decoding[name] = encoded

        def decode(row):
            t_seq, q_seq = homodecodealignment(
                row['t_seq'], from_ascii(decoding[row['t_name']]),
                row['q_seq'], from_ascii(decoding[row['q_name']]))
            row['t_seq'], row['q_seq'] = t_seq, q_seq
            return row

        aligns = (decode(a) for a in aligns)

    # work out the output columns
    fieldnames = args.fieldnames
    if not fieldnames:
        # peek at first row fieldnames
        top = next(aligns, {})
        fieldnames = top.keys()
        if top:
            aligns = chain([top], aligns)

    # write results
    if fieldnames:
        writer = DictWriter(args.out,
                            extrasaction='ignore',
                            fieldnames=fieldnames)

        if args.header:
            writer.writeheader()

        for row in aligns:
            writer.writerow(row)

    # unique stderr lines, joined into one message
    error = ', '.join({line.strip() for line in pipe.stderr})

    returncode = pipe.wait()
    if returncode != 0:
        raise CalledProcessError(pipe.returncode, error)

    if error:
        log.error(error)
def action(args):
    """
    Execute fasta36 on ``args.query`` versus ``args.library`` and emit
    the parsed alignments to ``args.out`` as CSV.

    Only rows with ``fa_zscore`` of at least ``args.min_zscore`` are kept.
    If ``args.decode`` is given, the target and query sequences are
    decoded through ``homodecodealignment``. The columns are
    ``args.fieldnames`` when provided, otherwise the keys of the first
    row; with no columns at all nothing is written.

    Raises:
        CalledProcessError: when fasta36 returns a non-zero exit status.
    """
    # build the argument list: fixed options first, then optional flags
    fixed = ['fasta36', '-m', '10', '-3', '-n',
             '-g', args.gap_extension_penalty,
             '-f', args.gap_open_penalty,
             '-T', str(args.threads)]
    optional = []
    if args.full_sequences:
        optional += ['-a']
    if not args.all_alignments:
        optional += ['-b', '1']
        optional += ['-d', '1']
    command = fixed + optional + [args.query, args.library]

    log.info(' '.join(command))

    pipe = Popen(command, stdout=PIPE, stderr=PIPE)

    def above_threshold(align):
        return float(align['fa_zscore']) >= args.min_zscore

    # parse alignments
    records = (a for a in parse_ssearch36(pipe.stdout) if above_threshold(a))
    by_query = groupby(records, key=itemgetter('q_name'))
    aligns = (a for _, group in by_query for a in group)  # flatten groupby iters

    # decode if appropriate
    if args.decode:
        decoding = {k: v for d in args.decode for k, v in d.items()}

        def decode(record):
            decoded = homodecodealignment(
                record['t_seq'], from_ascii(decoding[record['t_name']]),
                record['q_seq'], from_ascii(decoding[record['q_name']]))
            record['t_seq'], record['q_seq'] = decoded
            return record

        aligns = (decode(a) for a in aligns)

    # choose the output columns
    if args.fieldnames:
        fieldnames = args.fieldnames
    else:
        # peek at first row fieldnames
        first = next(aligns, {})
        fieldnames = first.keys()
        if first:
            aligns = chain([first], aligns)

    # write results
    if fieldnames:
        writer = DictWriter(
            args.out, extrasaction='ignore', fieldnames=fieldnames)
        if args.header:
            writer.writeheader()
        for record in aligns:
            writer.writerow(record)

    # collapse repeated stderr lines into a single message
    messages = set()
    for line in pipe.stderr:
        messages.add(line.strip())
    error = ', '.join(messages)

    if pipe.wait() != 0:
        raise CalledProcessError(pipe.returncode, error)

    if error:
        log.error(error)
def action(args):
    """
    Run ssearch36 with ``args.query`` against ``args.library`` and write
    the parsed alignments to ``args.out`` as CSV.

    If either input file is empty, ssearch36 is not run: a header is
    written when both ``args.fieldnames`` and ``args.header`` are set,
    and the function returns.

    Each output row gains a ``qcovs`` column computed as
    ``(q_al_stop - q_al_start) / q_sq_len``.

    Raises:
        CalledProcessError: when ssearch36 exits with a non-zero status.
            The collected stderr text is passed in the exception's
            command slot so that it shows up in the message.
    """
    # setup ssearch command and communicate
    command = ['ssearch36']
    command += ['-m', '10']  # 10 is parseable key:value output format
    command += ['-n']  # DNA/RNA query
    command += ['-z', args.statistical_calculation]
    command += ['-g', args.gap_extension_penalty]
    command += ['-f', args.gap_open_penalty]
    command += ['-T', str(args.threads)]

    if args.strand == 'forward':
        command += ['-3']  # forward strand only

    if args.full_sequences:
        command += ['-a']

    if not args.all_alignments:
        command += ['-b', '1']
        command += ['-d', '1']

    command += [args.query, args.library]

    # If query or library file is empty, don't bother executing ssearch.
    # Just print an empty file with a header and exit.
    if os.stat(args.query).st_size == 0 or os.stat(args.library).st_size == 0:
        # Bind unconditionally: assigning only when args.fieldnames was
        # truthy left the name undefined for the check that follows.
        fieldnames = args.fieldnames
        if fieldnames:
            writer = DictWriter(args.out,
                                extrasaction='ignore',
                                fieldnames=fieldnames)
            if args.header:
                writer.writeheader()
        return

    log.info(' '.join(command))

    # NOTE(review): stderr is only drained after stdout has been consumed;
    # a child that writes a lot to stderr could stall on a full pipe.
    ssearch = Popen(command, stdout=PIPE, stderr=PIPE)

    # parse alignments
    aligns = parse_ssearch36(ssearch.stdout)
    aligns = (a for a in aligns if float(a['sw_zscore']) >= args.min_zscore)
    aligns = groupby(aligns, key=itemgetter('q_name'))
    aligns = (a for _, i in aligns for a in i)  # flatten groupby iters

    # decode if appropriate
    if args.decode:
        decoding = {k: v for d in args.decode for k, v in d.items()}

        def decode(align):
            # replaces the sequences in place and hands the same dict back
            align['t_seq'], align['q_seq'] = homodecodealignment(
                align['t_seq'], from_ascii(decoding[align['t_name']]),
                align['q_seq'], from_ascii(decoding[align['q_name']]))
            return align

        aligns = (decode(a) for a in aligns)

    # calculate coverage for each item and repack into generator
    # coverage = |query alignment| / |query length|
    aligns = (dict(d, qcovs=str(
        (float(d['q_al_stop']) - float(d['q_al_start'])) /
        float(d['q_sq_len'])
    )) for d in aligns)

    # write results
    if args.fieldnames:
        fieldnames = args.fieldnames
    else:
        # peek at first row fieldnames
        top = next(aligns, {})
        fieldnames = list(top)
        if top:
            aligns = chain([top], aligns)

    writer = DictWriter(args.out,
                        extrasaction='ignore',
                        fieldnames=fieldnames)

    if args.header:
        writer.writeheader()

    for a in aligns:
        writer.writerow(a)

    # de-duplicate the stderr lines before reporting them
    error = set(e.strip() for e in ssearch.stderr)
    error = ', '.join(error)

    if ssearch.wait() != 0:
        raise CalledProcessError(ssearch.returncode, error)

    if error:
        log.error(error)