# Configure the template job that every submitted job is cloned from:
# the analyses to run, the output format, and the polling settings.
template.application_select(['Pfam', 'Panther', 'SignalP'])
template.output_select = 'json'
template.poll_time = 60
template.poll_max = 100

sequence_limit = 20  # maximum number of sequences to submit
batch_limit = 5      # batch size tested at the end of the loop body
n_sequence = 0       # sequences accepted for submission so far
nskip = 60           # number of leading records to skip
s = 0                # records skipped so far

while fasta.next():
    # Skip the first nskip records using the outer loop's own read. If the
    # input runs out while skipping, the loop simply ends; the previous
    # nested skip loop ignored the result of fasta.next() and would go on to
    # process whatever record was left in the reader.
    if s < nskip:
        s += 1
        continue

    if fasta.length() < args.minlen:
        continue  # skip short sequences
    if n_sequence >= sequence_limit:
        break  # run up to sequence_limit queries

    n_sequence += 1

    # copy the template and add the sequence information
    ips = template.clone()
    # NOTE(review): if .seq is a plain str, the trailing .format(linelen=60)
    # is str.format and leaves the sequence unchanged - confirm whether a
    # line-wrapped sequence was intended here.
    ips.sequence = fasta.translate().seq.rstrip('*').format(linelen=60)
    ips.jobname = fasta.id
    ips.title = 'ORF{}'.format(n_sequence)
    joblist[ips] = 'new'

    # submit job
    if ips.run():
        joblist[ips] = 'submitted'

    # True when n_sequence is not a multiple of batch_limit and the overall
    # sequence limit has not been reached yet.
    if n_sequence % batch_limit and n_sequence < sequence_limit:
        # NOTE(review): the body of this branch is not present in the
        # available source; 'pass' is only a placeholder that keeps the
        # statement syntactically complete - restore the real body.
        pass
# Read sequences from the input FASTA file and start a new output file
# whenever adding the next sequence would exceed maxbases.
fasta = Fasta()
fasta.fh = cl.fasta_file

# Compile the trim pattern only when one was supplied. A compiled pattern
# object is always truthy, so without this the "if trimre" guard in the loop
# below could never skip trimming (and a trim of None would fail to compile).
trimre = re.compile(trim) if trim else None

# initialize counters
base_total = 0
base_current = 0
n_out = 0
n_seq = 0
n_current = 0

while fasta.next():
    if trimre:
        fasta.trimDocByRegex(trimre)

    if not n_seq or base_current + fasta.length() > maxbases:
        # if number of bases would be greater than cutoff after adding the
        # new sequence:
        #   close current output file, open new
        #   report statistics for old file
        #   reset current file counters
        try:
            # on the first pass no output file has been opened yet, so the
            # name outfile is undefined and there is nothing to report
            outfile.close()
        except NameError:
            pass
        else:
            # single print call; the output is identical to the previous
            # three chained print(..., end=' ') calls
            print(' ', base_current, 'bases/amino acids',
                  'in', n_current, 'sequences',
                  'written to', outfilename)
# Convert one input file of raw sequences (one sequence per line) into a
# FASTA file named <base>.fasta.
# NOTE(review): the original comments mark this as the body of a loop over
# input files; the loop header is outside this fragment.
# NOTE(review): str.replace removes every '.seq' occurrence in base, not only
# a trailing extension - confirm that is intended.
base = base.replace('.seq', '')
sys.stdout.write('\n\tExpanded file: {}\n\tbasename: {}\n'.format(
    infilename, base))

outfilename = base + '.fasta'
try:
    outfile = open(outfilename, 'w')
except OSError:
    # only I/O failures are reported here; programming errors are no longer
    # hidden behind a bare except
    sys.stderr.write(
        'Unable to open output file ({})\n'.format(outfilename))
    sys.exit(2)

# process all sequences in the file
n = 0
try:
    for seq in infile:
        fasta = Fasta()
        fasta.id = base + '_{}'.format(n)
        fasta.seq = seq.rstrip().upper()
        fasta.doc = 'length={}'.format(fasta.length())
        outfile.write(fasta.format(linelen=100))
        n += 1
finally:
    # close both files even if reading or writing fails part-way
    infile.close()
    outfile.close()

sys.stdout.write('\t{} sequences written to {}\n'.format(
    n, outfilename))

# end of loop over files
# sys.exit is used instead of the site-provided exit(), which is intended
# for the interactive interpreter and is not guaranteed to exist.
sys.exit(0)