if parts == header: # Ignore the header line continue if not parts[-1] and len(parts) == len(header) + 1: # Ignore dummy blank extra column, e.g. # "...2.0\t\tPSORTb version 3.0\t\n" parts = parts[:-1] assert len(parts) == len(header), \ "%i fields, not %i, in line:\n%r" % (len(line), len(header), line) out_handle.write(line) count += 1 return count # Note that if the input FASTA file contains no sequences, # split_fasta returns an empty list (i.e. zero temp files). fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK) temp_files = [f + ".out" for f in fasta_files] jobs = ["psort %s %s %s -o %s %s > %s" % (org_type, cutoff, divergent, out_type, fasta, temp) for fasta, temp in zip(fasta_files, temp_files)] def clean_up(file_list): for f in file_list: if os.path.isfile(f): os.remove(f) try: os.rmdir(tmp_dir) except Exception: pass if len(jobs) > 1 and num_threads > 1:
"Medium likely prediction", "Highly likely prediction"]: stop_err("ERROR: Problem with line: %r" % line) out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood)) return queries working_dir, bin = get_path_and_binary() if not os.path.isfile(fasta_file): stop_err("ERROR: Missing input FASTA file %r" % fasta_file) #Note that if the input FASTA file contains no sequences, #split_fasta returns an empty list (i.e. zero temp files). #We deliberately omit the FASTA descriptions to avoid a #bug in promoter2 with descriptions over 200 characters. fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "promoter"), FASTA_CHUNK, keep_descr=False) temp_files = [f+".out" for f in fasta_files] jobs = ["%s %s > %s" % (bin, fasta, temp) for fasta, temp in zip(fasta_files, temp_files)] def clean_up(file_list): for f in file_list: if os.path.isfile(f): os.remove(f) try: os.rmdir(tmp_dir) except: pass if len(jobs) > 1 and num_threads > 1: #A small "info" message for Galaxy to show the user.
) ) tab_handle.close() gff_handle.close() if num_threads == 1: # Still want to call split_fasta to apply truncation, but # no reason to make multiple files - and more chance of # hitting file system glitches if we do. So, FASTA_CHUNK = sys.maxsize fasta_files = split_fasta( fasta_file, os.path.join(tmp_dir, "signalp"), n=FASTA_CHUNK, truncate=truncate, max_len=MAX_LEN, ) temp_files = [f + ".out" for f in fasta_files] assert len(fasta_files) == len(temp_files) jobs = [ "signalp -short -t %s %s > %s" % (organism, fasta, temp) for (fasta, temp) in zip(fasta_files, temp_files) ] assert len(fasta_files) == len(temp_files) == len(jobs) def clean_up(file_list): """Remove temp files, and if possible the temp directory.""" for f in file_list:
assert first60.startswith("First60="), line first60 = first60[8:] assert predhel.startswith("PredHel="), line predhel = predhel[8:] assert topology.startswith("Topology="), line topology = topology[9:] out_handle.write( "%s\t%s\t%s\t%s\t%s\t%s\n" % (identifier, length, exp_aa, first60, predhel, topology)) count += 1 return count # Note that if the input FASTA file contains no sequences, # split_fasta returns an empty list (i.e. zero temp files). fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK) temp_files = [f + ".out" for f in fasta_files] jobs = [ "tmhmm -short %s > %s" % (fasta, temp) for fasta, temp in zip(fasta_files, temp_files) ] def clean_up(file_list): """Remove temp files, and if possible the temp directory.""" for f in file_list: if os.path.isfile(f): os.remove(f) try: os.rmdir(tmp_dir) except Exception:
num_threads = thread_count(sys.argv[2], default=4)
fasta_file = sys.argv[3]
tabular_file = sys.argv[4]


def clean_tabular(raw_handle, out_handle):
    """Clean up WoLF PSORT output to make it tabular.

    Each data line of ``raw_handle`` is split on its first run of
    whitespace into a sequence name and a comma separated list of
    "compartment score" pairs.  One tab separated row is written to
    ``out_handle`` per pair, as: name, compartment, score, rank, where
    rank counts from one in the order the pairs appear on the line.

    Blank lines and lines starting with "#" are skipped.  Any other
    line which does not fit this layout raises ValueError.
    """
    for line in raw_handle:
        line = line.rstrip("\r\n")
        # Lines read from a handle still carry their newline, so the
        # emptiness test must be applied after stripping it - otherwise
        # a blank line falls through and breaks the unpacking below.
        if not line.strip() or line.startswith("#"):
            continue
        name, data = line.split(None, 1)
        for rank, comp_data in enumerate(data.split(","), 1):
            comp, score = comp_data.split()
            out_handle.write("%s\t%s\t%s\t%i\n" % (name, comp, score, rank))


# NOTE(review): the output filename is passed as the second argument to
# split_fasta - presumably it acts as the filename stem for the FASTA
# chunks; confirm against split_fasta itself.
fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK)
temp_files = [f + ".out" for f in fasta_files]
assert len(fasta_files) == len(temp_files)
# One shell command per FASTA chunk, reading the chunk on stdin and
# redirecting stdout into the matching temp file.
jobs = [
    "%s %s < %s > %s" % (exe, organism, fasta, temp)
    for (fasta, temp) in zip(fasta_files, temp_files)
]
assert len(fasta_files) == len(temp_files) == len(jobs)


def clean_up(file_list):
    """Remove each of the given files which exists as a regular file."""
    for f in file_list:
        if os.path.isfile(f):
            os.remove(f)


if len(jobs) > 1 and num_threads > 1:
    # A small "info" message for Galaxy to show the user.
    # Written as a single-argument call so that it is valid syntax (and
    # prints the same text) under both Python 2 and Python 3.
    print(
        "Using %i threads for %i tasks"
        % (min(num_threads, len(jobs)), len(jobs))
    )
results = run_jobs(jobs, num_threads)
tabular_file = sys.argv[4]


def clean_tabular(raw_handle, out_handle):
    """Clean up WoLF PSORT output to make it tabular.

    Each data line of ``raw_handle`` is split on its first run of
    whitespace into a sequence name and a comma separated list of
    "compartment score" pairs.  One tab separated row is written to
    ``out_handle`` per pair, as: name, compartment, score, rank, where
    rank counts from one in the order the pairs appear on the line.

    Blank lines and lines starting with "#" are skipped.  Any other
    line which does not fit this layout raises ValueError.
    """
    for line in raw_handle:
        line = line.rstrip("\r\n")
        # Lines read from a handle still carry their newline, so the
        # emptiness test must be applied after stripping it - otherwise
        # a blank line falls through and breaks the unpacking below.
        if not line.strip() or line.startswith("#"):
            continue
        name, data = line.split(None, 1)
        for rank, comp_data in enumerate(data.split(","), 1):
            comp, score = comp_data.split()
            out_handle.write("%s\t%s\t%s\t%i\n" % (name, comp, score, rank))


# NOTE(review): the output filename is passed as the second argument to
# split_fasta - presumably it acts as the filename stem for the FASTA
# chunks; confirm against split_fasta itself.
fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK)
temp_files = [f + ".out" for f in fasta_files]
assert len(fasta_files) == len(temp_files)
# One shell command per FASTA chunk, reading the chunk on stdin and
# redirecting stdout into the matching temp file.
jobs = [
    "%s %s < %s > %s" % (exe, organism, fasta, temp)
    for (fasta, temp) in zip(fasta_files, temp_files)
]
assert len(fasta_files) == len(temp_files) == len(jobs)


def clean_up(file_list):
    """Remove each of the given files which exists as a regular file."""
    for f in file_list:
        if os.path.isfile(f):
            os.remove(f)