def gather():
    """Collect the job's result files from ``scr_job`` into ``results``.

    The files looked for are the two fragment libraries (the ``03`` and
    ``09`` variants of ``aa<name>.._05.075_v1_3``) plus ``<name>.fasta``,
    ``<name>.psipred`` and ``<name>.psipred_ss2``. A finished results
    directory looks like::

        aazj00103_05.075_v1_3.bz2  zj001.fasta  zj001.psipred_ss2
        aazj00109_05.075_v1_3.bz2  zj001.psipred

    Fragment libraries are rewritten in place in ``scr_job`` (each line
    becomes one space plus the stripped first 47 characters), copied, and the
    copy is compressed with the ``bzip2`` binary. All other files are copied
    unchanged.

    Raises:
        Exception: if ``paths.find`` returns nothing for one of the patterns.
            On any failure the results directory is removed with
            ``paths.removerf`` and the error is re-raised.
    """
    util.debug("Gathering result")
    job_results = results
    paths.ensure(job_results)

    # Raw string: same pattern text as before, without relying on Python
    # leaving the unknown escapes "\_" and "\." untouched.
    # NOTE(review): the suffix patterns below do not escape "."; if paths.find
    # treats patterns as regular expressions they match slightly more than the
    # literal file names - confirm against paths.find.
    aa_pattern = "aa" + name + r"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    for suf in (".fasta", ".psipred", ".psipred_ss2"):
        patterns.append(name + suf)

    try:
        for pattern in patterns:
            print("Pattern  {0}".format(pattern))
            match = paths.find(pattern, scr_job)
            if not match:
                # String exceptions ('raise "Missing file"') are rejected with
                # a TypeError on Python >= 2.6, so raise a real exception.
                raise Exception(
                    "Missing file: nothing matches {0} in {1}".format(pattern, scr_job))
            for path in match:
                is_frag_lib = pattern == aa_pattern
                if is_frag_lib:
                    print("Filtering Columns %s" % path)
                    # inplace=1 redirects stdout into the file being read, so
                    # every print in this loop rewrites one line of it.
                    try:
                        for line in fileinput.input(path, inplace=1):
                            print(" " + line[:47].strip())
                    finally:
                        # Always restore stdout and reset fileinput's state,
                        # even when filtering fails part-way through.
                        fileinput.close()
                print("Path  {0}".format(path))
                filename = paths.getFile(path)
                dest = paths.join(job_results, filename)
                util.copy(path, job_results)
                if is_frag_lib:
                    util.system("bzip2 %s" % dest)
    except:
        # Deliberately bare: whatever interrupts the gather (including
        # KeyboardInterrupt), do not leave a half-filled results directory.
        paths.removerf(job_results)
        raise
def _filter_fraglibs(self):
    """Reformat and bzip2-compress the 3' and 9' fragment library files.

    Both ``self.frag3_lib`` and ``self.frag9_lib`` must already be set (they
    come from the fragment-picking run). Each file is rewritten in place so
    that every line becomes one space plus the stripped first 47 characters
    of the original line (IBM formatting), then compressed with the ``bzip2``
    binary. Afterwards ``self.frag3_lib``, ``self.frag9_lib`` and
    ``self.results_files`` are updated to the new ``.bz2`` names.

    Raises:
        Exception: if either fragment library attribute is still None.
    """
    if self.frag3_lib is None or self.frag9_lib is None:
        raise Exception("Fragment library results files do not yet exist. "
                        "Run Fragmentor.run(...) first.")

    for frag_lib in (self.frag3_lib, self.frag9_lib):
        # inplace=1 redirects stdout into the file being read, so each print
        # below rewrites one line of the fragment library.
        try:
            for line in fileinput.input(frag_lib, inplace=1):
                print(" " + line[:47].strip())
        finally:
            # Restore stdout and reset fileinput even if filtering fails.
            fileinput.close()

        # bzip2 replaces the file with <frag_lib>.bz2.
        cmd = "bzip2 {0}".format(frag_lib)
        system(cmd)

    # Swap the uncompressed names for the .bz2 names in the bookkeeping.
    self.results_files.remove(self.frag3_lib)
    self.results_files.remove(self.frag9_lib)
    self.frag3_lib = self.frag3_lib + ".bz2"
    self.frag9_lib = self.frag9_lib + ".bz2"
    self.results_files.append(self.frag3_lib)
    self.results_files.append(self.frag9_lib)
def gather():
    """Collect the job's result files from ``scr_job`` into ``results``.

    The files looked for are the two fragment libraries (the ``03`` and
    ``09`` variants of ``aa<name>.._05.075_v1_3``) plus ``<name>.fasta``,
    ``<name>.psipred`` and ``<name>.psipred_ss2``. A finished results
    directory looks like::

        aazj00103_05.075_v1_3.bz2  zj001.fasta  zj001.psipred_ss2
        aazj00109_05.075_v1_3.bz2  zj001.psipred

    Fragment libraries are rewritten in place in ``scr_job`` (each line
    becomes one space plus the stripped first 47 characters), copied, and the
    copy is compressed with the ``bzip2`` binary. All other files are copied
    unchanged.

    Raises:
        Exception: if ``paths.find`` returns nothing for one of the patterns.
            On any failure the results directory is removed with
            ``paths.removerf`` and the error is re-raised.
    """
    util.debug("Gathering result")
    job_results = results
    paths.ensure(job_results)

    # Raw string: same pattern text as before, without relying on Python
    # leaving the unknown escapes "\_" and "\." untouched.
    # NOTE(review): the suffix patterns below do not escape "."; if paths.find
    # treats patterns as regular expressions they match slightly more than the
    # literal file names - confirm against paths.find.
    aa_pattern = "aa" + name + r"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    for suf in (".fasta", ".psipred", ".psipred_ss2"):
        patterns.append(name + suf)

    try:
        for pattern in patterns:
            print("Pattern  {0}".format(pattern))
            match = paths.find(pattern, scr_job)
            if not match:
                # String exceptions ('raise "Missing file"') are rejected with
                # a TypeError on Python >= 2.6, so raise a real exception.
                raise Exception(
                    "Missing file: nothing matches {0} in {1}".format(pattern, scr_job))
            for path in match:
                is_frag_lib = pattern == aa_pattern
                if is_frag_lib:
                    print("Filtering Columns %s" % path)
                    # inplace=1 redirects stdout into the file being read, so
                    # every print in this loop rewrites one line of it.
                    try:
                        for line in fileinput.input(path, inplace=1):
                            print(" " + line[:47].strip())
                    finally:
                        # Always restore stdout and reset fileinput's state,
                        # even when filtering fails part-way through.
                        fileinput.close()
                print("Path  {0}".format(path))
                filename = paths.getFile(path)
                dest = paths.join(job_results, filename)
                util.copy(path, job_results)
                if is_frag_lib:
                    util.system("bzip2 %s" % dest)
    except:
        # Deliberately bare: whatever interrupts the gather (including
        # KeyboardInterrupt), do not leave a half-filled results directory.
        paths.removerf(job_results)
        raise
def gather():
    """Collect the job's result files from ``scr_job_dir`` into ``results_dir``.

    Contents of the results directory afterwards (3' library, 9' library,
    fasta, psipred and psipred_ss2 files), e.g.::

        aazj00103_05.075_v1_3.bz2  zj001.fasta  zj001.psipred_ss2
        aazj00109_05.075_v1_3.bz2  zj001.psipred

    File names are built from the module-level ``code`` and ``task_id``
    values. Fragment libraries are rewritten in place in ``scr_job_dir``
    (each line becomes one space plus the stripped first 47 characters),
    copied, and the copy is compressed with the ``bzip2`` binary. The other
    files are copied unchanged.

    Raises:
        Exception: if ``paths.find`` returns nothing for one of the patterns.
            On any failure ``results_dir`` is removed with ``paths.removerf``
            and the error is re-raised.
    """
    # 'code' and 'task_id' are only read here, so no 'global' statement is
    # needed to reach the module-level values.
    utilities.debug("Gathering result")
    paths.ensure(results_dir)

    # Set "patterns" for results files. Raw string: same pattern text as
    # before, without relying on Python leaving the unknown escapes untouched.
    # NOTE(review): the suffix patterns do not escape "."; if paths.find
    # treats patterns as regular expressions they match slightly more than the
    # literal file names - confirm against paths.find.
    aa_pattern = "aa" + code + task_id + r"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    for suf in (".fasta", ".psipred", ".psipred_ss2"):
        patterns.append(code + task_id + suf)

    try:
        for pattern in patterns:
            print("Pattern  {0}".format(pattern))
            # Find the results file(s) in scr_job_dir.
            match = paths.find(pattern, scr_job_dir)
            if not match:
                # String exceptions ('raise "Missing file"') are rejected with
                # a TypeError on Python >= 2.6, so raise a real exception.
                raise Exception(
                    "Missing file: nothing matches {0} in {1}".format(pattern, scr_job_dir))

            # Copy every matching results file to results_dir.
            for path in match:
                is_frag_lib = pattern == aa_pattern
                if is_frag_lib:
                    print("Filtering Columns of file %s" % path)
                    # inplace=1 backs the file up and redirects stdout into
                    # it, so every print in this loop rewrites one line.
                    try:
                        for line in fileinput.input(path, inplace=1):
                            print(" " + line[:47].strip())
                    finally:
                        # Always restore stdout and reset fileinput's state,
                        # even when filtering fails part-way through.
                        fileinput.close()
                print("Path  {0}".format(path))
                utilities.copy(path, results_dir)
                if is_frag_lib:
                    # Compress the copied fragment library inside results_dir.
                    dest = paths.join(results_dir, paths.getFile(path))
                    utilities.system("bzip2 %s" % dest)
    except:
        # Deliberately bare: whatever interrupts the gather (including
        # KeyboardInterrupt), do not leave a half-filled results directory.
        paths.removerf(results_dir)
        raise
def gather():
    """Collect the job's result files from ``scr_job_dir`` into ``results_dir``.

    Contents of the results directory afterwards (3' library, 9' library,
    fasta, psipred and psipred_ss2 files), e.g.::

        aazj00103_05.075_v1_3.bz2  zj001.fasta  zj001.psipred_ss2
        aazj00109_05.075_v1_3.bz2  zj001.psipred

    File names are built from the module-level ``code`` and ``task_id``
    values. Fragment libraries are rewritten in place in ``scr_job_dir``
    (each line becomes one space plus the stripped first 47 characters),
    copied, and the copy is compressed with the ``bzip2`` binary. The other
    files are copied unchanged.

    Raises:
        Exception: if ``paths.find`` returns nothing for one of the patterns.
            On any failure ``results_dir`` is removed with ``paths.removerf``
            and the error is re-raised.
    """
    # 'code' and 'task_id' are only read here, so no 'global' statement is
    # needed to reach the module-level values.
    utilities.debug("Gathering result")
    paths.ensure(results_dir)

    # Set "patterns" for results files. Raw string: same pattern text as
    # before, without relying on Python leaving the unknown escapes untouched.
    # NOTE(review): the suffix patterns do not escape "."; if paths.find
    # treats patterns as regular expressions they match slightly more than the
    # literal file names - confirm against paths.find.
    aa_pattern = "aa" + code + task_id + r"(03|09)\_05\.075\_v1\_3$"
    patterns = [aa_pattern]
    for suf in (".fasta", ".psipred", ".psipred_ss2"):
        patterns.append(code + task_id + suf)

    try:
        for pattern in patterns:
            print("Pattern  {0}".format(pattern))
            # Find the results file(s) in scr_job_dir.
            match = paths.find(pattern, scr_job_dir)
            if not match:
                # String exceptions ('raise "Missing file"') are rejected with
                # a TypeError on Python >= 2.6, so raise a real exception.
                raise Exception(
                    "Missing file: nothing matches {0} in {1}".format(pattern, scr_job_dir))

            # Copy every matching results file to results_dir.
            for path in match:
                is_frag_lib = pattern == aa_pattern
                if is_frag_lib:
                    print("Filtering Columns of file %s" % path)
                    # inplace=1 backs the file up and redirects stdout into
                    # it, so every print in this loop rewrites one line.
                    try:
                        for line in fileinput.input(path, inplace=1):
                            print(" " + line[:47].strip())
                    finally:
                        # Always restore stdout and reset fileinput's state,
                        # even when filtering fails part-way through.
                        fileinput.close()
                print("Path  {0}".format(path))
                utilities.copy(path, results_dir)
                if is_frag_lib:
                    # Compress the copied fragment library inside results_dir.
                    dest = paths.join(results_dir, paths.getFile(path))
                    utilities.system("bzip2 %s" % dest)
    except:
        # Deliberately bare: whatever interrupts the gather (including
        # KeyboardInterrupt), do not leave a half-filled results directory.
        paths.removerf(results_dir)
        raise
def run_blast(query_file):
    """Run the configured blast executable on ``query_file``.

    The command line is assembled from module-level settings (``blast_exe``,
    ``source_db``, ``e_val``, ``results``, ``alignments``, ``processors``,
    ``output_mode``) and its output is directed to
    ``<query_file>.blast.xml`` via ``-o``.

    Raises:
        Exception: if ``query_file`` is not an existing file.
        Whatever ``system`` raises is reported on stdout and re-raised.
    """
    if not isfile(query_file):
        complaint = "Given blast query file {0} not a valid file".format(query_file)
        raise Exception(complaint)

    xml_report = query_file + '.blast.xml'
    template = "{0} -i {1} -d {2} -e {3} -v {4} -b {5} -a {6} -m {7} -o {8}"
    settings = (blast_exe, query_file, source_db, e_val, results,
                alignments, processors, output_mode, xml_report)
    command = template.format(*settings)
    print("Blast command: {0}".format(command))

    try:
        system(command)
    except Exception as err:
        # Report which query/database pair failed, then let the caller decide.
        print(err)
        failure = "Blasting query file {0} against DB {1} failed"
        print(failure.format(query_file, source_db))
        raise
def run_blast(query_file):
    """Run the configured blast executable on ``query_file``.

    The command line is assembled from module-level settings (``blast_exe``,
    ``source_db``, ``e_val``, ``results``, ``alignments``, ``processors``,
    ``output_mode``) and its output is directed to
    ``<query_file>.blast.xml`` via ``-o``.

    Raises:
        Exception: if ``query_file`` is not an existing file.
        Whatever ``system`` raises is reported on stdout and re-raised.
    """
    if not isfile(query_file):
        complaint = "Given blast query file {0} not a valid file".format(query_file)
        raise Exception(complaint)

    xml_report = query_file + '.blast.xml'
    template = "{0} -i {1} -d {2} -e {3} -v {4} -b {5} -a {6} -m {7} -o {8}"
    settings = (blast_exe, query_file, source_db, e_val, results,
                alignments, processors, output_mode, xml_report)
    command = template.format(*settings)
    print("Blast command: {0}".format(command))

    try:
        system(command)
    except Exception as err:
        # Report which query/database pair failed, then let the caller decide.
        print(err)
        failure = "Blasting query file {0} against DB {1} failed"
        print(failure.format(query_file, source_db))
        raise
def _run_frag(self, nohoms, fasta):
    """Run the fragment-picking script on a fasta file.

    Args:
        nohoms: boolean; when true ``-nohoms`` is added to the script options.
        fasta: fasta input filename, passed as the script's last argument.

    The script is always run with ``-nosam -verbose``.

    Raises:
        Exception: if ``self.frag_script`` is not an existing file.
    """
    # Check that the script given in the config file exists.
    if not os.path.isfile(self.frag_script):
        # NOTE(review): the original referenced 'cofig_file', which looks like
        # a typo that turned this error into a NameError; 'config_file' is
        # assumed to be the module-level name - confirm.
        # The trailing space after "is not" keeps the two literals from
        # running together as "is notaccessible.".
        raise Exception("Fragment picking {0} script specified in config file ({1}) is not "
                        "accessible.".format(self.frag_script, config_file))

    # Current options: [-nohoms] -nosam -verbose.
    if nohoms:
        cmd = "{0} -nohoms -nosam -verbose {1}".format(self.frag_script, fasta)
    else:
        cmd = "{0} -nosam -verbose {1}".format(self.frag_script, fasta)
    print("Fragmentor: executing frag command {0}".format(cmd))
    system(cmd)
def runScript(no_homs=False):
    """Export the tool/database locations and launch the fragmentation script.

    The script finds its dependencies through these environment variables,
    which are set here after checking each location with
    ``paths.existsOrFail``:

        BLAST_DIR, NR_DIR, NNMAKEDB_DIR, NNMAKE_SHORT_DIR, PSIPRED_DIR,
        JUFO_DIR, SAM_DIR, SAM_2ND_DIR

    no_homs -- when true, "-nohoms" is appended to the script invocation.
    """
    utilities.debug("Running fragmentation script")

    required_dirs = {
        'BLAST_DIR': '%s/blast' % tools_dir,
        'NR_DIR': '%s/db/nr' % scr_dir,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr_dir,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % tools_dir,
        'PSIPRED_DIR': '%s/psipred' % tools_dir,
        'JUFO_DIR': '%s/jufo' % tools_dir,
        'SAM_DIR': '%s/sam' % tools_dir,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % tools_dir
    }
    for var_name, location in required_dirs.items():
        # Verify the location first, then publish it through os.environ so
        # the launched script inherits it.
        paths.existsOrFail(location)
        os.environ[var_name] = location

    launcher = paths.join(home, "scripts", frag_script)
    if no_homs:
        launcher = "%s -nohoms" % launcher

    # The script is started from inside the scratch job directory and is
    # given paths.getFile(fasta_file) as its input; -nosam is always passed.
    fasta_name = paths.getFile(fasta_file)
    command = "cd %s && %s -verbose -nosam %s" % (scr_job_dir, launcher, fasta_name)
    utilities.system(command)
def runScript(no_homs=False):
    """Export the tool/database locations and launch the fragmentation script.

    The script finds its dependencies through these environment variables,
    which are set here after checking each location with
    ``paths.existsOrFail``:

        BLAST_DIR, NR_DIR, NNMAKEDB_DIR, NNMAKE_SHORT_DIR, PSIPRED_DIR,
        JUFO_DIR, SAM_DIR, SAM_2ND_DIR

    no_homs -- when true, "-nohoms" is appended to the script invocation.
    """
    utilities.debug("Running fragmentation script")

    required_dirs = {
        'BLAST_DIR': '%s/blast' % tools_dir,
        'NR_DIR': '%s/db/nr' % scr_dir,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr_dir,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % tools_dir,
        'PSIPRED_DIR': '%s/psipred' % tools_dir,
        'JUFO_DIR': '%s/jufo' % tools_dir,
        'SAM_DIR': '%s/sam' % tools_dir,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % tools_dir
    }
    for var_name, location in required_dirs.items():
        # Verify the location first, then publish it through os.environ so
        # the launched script inherits it.
        paths.existsOrFail(location)
        os.environ[var_name] = location

    launcher = paths.join(home, "scripts", frag_script)
    if no_homs:
        launcher = "%s -nohoms" % launcher

    # The script is started from inside the scratch job directory and is
    # given paths.getFile(fasta_file) as its input; -nosam is always passed.
    fasta_name = paths.getFile(fasta_file)
    command = "cd %s && %s -verbose -nosam %s" % (scr_job_dir, launcher, fasta_name)
    utilities.system(command)
def runScript():
    """Export the tool/database locations and launch the fragmentation script.

    The script finds its dependencies through these environment variables,
    which are set here after checking each location with
    ``paths.existsOrFail``:

        BLAST_DIR, NR_DIR, NNMAKEDB_DIR, NNMAKE_SHORT_DIR, PSIPRED_DIR,
        JUFO_DIR, SAM_DIR, SAM_2ND_DIR

    ``no_homs`` is not a parameter of this function; it is read from the
    enclosing (presumably module) scope, and when true "-nohoms" is appended
    to the script invocation.
    """
    util.debug("Running fragmentation script")

    required_dirs = {
        'BLAST_DIR': '%s/blast' % shareware,
        'NR_DIR': '%s/db/nr' % scr,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % shareware,
        'PSIPRED_DIR': '%s/psipred' % shareware,
        'JUFO_DIR': '%s/jufo' % shareware,
        'SAM_DIR': '%s/sam' % shareware,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % shareware
    }
    for var_name, location in required_dirs.items():
        # Verify the location first, then publish it through os.environ so
        # the launched script inherits it.
        paths.existsOrFail(location)
        os.environ[var_name] = location

    launcher = paths.join(home, "scripts", pl_script)
    if no_homs:
        launcher = "%s -nohoms" % launcher

    # The script is started from inside the scratch job directory and is
    # given paths.getFile(scr_fasta) as its input; -nosam is always passed.
    fasta_name = paths.getFile(scr_fasta)
    command = "cd %s && %s -verbose -nosam %s" % (scr_job, launcher, fasta_name)
    util.system(command)
def runScript():
    """Export the tool/database locations and launch the fragmentation script.

    The script finds its dependencies through these environment variables,
    which are set here after checking each location with
    ``paths.existsOrFail``:

        BLAST_DIR, NR_DIR, NNMAKEDB_DIR, NNMAKE_SHORT_DIR, PSIPRED_DIR,
        JUFO_DIR, SAM_DIR, SAM_2ND_DIR

    ``no_homs`` is not a parameter of this function; it is read from the
    enclosing (presumably module) scope, and when true "-nohoms" is appended
    to the script invocation.
    """
    util.debug("Running fragmentation script")

    required_dirs = {
        'BLAST_DIR': '%s/blast' % shareware,
        'NR_DIR': '%s/db/nr' % scr,
        'NNMAKEDB_DIR': '%s/db/nnmake_database' % scr,
        'NNMAKE_SHORT_DIR': '%s/nnmake' % shareware,
        'PSIPRED_DIR': '%s/psipred' % shareware,
        'JUFO_DIR': '%s/jufo' % shareware,
        'SAM_DIR': '%s/sam' % shareware,
        'SAM_2ND_DIR': '%s/sam.predict-2nd' % shareware
    }
    for var_name, location in required_dirs.items():
        # Verify the location first, then publish it through os.environ so
        # the launched script inherits it.
        paths.existsOrFail(location)
        os.environ[var_name] = location

    launcher = paths.join(home, "scripts", pl_script)
    if no_homs:
        launcher = "%s -nohoms" % launcher

    # The script is started from inside the scratch job directory and is
    # given paths.getFile(scr_fasta) as its input; -nosam is always passed.
    fasta_name = paths.getFile(scr_fasta)
    command = "cd %s && %s -verbose -nosam %s" % (scr_job, launcher, fasta_name)
    util.system(command)
def removerf(path):
    """Delete ``path`` by running ``rm -rf <path>`` through hpf.utilities.system.

    The path is interpolated into the command as-is (no quoting is applied).
    """
    # Imported at call time, as in the original, rather than at module load.
    from hpf.utilities import system as run_command

    command = "rm -rf %s" % path
    run_command(command)