def _input_as_multiline_string(self, data):
    """Write a multiline string to a temp file and return the filename.

    data: a multiline string to be written to a file.

    * Note: the result will be the filename as a FilePath object
        (which is a string subclass).

    The filename is also recorded in self._input_filename.
    """
    filename = self._input_filename = \
        FilePath(self.getTmpFilename(self.TmpDir))
    # The context manager closes the handle even when the write fails.
    with open(filename, 'w') as data_file:
        data_file.write(data)
    return filename
def _input_as_list(self, data):
    '''Takes the positional arguments as input in a list.

    The list input here should be
    [query_file_path, database_file_path, output_file_path]

    The three paths are stored on the instance as self._query,
    self._database and self._output. Afterwards every parameter that only
    accepts a fixed set of values is validated, provided it is turned on.

    Returns an empty string.

    Raises ApplicationError if any path is not absolute, if the
    database/query type combination is not in self._valid_combinations,
    or if a restricted parameter holds a value outside its allowed set.
    '''
    query, database, output = data
    if (not isabs(database)) \
            or (not isabs(query)) \
            or (not isabs(output)):
        raise ApplicationError("Only absolute paths allowed.\n%s" %
                               ', '.join(data))

    self._database = FilePath(database)
    self._query = FilePath(query)
    self._output = ResultPath(output, IsWritten=True)

    # check parameters that can only take a particular set of values

    # check combination of database and query type
    if self.Parameters['-t'].isOn() and self.Parameters['-q'].isOn() and \
            (self.Parameters['-t'].Value, self.Parameters['-q'].Value) \
            not in self._valid_combinations:
        error_message = "Invalid combination of database and query " + \
            "types ('%s', '%s').\n" % \
            (self.Parameters['-t'].Value, self.Parameters['-q'].Value)
        error_message += "Must be one of: %s\n" % \
            repr(self._valid_combinations)
        raise ApplicationError(error_message)

    # (flag, attribute holding the allowed values, message for the bad
    # value, message listing the allowed values), in the order checked.
    # The attribute is named rather than read here so that it is only
    # looked up when the corresponding parameter is actually on.
    restricted_parameters = (
        ('-t', '_database_types',
         "Invalid database type %s\n", "Allowed values: %s\n"),
        ('-q', '_query_types',
         "Invalid query type %s\n", "Allowed values: %s\n"),
        ('-mask', '_mask_types',
         "Invalid mask type %s\n", "Allowed Values: %s\n"),
        ('-qMask', '_mask_types',
         "Invalid qMask type %s\n", "Allowed values: %s\n"),
        ('-repeats', '_mask_types',
         "Invalid repeat type %s\n", "Allowed values: %s\n"),
        ('-out', '_out_types',
         "Invalid output type %s\n", "Allowed values: %s\n"),
    )
    for flag, allowed_attr, invalid_fmt, allowed_fmt in \
            restricted_parameters:
        parameter = self.Parameters[flag]
        if not parameter.isOn():
            continue
        allowed = getattr(self, allowed_attr)
        if parameter.Value not in allowed:
            # Always report the offending value (not the parameter
            # object) under the same flag that was checked.
            error_message = invalid_fmt % parameter.Value
            error_message += allowed_fmt % ', '.join(allowed)
            raise ApplicationError(error_message)

    return ''
def __call__(self, data=None, remove_tmp=True):
    """Run the application with the specified kwargs on data

    data: anything that can be cast into a string or written out to
        a file. Usually either a list of things or a single string or
        number. input_handler will be called on this data before it
        is passed as part of the command-line argument, so by creating
        your own input handlers you can customize what kind of data
        you want your application to accept

    remove_tmp: if True, removes tmp files

    Returns the CommandLineAppResult built from the captured stdout,
    stderr, exit status and result paths (or whatever
    _handle_app_result_build_failure returns when building it raises
    ApplicationError).

    Raises AssertionError (carrying the command) instead of executing
    when self.HaltExec is set, and ApplicationError when the exit status
    is rejected by _accept_exit_status.
    """
    suppress_stdout = self.SuppressStdout
    suppress_stderr = self.SuppressStderr

    # Suppressed streams are routed to /dev/null, captured ones to a
    # temporary file.
    if suppress_stdout:
        outfile = FilePath('/dev/null')
    else:
        outfile = self.getTmpFilename(self.TmpDir)
    if suppress_stderr:
        errfile = FilePath('/dev/null')
    else:
        errfile = FilePath(self.getTmpFilename(self.TmpDir))

    if data is None:
        input_arg = ''
    else:
        input_arg = getattr(self, self.InputHandler)(data)

    # Build up the command, consisting of a BaseCommand followed by
    # input and output (file) specifications. Empty pieces (e.g. no
    # input argument) are dropped before joining.
    command_parts = [self.BaseCommand, str(input_arg),
                     '>', str(outfile), '2>', str(errfile)]
    command = self._command_delimiter.join(
        [part for part in command_parts if part])

    if self.HaltExec:
        raise AssertionError("Halted exec with command:\n" + command)

    # The return value of system is a 16-bit number containing the signal
    # number that killed the process, and then the exit status.
    # We only want to keep the exit status so do a right bitwise shift to
    # get rid of the signal number byte
    exit_status = system(command) >> 8

    # Determine if error should be raised due to exit status of
    # application.
    # NOTE: exit status 127 (bash: command not found) gets no special
    # treatment here; it is judged by _accept_exit_status like any other.
    if not self._accept_exit_status(exit_status):
        # Read the captured output through context managers so the
        # handles are closed before the exception propagates.
        with open(outfile) as stdout_file:
            stdout_text = stdout_file.read()
        with open(errfile) as stderr_file:
            stderr_text = stderr_file.read()
        raise ApplicationError(
            'Unacceptable application exit status: %s\n' % str(exit_status) +
            'Command:\n%s\nStdOut:\n%s\nStdErr:\n%s\n' %
            (command, stdout_text, stderr_text))

    # open the stdout and stderr if not being suppressed; these handles
    # are handed over to the result object
    out = None
    if not suppress_stdout:
        out = open(outfile, "r")
    err = None
    if not suppress_stderr:
        err = open(errfile, "r")

    try:
        result = CommandLineAppResult(
            out, err, exit_status,
            result_paths=self._get_result_paths(data))
    except ApplicationError:
        result = self._handle_app_result_build_failure(
            out, err, exit_status, self._get_result_paths(data))

    # Clean up the input file if one was created
    if remove_tmp:
        if self._input_filename:
            remove(self._input_filename)
            self._input_filename = None

    return result
def __init__(self, params=None, InputHandler=None, SuppressStderr=None,
             SuppressStdout=None, WorkingDir=None, TmpDir='/tmp',
             TmpNameLen=20, HALT_EXEC=False):
    """Set up a CommandLineApplication instance.

    params: dictionary mapping a Parameter id or synonym to its value
        (None for FlagParameters, or MixedParameters in flag mode) for
        every Parameter that should be turned on
    InputHandler: name (a string) of the method that is run on data
        when the object is called. When not given, the class-level
        _input_handler is used; the original documentation names
        _input_as_string, which casts data to a string before it is
        appended to the command line, as that default
    SuppressStderr: when True, standard error is routed to /dev/null.
        When not given, the class-level _suppress_stderr is used
    SuppressStdout: when True, standard out is routed to /dev/null.
        When not given, the class-level _suppress_stdout is used
    WorkingDir: directory the application should run in. When not
        given, the class-level _working_dir is used, falling back to
        the current working directory. Useful when the program writes
        output to its working directory and that output should not
        land where the script is started, or when the user has no
        write access there
        WARNING: WorkingDir MUST be an absolute path!
    TmpDir: directory in which temp files are created, /tmp by default
    TmpNameLen: length of the temp file name
    HALT_EXEC: if True, an exception carrying the command is raised
        just before execution and temp files are not cleaned up.
        Default False.
    """
    # Determine if the application is installed, and raise an error if not
    self._error_on_missing_application(params)

    # An explicit argument wins; None selects the class default.
    self.InputHandler = (InputHandler if InputHandler is not None
                         else self._input_handler)
    self.SuppressStderr = (SuppressStderr if SuppressStderr is not None
                           else self._suppress_stderr)
    self.SuppressStdout = (SuppressStdout if SuppressStdout is not None
                           else self._suppress_stdout)

    run_dir = WorkingDir
    if run_dir is None:
        run_dir = self._working_dir or getcwd()
    self.WorkingDir = FilePath(run_dir)

    self.TmpDir = FilePath(TmpDir)
    self.TmpNameLen = TmpNameLen
    self.HaltExec = HALT_EXEC

    # Name of the file currently used as input to the application. This
    # matters when an input handler creates a temporary file and the
    # output filenames are derived from the input filename.
    self._input_filename = None

    super(CommandLineApplication, self).__init__(params=params)
def main():
    """Split the query sequences into BLAST hits and non-hits.

    Steps, in order:
      1. Parse and check the command line options.
      2. Either run formatdb on the subject database or, when
         options.no_format_db is set, check that the pre-built formatdb
         files can be opened.
      3. Run the BLAST search through find_homologs().
      4. Write raw_blast_results.txt, matching.fna, non-matching.fna and
         sequence_exclusion.log into options.outputdir.
      5. Unless options.no_clean is set, remove the formatdb files that
         were generated in step 2.
    """
    option_parser, options, args = parse_command_line_parameters(**script_info)
    DEBUG = options.verbose
    check_options(option_parser, options)
    start_time = time()
    option_lines = format_options_as_lines(options)
    if DEBUG:
        print(FORMAT_BAR)
        print("Running with options:")
        for line in sorted(option_lines):
            print(line)
        print(FORMAT_BAR)

    # because the blast app controller uses absolute paths, make sure subject
    # db path is fully specified
    subject_db = options.subjectdb
    if not subject_db.startswith('/'):
        subject_db = join(getcwd(), subject_db)

    if not options.no_format_db:
        # initialize object; formatdb runs in the subject db's directory
        inpath = FilePath(abspath(options.subjectdb))
        subject_dir = split(inpath)[0]

        fdb = FormatDb(WorkingDir=subject_dir)
        # Currently we do not support protein blasts, but
        # this would be easy to add in the future...
        fdb.Parameters['-p'].on('F')

        # Create indices for record lookup
        fdb.Parameters['-o'].on('T')

        # Set input database
        fdb.Parameters['-i'].on(subject_db)

        formatdb_cmd = fdb.BaseCommand

        if DEBUG:
            print("Formatting db with command: %s" % formatdb_cmd)

        app_result = fdb(subject_db)
        formatdb_filepaths = []
        for v in app_result.values():
            try:
                formatdb_filepaths.append(v.name)
            except AttributeError:
                # not a file object, so no path to return
                pass

        db_format_time = time() - start_time

        if DEBUG:
            print("Formatting subject db took: %.2f seconds" %
                  db_format_time)
            print("formatdb log file written to: %s" % app_result['log'])
            print(FORMAT_BAR)
    else:
        db_format_time = time() - start_time
        formatdb_cmd = "None (formatdb not called)"

        # Check that User-Supplied subjectdb is valid
        db_ext = [".nhr", ".nin", ".nsd", ".nsi", ".nsq"]
        formatdb_filepaths = [subject_db + ext for ext in db_ext]

        if DEBUG:
            print("Checking that pre-existing formatdb files exist "
                  "and can be read.")
            print("Files to be checked:")
            for fp in formatdb_filepaths:
                print(fp)
            print(FORMAT_BAR)

        # Open and immediately close each file in turn, so no handle is
        # left open and the failing path is known explicitly.
        for db_f in formatdb_filepaths:
            try:
                open(db_f, "r").close()
            except IOError:
                if DEBUG:
                    print("Cannot open user-supplied database file: %s" %
                          db_f)
                option_parser.error(
                    "Problem with -d and --no_format_db option combination: "
                    "Cannot open the following user-supplied database file: "
                    "%s. "
                    "Consider running without --no_format_db to let formatdb "
                    "generate these required files" % db_f)
                # Stop at the first unreadable file, whether or not
                # error() returns.
                break

        if DEBUG:
            print("OK: BLAST Database files exist and can be read.")
            print(FORMAT_BAR)

    # Perform BLAST search
    blast_results, hit_ids, removed_hit_ids = find_homologs(
        options.querydb, subject_db, options.e_value, options.max_hits,
        options.working_dir, options.blastmatroot, options.wordsize,
        options.percent_aligned, DEBUG=DEBUG)

    blast_time = (time() - start_time) - db_format_time

    if DEBUG:
        print("BLAST search took: %.2f minute(s)" % (blast_time / 60.0))
        print(FORMAT_BAR)

    # Create output folder
    outputdir = options.outputdir
    try:
        makedirs(outputdir)
    except OSError:
        # Best effort, e.g. the directory already exists.
        # NOTE(review): every other OSError is ignored here as well; a
        # directory that is really missing only shows up when the first
        # output file is opened below.
        pass

    # Record raw blast results
    raw_blast_results_path = join(outputdir, "raw_blast_results.txt")
    with open(raw_blast_results_path, 'w') as f:
        f.writelines(blast_results)

    # Record excluded seqs
    excluded_seqs_path = join(outputdir, "matching.fna")
    ids_to_seq_file(hit_ids, options.querydb, excluded_seqs_path, "")

    # Record included (screened) seqs
    included_seqs_path = join(outputdir, "non-matching.fna")
    # The query file stays open until the ids have been turned into a set,
    # then is closed deterministically.
    with open(options.querydb) as query_file:
        all_ids = ids_from_fasta_lines(query_file)
        included_ids = set(all_ids) - hit_ids
    ids_to_seq_file(included_ids, options.querydb, included_seqs_path, "")

    log_lines = compose_logfile_lines(start_time, db_format_time, blast_time,
                                      option_lines, formatdb_cmd,
                                      blast_results, options, all_ids,
                                      hit_ids, removed_hit_ids,
                                      included_ids, DEBUG)

    log_path = join(outputdir, "sequence_exclusion.log")
    if DEBUG:
        print("Writing summary to: %s" % log_path)

    with open(log_path, 'w') as f:
        f.writelines(log_lines)

    if not options.no_clean:
        if DEBUG:
            print(FORMAT_BAR)
            print("| Cleanup |")
            print(FORMAT_BAR)

        if not options.no_format_db:
            # Only files generated by this run are removed; a
            # user-supplied database is left alone.
            if DEBUG:
                print("Cleaning up formatdb files: %s" %
                      (formatdb_filepaths,))
            remove_files(formatdb_filepaths)
        else:
            if DEBUG:
                print("Formatdb not run...nothing to clean")
def test_str_path_is_None(self):
    """FilePath: str() of a FilePath built from None is the empty string
    """
    path_from_none = FilePath(None)
    self.assertEqual(str(path_from_none), '')
def test_init(self):
    """FilePath: a new FilePath compares equal to the value it was built from
    """
    # Every path from the fixture is checked first, the empty string last.
    for candidate in list(self.all_paths) + ['']:
        self.assertEqual(FilePath(candidate), candidate)