def test_str(self):
    """FilePath: str wraps path in quotes
    """
    # One hard-coded expectation first (for sanity), then sweep
    # through every example path
    quoted_filename = str(FilePath(self.filename))
    self.assertEqual(quoted_filename, '"filename.txt"')
    for path in self.all_paths:
        expected = ''.join(['"', path, '"'])
        self.assertEqual(str(FilePath(path)), expected)
def test_FilePath_identity_preserved(self):
    """FilePath: trivial actions on FilePaths yield original FilePath
    """
    original = FilePath(self.filename)
    # Creating FilePath from FilePath results in FilePath
    # equal to original
    self.assertEqual(FilePath(original), original)
    # The loop uses its own variable so that `original` is still the
    # FilePath instance (not the last raw example path) afterwards
    for path in self.all_paths:
        self.assertEqual(FilePath(path), path)
    # Appending an empty FilePath to a FilePath results in FilePath
    # equal to original
    self.assertEqual(original + FilePath(''), original)
def test_add(self):
    """FilePath: add (or joining of paths) functions as expected
    """
    expected = FilePath('a/relative/path/filename.txt')

    # FilePath + FilePath
    actual = FilePath(self.relative_dir_path) + FilePath(self.filename)
    self.assertEqual(actual, expected)
    # result is a FilePath; assertTrue (rather than a bare assert) keeps
    # the check alive under `python -O` and reports through unittest
    self.assertTrue(isinstance(actual, FilePath))

    # appending a string to a FilePath results in a FilePath
    actual = FilePath(self.relative_dir_path) + 'filename.txt'
    self.assertEqual(actual, expected)
    # result is a FilePath
    self.assertTrue(isinstance(actual, FilePath))
def _absolute(self, path):
    """ Convert a filename to an absolute path

        path: the filename to convert; it is wrapped in a FilePath, and
         if it is not already absolute it is joined onto self.WorkingDir
    """
    candidate = FilePath(path)
    if not isabs(candidate):
        # both operands are Path objects, so joining with + is acceptable
        candidate = self.WorkingDir + candidate
    return candidate
def __init__(self, Path, IsWritten=True):
    """ Initialize the ResultPath object

        Path: a string representing the absolute or relative path where
         the file can be found
        IsWritten: a boolean specifying whether the file has been
         written, default = True
    """
    wrapped_path = FilePath(Path)
    self.Path = wrapped_path
    self.IsWritten = IsWritten
def _input_as_path(self, data):
    """ Return data as a FilePath

        data: path or filename, most likely as a string

        * Note: the result will be the filename as a FilePath object
         (which is a string subclass).
    """
    path = FilePath(data)
    return path
def _input_as_lines(self, data):
    """ Write a seq of lines to a temp file and return the filename string

        data: a sequence to be written to a file, each element of the
         sequence will compose a line in the file

        * Note: the result will be the filename as a FilePath object
         (which is a string subclass).

        * Note: newline characters will be stripped off the end of each
         sequence element before writing to a file in order to avoid
         multiple new lines accidentally being written to a file
    """
    # Remember the temp file name on the instance as well as returning it
    filename = self._input_filename = \
        FilePath(self.getTmpFilename(self.TmpDir))
    data_file = open(filename, 'w')
    try:
        data_to_file = '\n'.join([str(d).strip('\n') for d in data])
        data_file.write(data_to_file)
    finally:
        # Always release the handle, even if formatting or writing fails
        data_file.close()
    return filename
def _set_WorkingDir(self, path):
    """Sets the working directory

        Appends a slash to the end of path. The reasoning behind this is
        that the user may or may not pass in a path with a '/' at the
        end. Since having multiple '/' at the end doesn't hurt anything,
        it's convenient to be able to rely on it, and not have to check
        for it
    """
    working_dir = FilePath(path) + '/'
    self._curr_working_dir = working_dir
    try:
        mkdir(self.WorkingDir)
    except OSError:
        # Directory already exists
        pass
def _input_as_multiline_string(self, data):
    """Write a multiline string to a temp file and return the filename.

        data: a multiline string to be written to a file.

        * Note: the result will be the filename as a FilePath object
         (which is a string subclass).
    """
    # Remember the temp file name on the instance as well as returning it
    filename = self._input_filename = \
        FilePath(self.getTmpFilename(self.TmpDir))
    data_file = open(filename, 'w')
    try:
        data_file.write(data)
    finally:
        # Always release the handle, even if the write fails
        data_file.close()
    return filename
def main(): option_parser, options, args = parse_command_line_parameters(**script_info) DEBUG = options.verbose check_options(option_parser, options) start_time = time() option_lines = format_options_as_lines(options) if DEBUG: print FORMAT_BAR print "Running with options:" for line in sorted(option_lines): print line print FORMAT_BAR # because the blast app controller uses absolute paths, make sure subject # db path is fully specified subject_db = options.subjectdb if not subject_db.startswith('/'): subject_db = join(getcwd(), subject_db) if not options.no_format_db: # initialize object inpath = FilePath(abspath(options.subjectdb)) subject_dir, subj_file = split(inpath) fdb = FormatDb(WorkingDir=subject_dir) # Currently we do not support protein blasts, but # this would be easy to add in the future... fdb.Parameters['-p'].on('F') # Create indices for record lookup fdb.Parameters['-o'].on('T') # Set input database fdb.Parameters['-i'].on(subject_db) formatdb_cmd = fdb.BaseCommand if DEBUG: print "Formatting db with command: %s" % formatdb_cmd app_result = fdb(subject_db) formatdb_filepaths = [] for v in app_result.values(): try: formatdb_filepaths.append(v.name) except AttributeError: # not a file object, so no path to return pass db_format_time = time() - start_time if DEBUG: print "Formatting subject db took: %2.f seconds" % db_format_time print "formatdb log file written to: %s" % app_result['log'] print FORMAT_BAR else: db_format_time = time() - start_time formatdb_cmd = "None (formatdb not called)" # Check that User-Supplied subjectdb is valid db_ext = [".nhr", ".nin", ".nsd", ".nsi", ".nsq"] formatdb_filepaths = [subject_db + ext for ext in db_ext] if DEBUG: print "Checking that pre-existing formatdb files exist and can be read." 
print "Files to be checked:" for fp in formatdb_filepaths: print fp print FORMAT_BAR try: formatdb_files = [open(db_f, "U") for db_f in formatdb_filepaths] [f.close() for f in formatdb_files] except IOError: if DEBUG: print "Cannot open user-supplied database file:", db_f option_parser.error( """Problem with -d and --no_format_db option combination: Cannot open the following user-supplied database file: %s. Consider running without --no_format_db to let formatdb generate these required files""" % db_f) if DEBUG: print "OK: BLAST Database files exist and can be read." print FORMAT_BAR # Perform BLAST search blast_results, hit_ids, removed_hit_ids = find_homologs( options.querydb, subject_db, options.e_value, options.max_hits, options.working_dir, options.blastmatroot, options.wordsize, options.percent_aligned, DEBUG=DEBUG) blast_time = (time() - start_time) - db_format_time if DEBUG: print "BLAST search took: %2.f minute(s)" % (blast_time / 60.0) print FORMAT_BAR # Create output folder outputdir = options.outputdir try: makedirs(outputdir) except OSError: pass # Record raw blast results raw_blast_results_path = join(outputdir, "raw_blast_results.txt") f = open(raw_blast_results_path, 'w') f.writelines(blast_results) f.close() # Record excluded seqs excluded_seqs_path = join(outputdir, "matching.fna") ids_to_seq_file(hit_ids, options.querydb, excluded_seqs_path, "") # Record included (screened) seqs included_seqs_path = join(outputdir, "non-matching.fna") all_ids = ids_from_fasta_lines(open(options.querydb)) included_ids = set(all_ids) - hit_ids ids_to_seq_file(included_ids, options.querydb, included_seqs_path, "") log_lines = compose_logfile_lines(start_time, db_format_time, blast_time, option_lines, formatdb_cmd, blast_results, options, all_ids, hit_ids, removed_hit_ids, included_ids, DEBUG) log_path = join(outputdir, "sequence_exclusion.log") if DEBUG: print "Writing summary to: %s" % log_path f = open(log_path, 'w') f.writelines(log_lines) f.close() if not 
options.no_clean: if DEBUG: print FORMAT_BAR print "| Cleanup |" print FORMAT_BAR if not options.no_format_db: if options.verbose: print "Cleaning up formatdb files:", formatdb_filepaths remove_files(formatdb_filepaths) else: if options.verbose: print "Formatdb not run...nothing to clean"
def __call__(self, data=None, remove_tmp=True):
    """Run the application with the specified kwargs on data

        data: anything that can be cast into a string or written out to
         a file. Usually either a list of things or a single string or
         number. The input handler will be called on this data before it
         is passed as part of the command-line argument, so by creating
         your own input handlers you can customize what kind of data
         you want your application to accept

        remove_tmp: if True, removes the temporary input file (if one
         was created by the input handler)
    """
    handler_name = self.InputHandler
    quiet_stdout = self.SuppressStdout
    quiet_stderr = self.SuppressStderr

    # Decide where stdout and stderr are redirected to: /dev/null when
    # suppressed, otherwise a fresh temp file (stdout first, then stderr)
    if not quiet_stdout:
        outfile = self.getTmpFilename(self.TmpDir)
    else:
        outfile = FilePath('/dev/null')
    if not quiet_stderr:
        errfile = FilePath(self.getTmpFilename(self.TmpDir))
    else:
        errfile = FilePath('/dev/null')

    # Let the configured input handler turn data into a command-line
    # argument; with no data there is nothing to add
    input_arg = ''
    if data is not None:
        input_arg = getattr(self, handler_name)(data)

    # Build up the command, consisting of a BaseCommand followed by
    # input and output (file) specifications; empty pieces are dropped
    pieces = [self.BaseCommand, str(input_arg),
              '>', str(outfile),
              '2>', str(errfile)]
    command = self._command_delimiter.join(
        [piece for piece in pieces if piece])

    if self.HaltExec:
        raise AssertionError("Halted exec with command:\n" + command)

    # The return value of system is a 16-bit number containing the signal
    # number that killed the process, and then the exit status.
    # We only want to keep the exit status so do a right bitwise shift to
    # get rid of the signal number byte
    raw_status = system(command)
    exit_status = raw_status >> 8

    # Determine if error should be raised due to exit status of
    # application
    if not self._accept_exit_status(exit_status):
        message_parts = [
            'Unacceptable application exit ',
            'status: %s\n' % str(exit_status),
            'Command:\n%s\n' % command,
            'StdOut:\n%s\n' % open(outfile).read(),
            'StdErr:\n%s\n' % open(errfile).read(),
        ]
        raise ApplicationError(''.join(message_parts))

    # open the stdout and stderr if not being suppressed
    if quiet_stdout:
        out = None
    else:
        out = open(outfile, "r")
    if quiet_stderr:
        err = None
    else:
        err = open(errfile, "r")

    result_paths = self._get_result_paths(data)
    try:
        result = CommandLineAppResult(out, err, exit_status,
                                      result_paths=result_paths)
    except ApplicationError:
        result = self._handle_app_result_build_failure(
            out, err, exit_status, result_paths)

    # Clean up the input file if one was created
    if remove_tmp and self._input_filename:
        remove(self._input_filename)
        self._input_filename = None

    return result
def __init__(self, params=None, InputHandler=None, SuppressStderr=None,
             SuppressStdout=None, WorkingDir=None, TmpDir='/tmp',
             TmpNameLen=20, HALT_EXEC=False):
    """ Initialize the CommandLineApplication object

        params: a dictionary mapping the Parameter id or synonym to its
         value (or None for FlagParameters or MixedParameters in flag
         mode) for Parameters that should be turned on
        InputHandler: this is the method to be run on data when it is
         passed into call. This should be a string containing the
         method name. The default is _input_as_string which casts data
         to a string before appending it to the command line argument
        SuppressStderr: if set to True, will route standard error to
         /dev/null, False by default
        SuppressStdout: if set to True, will route standard out to
         /dev/null, False by default
        WorkingDir: the directory where you want the application to run,
         default is the current working directory, but is useful to
         change in cases where the program being run creates output to
         its current working directory and you either don't want it to
         end up where you are running the program, or the user running
         the script doesn't have write access to the current working
         directory
         WARNING: WorkingDir MUST be an absolute path!
        TmpDir: the directory where temp files will be created, /tmp by
         default
        TmpNameLen: the length of the temp file name
        HALT_EXEC: if True, raises exception w/ command output just
         before execution, doesn't clean up temp files. Default False.
    """
    # Determine if the application is installed, and raise an error if not
    self._error_on_missing_application(params)

    # For each setting, an explicit argument wins; None means "use the
    # class default"
    handler = InputHandler
    if handler is None:
        handler = self._input_handler
    self.InputHandler = handler

    suppress_err = SuppressStderr
    if suppress_err is None:
        suppress_err = self._suppress_stderr
    self.SuppressStderr = suppress_err

    suppress_out = SuppressStdout
    if suppress_out is None:
        suppress_out = self._suppress_stdout
    self.SuppressStdout = suppress_out

    # Fall back to the class-level working dir, then to the current one
    working_dir = WorkingDir
    if working_dir is None:
        working_dir = self._working_dir or getcwd()
    self.WorkingDir = FilePath(working_dir)

    self.TmpDir = FilePath(TmpDir)
    self.TmpNameLen = TmpNameLen
    self.HaltExec = HALT_EXEC

    # create a variable to hold the name of the file being used as
    # input to the application. this is important especially when
    # you are using an input handler which creates a temporary file
    # and the output filenames are based on the input filenames
    self._input_filename = None

    super(CommandLineApplication, self).__init__(params=params)
def test_str_path_is_None(self):
    """FilePath: str return empty string when path is None
    """
    rendered = str(FilePath(None))
    self.assertEqual(rendered, '')
def test_init(self):
    """FilePath: initialization returns w/o error
    """
    # Every example path should build a FilePath equal to itself
    for path in self.all_paths:
        built = FilePath(path)
        self.assertEqual(built, path)
    # The empty path is accepted as well
    empty = FilePath('')
    self.assertEqual(empty, '')