def run_prodigal(record: Record, options: ConfigType) -> None:
    """ Run prodigal to annotate prokaryotic sequences
    """
    if "basedir" in options.get('prodigal', ''):
        basedir = options.prodigal.basedir
    else:
        basedir = ""
    with TemporaryDirectory(change=True):
        # use the record id for temporary filenames, falling back if empty
        name = record.id.lstrip('-')
        if not name:
            name = "unknown"
        fasta_file = '%s.fasta' % name
        result_file = '%s.predict' % name
        with open(fasta_file, 'w') as handle:
            seqio.write([record.to_biopython()], handle, 'fasta')

        # run prodigal
        prodigal = [path.join(basedir, 'prodigal')]
        prodigal.extend(['-i', fasta_file, '-f', 'sco', '-o', result_file])
        if options.genefinding_tool == "prodigal-m" or len(record.seq) < 20000:
            prodigal.extend(['-p', 'meta'])

        err = execute(prodigal).stderr
        if err.find('Error') > -1:
            logging.error("Failed to run prodigal: %r", err)
            raise RuntimeError("prodigal error: %s" % err)

        found = 0
        for line in open(result_file, 'r'):
            # only gene lines start with '>', skip headers and other lines
            if not line.startswith('>'):
                continue
            name, start_chunk, end_chunk, prodigal_strand = line[1:].rstrip().split("_")
            try:
                start = int(start_chunk)
                end = int(end_chunk)
                if prodigal_strand == "+":
                    strand = 1
                else:
                    strand = -1
            except ValueError:
                logging.error('Malformatted prodigal output line %r', line.rstrip())
                continue

            if start > end:
                strand = -1
                start, end = end, start

            loc = FeatureLocation(start - 1, end, strand=strand)
            translation = record.get_aa_translation_from_location(loc)
            feature = CDSFeature(loc, locus_tag='ctg%s_%s' % (record.record_index, name),
                                 translation=translation,
                                 translation_table=record.transl_table)
            record.add_cds_feature(feature)
            found += 1
    logging.debug("prodigal found %d CDS features", found)
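
# A minimal sketch of the sco-format parsing that run_prodigal relies on:
# prodigal's '-f sco' output names each gene as '>ordinal_start_end_strand'.
# The header value below is illustrative, not taken from a real prodigal run.
def _example_parse_sco_line() -> None:
    line = ">1_3_692_+\n"
    name, start_chunk, end_chunk, prodigal_strand = line[1:].rstrip().split("_")
    start, end = int(start_chunk), int(end_chunk)
    strand = 1 if prodigal_strand == "+" else -1
    assert (name, start, end, strand) == ("1", 3, 692, 1)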
def _run_antismash(sequence_file: Optional[str], options: ConfigType) -> int:
    """ The real run_antismash, assumes logging is set up around it """
    logging.info("antiSMASH version: %s", options.version)
    _log_found_executables(options)

    detection_modules = get_detection_modules()
    analysis_modules = get_analysis_modules()
    output_modules = get_output_modules()
    modules = detection_modules + analysis_modules + output_modules

    if options.list_plugins:
        list_plugins(modules)
        return 0

    options.all_enabled_modules = list(filter(lambda x: x.is_enabled(options), modules))

    if options.check_prereqs_only:
        try:
            check_prerequisites(modules, options)
        except RuntimeError:
            print("Some module prerequisites not satisfied")
            return 1
        print("All prerequisites satisfied")
        return 0
    else:
        check_prerequisites(options.all_enabled_modules, options)

    # start up profiling if relevant
    if options.profile:
        profiler = cProfile.Profile()
        profiler.enable()

    # ensure the provided options are valid
    if not verify_options(options, options.all_enabled_modules):
        return 1

    # check that at least one module will run
    if not options.all_enabled_modules:
        raise ValueError("No detection or analysis modules enabled")

    start_time = datetime.now()

    results = read_data(sequence_file, options)

    # reset module timings
    results.timings_by_record.clear()

    prepare_output_directory(options.output_dir, sequence_file or options.reuse_results)

    results.records = record_processing.pre_process_sequences(results.records, options,
                                                              cast(AntismashModule, genefinding))
    for record, module_results in zip(results.records, results.results):
        # skip if we're not interested in it
        if record.skip:
            continue
        logging.info("Analysing record: %s", record.id)
        timings = run_detection(record, options, module_results)
        # and skip analysis if detection didn't find anything
        if not record.get_regions():
            continue
        analysis_timings = analyse_record(record, options, analysis_modules, module_results)
        timings.update(analysis_timings)
        results.timings_by_record[record.id] = timings

    # Write results
    json_filename = os.path.join(options.output_dir, results.input_file)
    json_filename = os.path.splitext(json_filename)[0] + ".json"
    logging.debug("Writing json results to '%s'", json_filename)
    results.write_to_file(json_filename)

    # now that the json is out of the way, annotate the record
    # otherwise we could double annotate some areas
    annotate_records(results)

    # create relevant output files
    write_outputs(results, options)

    # save profiling data
    if options.profile:
        profiler.disable()
        write_profiling_results(profiler,
                                os.path.join(options.output_dir, "profiling_results"))

    running_time = datetime.now() - start_time

    # display module runtimes before total time
    if options.debug:
        log_module_runtimes(results.timings_by_record)

    logging.debug("antiSMASH calculation finished at %s; runtime: %s",
                  datetime.now().strftime("%Y-%m-%d %H:%M:%S"), str(running_time))
    logging.info("antiSMASH status: SUCCESS")
    return 0
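
# Both pipeline variants wrap their work in the same standard-library
# profiling pattern. A self-contained sketch of that pattern, with a
# placeholder workload and output path standing in for the real pipeline:
def _example_profiling_pattern() -> None:
    import cProfile
    import pstats

    profiler = cProfile.Profile()
    profiler.enable()
    sum(i * i for i in range(100_000))  # placeholder for the real work
    profiler.disable()

    # dump the raw stats to disk, then show the five most expensive calls
    profiler.dump_stats("profiling_results")
    pstats.Stats("profiling_results").sort_stats("cumulative").print_stats(5)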
def run_antismash(sequence_file: Optional[str], options: ConfigType) -> int:
    """ The complete antismash pipeline. Reads in data, runs detection and
        analysis modules over any records found, then outputs the results to
        file.

        Arguments:
            sequence_file: the sequence file to read in records from, can be
                           None if reusing results
            options: command line options

        Returns:
            0 if requested operations completed successfully, otherwise 1
            Exceptions may also be raised
    """
    setup_logging(logfile=options.logfile, verbose=options.verbose, debug=options.debug)

    detection_modules = get_detection_modules()
    analysis_modules = get_analysis_modules()
    modules = detection_modules + analysis_modules

    if options.list_plugins:
        list_plugins(modules)
        return 0

    options.all_enabled_modules = list(filter(lambda x: x.is_enabled(options), modules))

    if options.check_prereqs_only:
        try:
            check_prerequisites(modules)
        except RuntimeError:
            print("Some module prerequisites not satisfied")
            return 1
        print("All prerequisites satisfied")
        return 0
    else:
        check_prerequisites(options.all_enabled_modules)

    # start up profiling if relevant
    if options.profile:
        profiler = cProfile.Profile()
        profiler.enable()

    # ensure the provided options are valid
    if not verify_options(options, options.all_enabled_modules):
        return 1  # TODO: change to a raise?

    # check that at least one module will run
    if not options.all_enabled_modules:
        raise ValueError("No detection or analysis modules enabled")

    start_time = datetime.now()

    results = read_data(sequence_file, options)

    # reset module timings
    results.timings_by_record.clear()

    prepare_output_directory(options.output_dir, sequence_file or options.reuse_results)

    results.records = record_processing.pre_process_sequences(results.records, options,
                                                              cast(AntismashModule, genefinding))
    for record, module_results in zip(results.records, results.results):
        # skip if we're not interested in it
        if record.skip:
            continue
        timings = run_detection(record, options, module_results)
        # and skip analysis if detection didn't find anything
        if not record.get_clusters():
            continue
        analysis_timings = analyse_record(record, options, analysis_modules, module_results)
        timings.update(analysis_timings)
        results.timings_by_record[record.id] = timings

    # Write results
    json_filename = os.path.join(options.output_dir, results.input_file)
    json_filename = os.path.splitext(json_filename)[0] + ".json"
    logging.debug("Writing json results to '%s'", json_filename)
    results.write_to_file(json_filename)

    # now that the json is out of the way, annotate the record
    # otherwise we could double annotate some areas
    annotate_records(results)

    # create relevant output files
    write_outputs(results, options)

    # save profiling data
    if options.profile:
        profiler.disable()
        write_profiling_results(profiler,
                                os.path.join(options.output_dir, "profiling_results"))

    running_time = datetime.now() - start_time

    # display module runtimes before total time
    if options.debug:
        log_module_runtimes(results.timings_by_record)

    logging.debug("antiSMASH calculation finished at %s; runtime: %s",
                  str(datetime.now()), str(running_time))
    logging.info("antiSMASH status: SUCCESS")
    return 0
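
# The json output path in both variants is derived the same way: join the
# output directory with the input filename, then swap the extension for
# '.json'. A quick standalone check with illustrative paths:
def _example_json_filename() -> None:
    import os.path
    json_filename = os.path.join("output_dir", "genome.gbk")
    json_filename = os.path.splitext(json_filename)[0] + ".json"
    assert json_filename == os.path.join("output_dir", "genome.json")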