def check_prereqs() -> List[str]:
    """ Checks that all required executables and HMM profiles are available.

        Each profile is looked up in the 'data' directory next to this module.
        Profiles named '*.hmm2' are only located; for any other profile the
        hmmpress output files ('.h3f', '.h3i', '.h3m', '.h3p') are regenerated
        when one is missing or not newer than the profile itself.

        Returns:
            a list of strings describing any problems found, empty if none
    """
    required_executables = ('hmmpfam2', 'hmmscan', 'hmmpress')
    # the profile names are hard-coded here, all of them are '.hmm2' profiles
    profile_names = (
        "PKSI-KR.hmm2", "PKSI-KS_N.hmm2", "PKSI-KS_C.hmm2", "PKSI-AT.hmm2",
        "PKSI-ACP.hmm2", "PKSI-DH.hmm2", "Thioesterase.hmm2", "PKSI-ER.hmm2",
        "aa-activating.aroundLys.hmm2", "p450.hmm2",
    )
    pressed_suffixes = ('.h3f', '.h3i', '.h3m', '.h3p')

    problems = [
        "Failed to locate file: %r" % executable
        for executable in required_executables
        if not path.locate_executable(executable)
    ]

    for profile in profile_names:
        profile_path = path.get_full_path(__file__, "data", profile)
        if path.locate_file(profile_path) is None:
            problems.append("Failed to locate file: %s" % profile)
            continue
        # '.hmm2' profiles are never pressed, finding them is enough
        if profile.endswith(".hmm2"):
            continue

        for suffix in pressed_suffixes:
            pressed = "%s%s" % (profile_path, suffix)

            if path.locate_file(pressed):
                pressed_changed = os.path.getmtime(pressed)
                profile_changed = os.path.getmtime(profile_path)
                if profile_changed < pressed_changed:
                    # this pressed file is newer than the profile, check the next
                    continue

                # stale output: clear all pressed files before regenerating
                try:
                    for stale in glob.glob("{}.h3?".format(profile_path)):
                        logging.debug("removing outdated file %r", stale)
                        os.remove(stale)
                except OSError as err:
                    problems.append("Failed to remove outdated binary file for %s: %s" % (profile, err))
                    break

                outcome = subprocessing.run_hmmpress(profile_path)
                if not outcome.successful():
                    problems.append("Failed to hmmpress %r: %r" % (profile, outcome.stderr))
                    problems.append("HMM binary files outdated. %s (changed: %s) vs %s (changed: %s)" % (
                        profile, datetime.datetime.fromtimestamp(profile_changed),
                        pressed, datetime.datetime.fromtimestamp(pressed_changed)))
                # hmmpress generates _all_ binary files in one go, so stop the loop
                break

            # the pressed file is missing entirely
            outcome = subprocessing.run_hmmpress(profile_path)
            if not outcome.successful():
                problems.append("Failed to hmmpress {!r}: {!r}".format(profile, outcome.stderr))
            # hmmpress generates _all_ binary files in one go, so stop the loop
            break

    return problems
def check_prereqs() -> List[str]:
    """ Checks that the required executables and HMM databases are available.

        Each database is looked up in the 'data' directory next to this module.
        If any of its hmmpress output files ('.h3f', '.h3i', '.h3m', '.h3p')
        cannot be located, hmmpress is run once for that database.

        Returns:
            a list of strings describing any problems found, empty if none
    """
    problems = []

    # (executable name, whether it is optional)
    executables = (('hmmscan', False), ('hmmpress', False))
    for executable, optional in executables:
        located = path.locate_executable(executable)
        if located is None and not optional:
            problems.append("Failed to locate executable for %r" % executable)

    database_names = ('abmotifs.hmm', 'dockingdomains.hmm', 'ksdomains.hmm', 'nrpspksdomains.hmm')
    databases = [path.get_full_path(__file__, 'data', name) for name in database_names]
    pressed_suffixes = ('.h3f', '.h3i', '.h3m', '.h3p')

    for database in databases:
        if path.locate_file(database) is None:
            problems.append("Failed to locate file %r" % database)
            continue

        # stops checking at the first missing file, a single hmmpress run follows
        needs_pressing = any(
            path.locate_file("%s%s" % (database, suffix)) is None
            for suffix in pressed_suffixes
        )
        if not needs_pressing:
            continue

        outcome = subprocessing.run_hmmpress(database)
        if not outcome.successful():
            problems.append('Failed to hmmpress {!r}: {}'.format(database, outcome.stderr))

    return problems
def ensure_database_pressed(filepath: str, return_not_raise: bool = False) -> List[str]:
    """ Re-runs hmmpress on the given HMMer database whenever its pressed
        component files are reported as outdated relative to the database.

        Arguments:
            filepath: the path to the HMMer database
            return_not_raise: if True, a failure to hmmpress is returned as a
                              message instead of being raised

        Returns:
            a list with the error message if hmmpress failed and
            return_not_raise is True, otherwise an empty list

        Raises:
            RuntimeError: if hmmpress failed and return_not_raise is False
    """
    pressed_files = []
    for suffix in ('.h3f', '.h3i', '.h3m', '.h3p'):
        pressed_files.append("%s%s" % (filepath, suffix))

    # nothing to do when everything is current
    if not path.is_outdated(pressed_files, filepath):
        return []

    logging.info("%s components missing or obsolete, re-pressing database", filepath)
    outcome = subprocessing.run_hmmpress(filepath)
    if outcome.successful():
        return []

    message = "Failed to hmmpress {!r}: {}".format(filepath, outcome.stderr)
    if return_not_raise:
        return [message]
    raise RuntimeError(message)
def check_prereqs() -> List[str]:
    """ Check that prereqs are satisfied.

        Both 'hmmsearch' and 'hmmpress' are treated as mandatory executables.
        If the combined profile database 'bgc_seeds.hmm' cannot be located in
        the 'data' directory, it is built by concatenating the profile files
        listed by get_signature_profiles(). If any hmmpress output file
        ('.h3f', '.h3i', '.h3m', '.h3p') is missing, hmmpress is run once.

        Returns:
            a list of strings describing any problems found, empty if none
    """
    failure_messages = []
    for binary_name, optional in [('hmmsearch', False), ('hmmpress', False)]:
        if path.locate_executable(binary_name) is None and not optional:
            failure_messages.append("Failed to locate executable for %r" % binary_name)

    # Check that hmmdetails.txt is readable and well-formatted
    try:
        profiles = get_signature_profiles()
    except ValueError as err:
        failure_messages.append(str(err))
        # without the profile list nothing below can work, so report now
        # instead of failing on the missing profiles
        return failure_messages

    # the path to the markov model
    hmm = path.get_full_path(__file__, 'data', 'bgc_seeds.hmm')
    hmm_files = [os.path.join("data", sig.hmm_file) for sig in profiles]
    if path.locate_file(hmm) is None:
        # try to generate file from all specified profiles in hmmdetails
        try:
            with open(hmm, 'w') as all_hmms_handle:
                for hmm_file in hmm_files:
                    with open(path.get_full_path(__file__, hmm_file), 'r') as handle:
                        all_hmms_handle.write(handle.read())
        except OSError:
            failure_messages.append('Failed to generate file {!r}'.format(hmm))
            # best effort: don't leave a partially written database behind,
            # a later run would find it and treat it as complete
            try:
                os.remove(hmm)
            except OSError:
                pass

    # if previous steps have failed, the remainder will too, so don't try
    if failure_messages:
        return failure_messages

    binary_extensions = ['.h3f', '.h3i', '.h3m', '.h3p']
    for ext in binary_extensions:
        binary = "{}{}".format(hmm, ext)
        if path.locate_file(binary) is None:
            result = run_hmmpress(hmm)
            if not result.successful():
                failure_messages.append('Failed to hmmpress {!r}: {}'.format(
                    hmm, result.stderr))
            # a single hmmpress run is attempted, so stop checking
            break

    return failure_messages
def ensure_database_pressed(filepath: str, return_not_raise: bool = False) -> List[str]:
    """ Checks that the given HMMer database exists and re-runs hmmpress on it
        if any pressed component file is missing or older than the database.

        Arguments:
            filepath: the path to the HMMer database
            return_not_raise: if True, errors are returned as messages instead
                              of being raised

        Returns:
            a list with a single error message if something failed and
            return_not_raise is True, otherwise an empty list

        Raises:
            FileNotFoundError: if the database does not exist and
                               return_not_raise is False
            RuntimeError: if hmmpress failed and return_not_raise is False
    """
    try:
        database_changed = os.path.getmtime(filepath)
    except FileNotFoundError as err:
        if return_not_raise:
            return [str(err)]
        raise

    for suffix in ('.h3f', '.h3i', '.h3m', '.h3p'):
        pressed = "%s%s" % (filepath, suffix)
        if path.locate_file(pressed) and os.path.getmtime(pressed) >= database_changed:
            # present and current, check the next one
            continue
        logging.info("%s does not exist or is out of date, hmmpressing %s",
                     pressed, filepath)
        break
    else:
        # every component is present and current
        return []

    outcome = subprocessing.run_hmmpress(filepath)
    if outcome.successful():
        return []

    message = "Failed to hmmpress {!r}: {}".format(filepath, outcome.stderr)
    if return_not_raise:
        return [message]
    raise RuntimeError(message)
def check_prereqs() -> List[str]:
    """ Checks that all required executables and the smCOG HMM database are
        available.

        The database is looked up in the 'data' directory next to this module.
        If any of its hmmpress output files ('.h3f', '.h3i', '.h3m', '.h3p')
        cannot be located, hmmpress is run once to regenerate them.

        Returns:
            a list of strings describing any problems found, empty if none
    """
    required_executables = ('muscle', 'hmmscan', 'hmmpress', 'fasttree', 'java')
    pressed_suffixes = ('.h3f', '.h3i', '.h3m', '.h3p')

    problems = [
        "Failed to locate file: %r" % executable
        for executable in required_executables
        if path.locate_executable(executable) is None
    ]

    for database_name in ('smcogs.hmm',):
        database = path.get_full_path(__file__, 'data', database_name)
        if path.locate_file(database) is None:
            problems.append("Failed to locate file %r" % database)
            continue

        # stops checking at the first missing file, a single hmmpress run follows
        needs_pressing = any(
            path.locate_file("%s%s" % (database, suffix)) is None
            for suffix in pressed_suffixes
        )
        if needs_pressing:
            outcome = subprocessing.run_hmmpress(database)
            if not outcome.successful():
                problems.append("Failed to hmmpress %s: %s" % (database, outcome.stderr.rstrip()))

    return problems
def check_prereqs() -> List[str]:
    """ Checks that the executables and data files needed here are available.

        Executables: hmmscan, blastp
        HMM database: t2pks.hmm, with hmmpress run once if any of its pressed
                      files ('.h3f', '.h3i', '.h3m', '.h3p') cannot be located
        BLAST databases: KSIII, AT and LIG, each as '.fasta', '.phr', '.pin'
                         and '.psq' files

        All data files are looked up in the 'data' directory next to this
        module.

        Returns:
            a list of strings describing any errors, if they occurred
    """
    problems = [
        "Failed to locate file: %r" % executable
        for executable in ('hmmscan', 'blastp')
        if path.locate_executable(executable) is None
    ]

    pressed_suffixes = ('.h3f', '.h3i', '.h3m', '.h3p')
    for database_name in ('t2pks.hmm',):
        database = path.get_full_path(__file__, 'data', database_name)
        if path.locate_file(database) is None:
            problems.append("Failed to locate file %r" % database)
            continue

        # stops checking at the first missing file, a single hmmpress run follows
        needs_pressing = any(
            path.locate_file("%s%s" % (database, suffix)) is None
            for suffix in pressed_suffixes
        )
        if needs_pressing:
            outcome = subprocessing.run_hmmpress(database)
            if not outcome.successful():
                problems.append("Failed to hmmpress %s: %s" % (database, outcome.stderr.rstrip()))

    # every missing BLAST database file is reported individually
    for blast_name in ('KSIII', 'AT', 'LIG'):
        for suffix in ('.fasta', '.phr', '.pin', '.psq'):
            blast_file = path.get_full_path(__file__, 'data', blast_name + suffix)
            if path.locate_file(blast_file) is None:
                problems.append("Failed to locate file %r" % blast_file)

    return problems