def get_supported_cluster_types() -> List[str]:
    """ Lists the name of every rule defined in the packaged cluster_rules.txt

        Returns:
            the rule names, in the order the parser reports the rules
    """
    # the parser is handed the names of all known signature profiles
    known_signatures = {profile.name for profile in get_signature_profiles()}
    rules_path = path.get_full_path(__file__, 'cluster_rules.txt')
    with open(rules_path, "r") as handle:
        rule_text = handle.read()
    parsed = rule_parser.Parser(rule_text, known_signatures)
    return [rule.name for rule in parsed.rules]
def prepare_data(logging_only: bool = False) -> List[str]:
    """ Ensures packaged data is fully prepared

        The aggregate profile file (data/bgc_seeds.hmm) is rebuilt from the
        individual profiles listed by get_signature_profiles() whenever it is
        missing or older than any of them, then handed to
        hmmer.ensure_database_pressed().

        Arguments:
            logging_only: whether to return error messages instead of raising exceptions

        Returns:
            a list of error messages (only if logging_only is True)
    """
    failure_messages = []

    # Check that hmmdetails.txt is readable and well-formatted
    try:
        profiles = get_signature_profiles()
    except ValueError as err:
        if not logging_only:
            raise
        return [str(err)]

    # the path to the markov model
    seeds_hmm = path.get_full_path(__file__, 'data', 'bgc_seeds.hmm')
    # resolve every component once, so the staleness check stats exactly the
    # files that are concatenated further down
    # NOTE(review): assumes path.get_full_path() anchors relative paths at this
    # module's directory, as its other uses here suggest - confirm
    component_paths = [path.get_full_path(__file__, os.path.join("data", sig.hmm_file))
                       for sig in profiles]

    outdated = False
    if not path.locate_file(seeds_hmm):
        logging.debug("%s: %s doesn't exist, regenerating", NAME, seeds_hmm)
        outdated = True
    else:
        seeds_timestamp = os.path.getmtime(seeds_hmm)
        for component in component_paths:
            if os.path.getmtime(component) > seeds_timestamp:
                logging.debug("%s out of date, regenerating", seeds_hmm)
                outdated = True
                break

    # regenerate if missing or out of date
    if outdated:
        # build the aggregate beside its final location and only move it into
        # place once complete: a half-written bgc_seeds.hmm would carry a fresh
        # timestamp and be mistaken for an up to date file on the next run
        partial_path = seeds_hmm + ".partial"
        try:
            with open(partial_path, 'w') as all_hmms_handle:
                for component in component_paths:
                    with open(component, 'r') as handle:
                        all_hmms_handle.write(handle.read())
            os.replace(partial_path, seeds_hmm)
        except OSError:
            # best effort cleanup, the temporary file may never have been created
            try:
                os.remove(partial_path)
            except OSError:
                pass
            if not logging_only:
                raise
            failure_messages.append('Failed to generate file {!r}'.format(seeds_hmm))

    # if regeneration failed, don't try to run hmmpress
    if failure_messages:
        return failure_messages

    failure_messages.extend(hmmer.ensure_database_pressed(seeds_hmm,
                                                          return_not_raise=logging_only))
    return failure_messages
def get_supported_cluster_types(strictness: str) -> List[str]:
    """ Lists the name of every rule found in the rule files selected for the
        given strictness

        Arguments:
            strictness: passed on to _get_rule_files_for_strictness() to pick the rule files

        Returns:
            the rule names, in the order the parser reports the rules
    """
    known_signatures = {profile.name for profile in get_signature_profiles()}
    collected = []  # type: List[rule_parser.DetectionRule]
    for rule_path in _get_rule_files_for_strictness(strictness):
        with open(rule_path) as handle:
            rule_text = handle.read()
        # each parser is given the rules gathered so far and its result
        # becomes the new running collection
        collected = rule_parser.Parser(rule_text, known_signatures, collected).rules
    return [rule.name for rule in collected]
def test_hmm_files_and_details_match(self):
    """ The profile paths reported by get_signature_profiles() and the .hmm
        files present in the data directory must be exactly the same set
    """
    data_dir = path.get_full_path(os.path.dirname(__file__), "data", "")

    def strip_data_dir(filepaths):
        """ Removes the data directory prefix so both sides compare by bare name """
        return {filepath.replace(data_dir, "") for filepath in filepaths}

    details_files = strip_data_dir(prof.path for prof in signatures.get_signature_profiles())
    data_dir_contents = strip_data_dir(glob.glob(data_dir + "*.hmm"))
    # ignore bgc_seeds.hmm for the sake of comparison, it's a generated aggregate
    data_dir_contents.discard("bgc_seeds.hmm")

    # profiles that are listed but have no file on disk
    missing_files = details_files.difference(data_dir_contents)
    assert not missing_files

    # files on disk that no profile refers to
    extra_files = data_dir_contents.difference(details_files)
    assert not extra_files

    # finally, just to be sure
    assert data_dir_contents == details_files
def check_prereqs() -> List[str]:
    """ Check that prereqs are satisfied, generating the aggregate profile file
        (data/bgc_seeds.hmm) and its hmmpress indices when they are missing.

        Both hmmsearch and hmmpress are treated as required executables,
        whether or not the pressed databases already exist.

        Returns:
            a list of error messages, empty if every check passed
    """
    failure_messages = []
    for binary_name, optional in [('hmmsearch', False), ('hmmpress', False)]:
        if path.locate_executable(binary_name) is None and not optional:
            failure_messages.append("Failed to locate executable for %r" % binary_name)

    # Check that hmmdetails.txt is readable and well-formatted
    try:
        profiles = get_signature_profiles()
    except ValueError as err:
        failure_messages.append(str(err))
        # nothing below can be built or checked without the profiles, so
        # report what has been found instead of iterating over a missing list
        return failure_messages

    # the path to the markov model
    hmm = path.get_full_path(__file__, 'data', 'bgc_seeds.hmm')
    if path.locate_file(hmm) is None:
        hmm_files = [os.path.join("data", sig.hmm_file) for sig in profiles]
        # try to generate file from all specified profiles in hmmdetails,
        # writing beside the final location and moving into place only once
        # complete, so a failed attempt can't leave a truncated file that
        # passes the existence check on the next run
        partial_path = hmm + ".partial"
        try:
            with open(partial_path, 'w') as all_hmms_handle:
                for hmm_file in hmm_files:
                    with open(path.get_full_path(__file__, hmm_file), 'r') as handle:
                        all_hmms_handle.write(handle.read())
            os.replace(partial_path, hmm)
        except OSError:
            # best effort cleanup, the temporary file may never have been created
            try:
                os.remove(partial_path)
            except OSError:
                pass
            failure_messages.append('Failed to generate file {!r}'.format(hmm))

    # if previous steps have failed, the remainder will too, so don't try
    if failure_messages:
        return failure_messages

    binary_extensions = ['.h3f', '.h3i', '.h3m', '.h3p']
    for ext in binary_extensions:
        binary = "{}{}".format(hmm, ext)
        if path.locate_file(binary) is None:
            # a single hmmpress run is expected to produce every index file,
            # so only a failed run ends the loop early
            result = run_hmmpress(hmm)
            if not result.successful():
                failure_messages.append('Failed to hmmpress {!r}: {}'.format(
                    hmm, result.stderr))
                break

    return failure_messages
def get_supported_cluster_types(strictness: str, category: Optional[str] = None) -> List[str]:
    """ Lists the names of the rules found in the rule files selected for the
        given strictness, optionally limited to a single category

        Arguments:
            strictness: passed on to _get_rule_files_for_strictness() to pick the rule files
            category: if given, only rules whose category equals this value are included

        Returns:
            the matching rule names, in the order the parser reports the rules
    """
    known_signatures = {profile.name for profile in get_signature_profiles()}
    known_categories = {entry.name for entry in get_rule_categories()}

    collected: List[rule_parser.DetectionRule] = []
    # the same alias mapping is handed to every parser
    shared_aliases: Dict[str, List[rule_parser.Token]] = {}
    for rule_path in _get_rule_files_for_strictness(strictness):
        with open(rule_path) as handle:
            rule_text = handle.read()
        # each parser is given the rules gathered so far and its result
        # becomes the new running collection
        collected = rule_parser.Parser(rule_text, known_signatures, known_categories,
                                       collected, shared_aliases).rules

    if category is None:
        return [rule.name for rule in collected]
    return [rule.name for rule in collected if rule.category == category]
def test_profiles_parsing(self):
    """ The number of parsed signature profiles must stay at the known total """
    # ensures we don't delete any by accident
    profile_count = len(signatures.get_signature_profiles())
    assert profile_count == 250