Example #1
def get_supported_cluster_types() -> List[str]:
    """ Returns a list of all cluster types for which there are rules
    """
    signature_names = {sig.name for sig in get_signature_profiles()}
    with open(path.get_full_path(__file__, 'cluster_rules.txt'), "r") as rulefile:
        rules = rule_parser.Parser("".join(rulefile.readlines()), signature_names).rules
        clustertypes = [rule.name for rule in rules]
    return clustertypes
Example #2
def prepare_data(logging_only: bool = False) -> List[str]:
    """ Ensures packaged data is fully prepared

        Arguments:
            logging_only: whether to return error messages instead of raising exceptions

        Returns:
            a list of error messages (only if logging_only is True)
    """
    failure_messages = []

    # Check that hmmdetails.txt is readable and well-formatted
    try:
        profiles = get_signature_profiles()
    except ValueError as err:
        if not logging_only:
            raise
        return [str(err)]

    # the path to the markov model
    seeds_hmm = path.get_full_path(__file__, 'data', 'bgc_seeds.hmm')
    hmm_files = [os.path.join("data", sig.hmm_file) for sig in profiles]
    outdated = False
    if not path.locate_file(seeds_hmm):
        logging.debug("%s: %s doesn't exist, regenerating", NAME, seeds_hmm)
        outdated = True
    else:
        seeds_timestamp = os.path.getmtime(seeds_hmm)
        for component in hmm_files:
            # resolve relative to this module, matching how the files are opened below
            component_path = path.get_full_path(__file__, component)
            if os.path.getmtime(component_path) > seeds_timestamp:
                logging.debug("%s out of date, regenerating", seeds_hmm)
                outdated = True
                break

    # regenerate if missing or out of date
    if outdated:
        # try to generate file from all specified profiles in hmmdetails
        try:
            with open(seeds_hmm, 'w') as all_hmms_handle:
                for hmm_file in hmm_files:
                    with open(path.get_full_path(__file__, hmm_file),
                              'r') as handle:
                        all_hmms_handle.write(handle.read())
        except OSError:
            if not logging_only:
                raise
            failure_messages.append(
                'Failed to generate file {!r}'.format(seeds_hmm))

    # if regeneration failed, don't try to run hmmpress
    if failure_messages:
        return failure_messages

    failure_messages.extend(
        hmmer.ensure_database_pressed(seeds_hmm,
                                      return_not_raise=logging_only))

    return failure_messages
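
A minimal usage sketch, not taken from the examples above: prepare_data either raises on failure or, when called with logging_only=True, returns the error messages instead. The logging calls and message wording below are illustrative assumptions, and prepare_data is assumed to be in scope from its module.

import logging

# Hedged usage sketch: collect problems as messages rather than exceptions.
messages = prepare_data(logging_only=True)
for message in messages:
    logging.warning("data preparation issue: %s", message)

# With the default logging_only=False, the same failures surface as exceptions
# (ValueError from profile parsing, OSError from writing the aggregate file).
try:
    prepare_data()
except (ValueError, OSError) as err:
    logging.error("data preparation failed: %s", err)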
Example #3
def get_supported_cluster_types(strictness: str) -> List[str]:
    """ Returns a list of all cluster types for which there are rules
    """
    signature_names = {sig.name for sig in get_signature_profiles()}
    rules = []  # type: List[rule_parser.DetectionRule]
    for rule_file in _get_rule_files_for_strictness(strictness):
        with open(rule_file) as rulefile:
            rules = rule_parser.Parser("".join(rulefile.readlines()), signature_names, rules).rules
    clustertypes = [rule.name for rule in rules]
    return clustertypes
Example #4
def test_hmm_files_and_details_match(self):
    data_dir = path.get_full_path(os.path.dirname(__file__), "data", "")
    details_files = {prof.path for prof in signatures.get_signature_profiles()}
    details_files = {filepath.replace(data_dir, "") for filepath in details_files}
    data_dir_contents = set(glob.glob(data_dir + "*.hmm"))
    data_dir_contents = {filepath.replace(data_dir, "") for filepath in data_dir_contents}
    # ignore bgc_seeds.hmm for the sake of comparison, it's a generated aggregate
    data_dir_contents.discard("bgc_seeds.hmm")
    missing_files = details_files - data_dir_contents
    assert not missing_files
    extra_files = data_dir_contents - details_files
    assert not extra_files
    # finally, just to be sure
    assert data_dir_contents == details_files
Example #5
def check_prereqs() -> List[str]:
    """ Check that prereqs are satisfied. hmmpress is only required if the
        databases have not yet been generated.
    """
    failure_messages = []
    for binary_name, optional in [('hmmsearch', False), ('hmmpress', False)]:
        if path.locate_executable(binary_name) is None and not optional:
            failure_messages.append("Failed to locate executable for %r" %
                                    binary_name)

    profiles = None
    # Check that hmmdetails.txt is readable and well-formatted
    try:
        profiles = get_signature_profiles()
    except ValueError as err:
        failure_messages.append(str(err))

    # without the profile details, none of the following checks can run
    if profiles is None:
        return failure_messages

    # the path to the markov model
    hmm = path.get_full_path(__file__, 'data', 'bgc_seeds.hmm')
    hmm_files = [os.path.join("data", sig.hmm_file) for sig in profiles]
    if path.locate_file(hmm) is None:
        # try to generate file from all specified profiles in hmmdetails
        try:
            with open(hmm, 'w') as all_hmms_handle:
                for hmm_file in hmm_files:
                    with open(path.get_full_path(__file__, hmm_file),
                              'r') as handle:
                        all_hmms_handle.write(handle.read())
        except OSError:
            failure_messages.append('Failed to generate file {!r}'.format(hmm))

    # if previous steps have failed, the remainder will too, so don't try
    if failure_messages:
        return failure_messages

    binary_extensions = ['.h3f', '.h3i', '.h3m', '.h3p']
    for ext in binary_extensions:
        binary = "{}{}".format(hmm, ext)
        if path.locate_file(binary) is None:
            result = run_hmmpress(hmm)
            if not result.successful():
                failure_messages.append('Failed to hmmpress {!r}: {}'.format(
                    hmm, result.stderr))
            break

    return failure_messages
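
The binary extensions checked above are the index files hmmpress writes alongside an .hmm database. As a hedged illustration (the helper name and structure below are assumptions, not part of the codebase), the same check can be expressed as a small standalone predicate:

import os

# Hypothetical helper, not part of the original module: report whether every
# hmmpress index file already exists next to the given profile database.
def seeds_database_pressed(hmm_path: str) -> bool:
    return all(os.path.exists(hmm_path + ext)
               for ext in ('.h3f', '.h3i', '.h3m', '.h3p'))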
Example #6
def get_supported_cluster_types(strictness: str,
                                category: Optional[str] = None) -> List[str]:
    """ Returns a list of all cluster types for which there are rules
    """
    signature_names = {sig.name for sig in get_signature_profiles()}
    category_names = {cat.name for cat in get_rule_categories()}
    rules: List[rule_parser.DetectionRule] = []
    aliases: Dict[str, List[rule_parser.Token]] = {}
    for rule_file in _get_rule_files_for_strictness(strictness):
        with open(rule_file) as rulefile:
            rules = rule_parser.Parser("".join(rulefile.readlines()),
                                       signature_names, category_names, rules,
                                       aliases).rules
    clustertypes = [
        rule.name for rule in rules
        if category is None or rule.category == category
    ]
    return clustertypes
def test_profiles_parsing(self):
    profiles = signatures.get_signature_profiles()
    assert len(profiles) == 250  # ensures we don't delete any by accident
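
For the final form of get_supported_cluster_types shown in example #6, a hedged usage sketch follows; the strictness and category strings are placeholders, since the valid values are defined by the rule files and are not shown in these examples.

# Placeholder arguments: substitute whatever strictness levels and rule
# categories the installed rule files actually define.
all_types = get_supported_cluster_types("strict")
filtered = get_supported_cluster_types("strict", category="some-category")
assert set(filtered) <= set(all_types)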