def _validate_config_argument(section, option, default, config_parser):
    """
    Reads a single option from the parsed configuration, coercing it to the
    type of the provided default and falling back to that default when the
    option is missing or malformed.

    Args:
        section: Configuration section name
        option: Option name inside the section
        default: Default value; its type selects the parsing strategy
        config_parser: configparser.ConfigParser instance to read from

    Returns:
        The parsed option value, or the default on a parsing error

    """
    # Probe order matters: bool is a subclass of int, so it must come first
    getters = (
        (bool, config_parser.getboolean),
        (int, config_parser.getint),
        (float, config_parser.getfloat),
        (str, config_parser.get),
    )

    value = default
    for expected_type, getter in getters:
        if isinstance(default, expected_type):
            try:
                value = getter(section, option, fallback=default)
            except ValueError:
                LOG.warning(
                    f'[{section}][{option}] configuration parameter wrongly set. '
                    f'Falling back to its default value: {default}')
                value = default
            break

    LOG.debug(f'[{section}][{option}] {value}')
    return value
def __setattr__(self, key, value) -> None:
    """
    Overrides method to be used with slots

    Args:
        key: An object slotted property
        value: An intended value for the object key

    Returns: None

    """
    # Attribute already exists: this is an update of a known configuration
    # parameter, so guard it against data type changes before applying.
    if hasattr(self, key):
        # _preserve_property_type decides whether the new value's type is
        # acceptable for the currently stored value (helper defined elsewhere)
        if self._preserve_property_type(getattr(self, key), value):
            super(Config, self).__setattr__(key, value)
            LOG.info(
                f'Updating configuration parameter {key.upper()} with value {value}'
            )
            # Grammar operators and extended pattern syntax are mutually
            # exclusive; re-validate whenever either of those flags changes
            if key == USE_EXTENDED_PATTERN_SYNTAX.lower() \
                    or key == USE_GRAMMAR_OPERATORS.lower():
                self._check_xps_op_restriction()
        else:
            LOG.warning(
                f'Invalid data type {type(value)} for property {key}. Skipping update'
            )
    else:
        # First assignment (e.g. during __init__): set unconditionally
        super(Config, self).__setattr__(key, value)
def _is_solution(self) -> None: """ Method to manage AES for the given RUN """ if self.stats.solution_found is False: self.stats.sum_aes(1) if self.fitness_value >= self.config.success_threshold: LOG.debug('Solution found for this run!') self.stats.solution_found = True
def dynamic_generator(samples: 'List[Doc]') -> dict:
    """
    Dynamically generates a grammar in Backus Naur Form (BNF) notation
    representing the available Spacy NLP Linguistic Feature values of the
    given sample list of Doc instances

    Args:
        samples: List of Spacy Doc objects

    Returns: Backus Naur Form grammar notation encoded in a dictionary

    """
    config = Config()

    LOG.info(f'Generating BNF based on the following samples: {str(samples)}')

    # BNF root: the grammar always starts with S -> P
    pattern_grammar = {S: [P]}

    # Watch out features of seen samples and max number of tokens per sample
    max_length_token, min_length_token, features_dict, extended_features = _features_seen(samples)

    # Update times token per pattern [Min length of tokens, Max length of tokens] interval
    pattern_grammar[P] = _symbol_stacker(T, max_length_token, min_length_token)

    # Update times features per token (Max length of features)
    pattern_grammar[T] = _symbol_stacker(F, _get_features_per_token(features_dict))

    if config.use_token_wildcard is True:
        pattern_grammar[T].append(TOKEN_WILDCARD)

    # Update available features (just the features list)
    list_of_features = list(features_dict.keys())
    # Grammar operators and the extended pattern syntax are mutually
    # exclusive in Spacy's Matcher, hence the cross-checked branches below
    if config.use_grammar_operators is True and config.use_extended_pattern_syntax is False:
        pattern_grammar = _add_grammar_operators(pattern_grammar, list_of_features)
    elif config.use_extended_pattern_syntax is True and config.use_grammar_operators is False:
        pattern_grammar = _add_extended_pattern_syntax(pattern_grammar, list_of_features, features_dict)
    else:
        pattern_grammar[F] = list_of_features

    # Update each feature possible values
    for k, v in features_dict.items():
        if config.use_extended_pattern_syntax is True:
            # XPS symbol is offered as an alternative value for every feature
            v.append(XPS)
        pattern_grammar.update({k: v})

    if config.use_custom_attributes is True:
        pattern_grammar = _add_custom_attributes(pattern_grammar, extended_features)

    LOG.info(f'Dynamically generated BNF: {str(pattern_grammar)}')

    return pattern_grammar
def _check_xps_op_restriction(self) -> None:
    """
    Spacy's Grammar Operators and Quantifiers and the Spacy's Extended
    Pattern Syntax can not be used together at the same time in a pattern
    for the Spacy's Rule Based Matcher. This method checks the provided
    configuration and disables the Spacy's Extended Pattern Syntax if both
    mechanisms are found enabled at the provided configuration.

    Returns: None

    """
    xps_attr = USE_EXTENDED_PATTERN_SYNTAX.lower()
    ops_attr = USE_GRAMMAR_OPERATORS.lower()

    # Both flags must already exist on this object before they can be
    # compared (during construction one of them may not be set yet)
    if not hasattr(self, xps_attr) or not hasattr(self, ops_attr):
        return

    if self.use_extended_pattern_syntax is True and self.use_grammar_operators is True:
        LOG.warning(
            f'Extended Pattern Syntax is not compatible with the usage of Grammar Operators. '
            f'Extended Pattern Syntax has been disabled!')
        self.use_extended_pattern_syntax = False
def _wildcard_penalty(self, contact: float) -> float: """ Applies a penalty for the usage of token wildcard if usage of token wildcard is enabled Args: contact: Temporary fitness value for the current individual Returns: Final fitness value for the current individual """ if self.config.use_token_wildcard: num_tokens = len(self.fenotype) for item in self.fenotype: if item == {}: LOG.debug('Applying token wildcard penalty!') penalty = 1 / num_tokens contact -= penalty return contact
def find_patterns(
        samples: List[str],
        configuration: Union[str, None] = None,
        spacy_language_model_name: Union[str, None] = None) -> List[Tuple[Any, ...]]:
    """
    Given some samples, this function finds optimized patterns to be used by
    the Spacy's Rule Based Matcher.

    Args:
        samples: List of strings from where to find common linguistic patterns
        configuration: (str) Optional configuration file path to be loaded
            (Fallbacks to default configuration)
        spacy_language_model_name: (str) Optional valid Spacy Language Model
            (Fallbacks to Spacy's en_core_web_sm)

    Returns: List of patterns found and list of each pattern matching score
        against the samples

    """
    LOG.info(f'Loading language model {spacy_language_model_name}...')
    # Install the default model on the fly when it is missing from the
    # current environment (pip distributes it as "en-core-web-sm")
    if 'en-core-web-sm' not in [
            d.project_name for d in pkg_resources.working_set
    ]:
        LOG.info(
            f'PatternOmatic\'s default spaCy\'s Language Model not installed,'
            f' proceeding to install en_core_web_sm, please wait...')
        spacy_download('en_core_web_sm')

    try:
        nlp = spacy_load(spacy_language_model_name)
    except OSError:
        # NOTE(review): spacy_load(None) (the default argument) may raise
        # TypeError rather than OSError on some spaCy versions — confirm the
        # no-argument call path actually reaches this fallback
        LOG.warning(
            f'Model {spacy_language_model_name} not found, '
            f'falling back to patternOmatic\'s default language model: en_core_web_sm'
        )
        nlp = spacy_load('en_core_web_sm')

    LOG.info(f'Building Doc instances...')
    # Rebinds samples: raw strings become spaCy Doc objects from here on
    samples = [nlp(sample) for sample in samples]

    if isinstance(configuration, str):
        LOG.info(
            f'Setting up configuration from the following path: {configuration}...'
        )
        config = Config(config_file_path=configuration)
    else:
        # Config is a singleton: with no path this reuses/creates the default
        config = Config()
        LOG.info(f'Existing Config instance found: {config}')

    stats = Stats()

    # Dynamically generate the BNF grammar from the sample Docs
    bnf_g = dgg(samples)

    LOG.info('Starting Execution...')
    # One timed evolutionary run per configured max_runs
    for _ in range(0, config.max_runs):
        start = time.monotonic()
        p = Population(samples, bnf_g, stats)
        p.evolve()
        end = time.monotonic()
        stats.add_time(end - start)
        stats.calculate_metrics()

    LOG.info(f'Execution report {stats}')
    stats.persist()

    LOG.info(f'Best individuals for this execution:')
    # Best-first ordering before reporting and returning
    stats.most_fitted_accumulator.sort(key=lambda i: i.fitness_value, reverse=True)
    for individual in stats.most_fitted_accumulator:
        LOG.info(f'{individual}')

    # zip(*) transposes [(fenotype, fitness), ...] into
    # (all fenotypes, all fitness values)
    return list(
        zip(*[[i.fenotype, i.fitness_value]
              for i in stats.most_fitted_accumulator]))
def main(args: List) -> None:
    """
    PatternOmatic's script main function wrapper: parses the command line
    options, joins the sample words into phrases and triggers the pattern
    search.

    Args:
        args: Command Line Input Arguments

    Returns: None

    """
    LOG.info('Parsing command line arguments...')
    try:
        arg_parser = ArgumentParser(
            description='Finds the Spacy\'s Matcher pattern for the given samples',
            epilog='...using actual Artificial Intelligence')

        # Samples (repeatable; each -s takes one or more words)
        arg_parser.add_argument('-s',
                                '--sample',
                                action='append',
                                required=True,
                                nargs='+',
                                type=str,
                                help='A sample phrase')

        # Spacy Language Model
        arg_parser.add_argument('-l',
                                '--language',
                                nargs='?',
                                type=str,
                                default='en_core_web_sm',
                                help='Spacy language model to be used')

        # Configuration file to be used
        arg_parser.add_argument('-c',
                                '--config',
                                nargs='?',
                                type=str,
                                help='Configuration file path to be used',
                                default=None)

        # Parse command line input arguments/options
        parsed_args = arg_parser.parse_args(args)

        # Join sample arguments: every word list becomes a single phrase
        parsed_args.sample = [' '.join(words) for words in parsed_args.sample]

        #
        # Find patterns
        #
        patterns_found, _ = find_patterns(
            parsed_args.sample,
            configuration=parsed_args.config,
            spacy_language_model_name=parsed_args.language)

        LOG.info(f'Patterns found: {patterns_found}')

    except Exception as ex:
        LOG.critical(f'Fatal error: {repr(ex)}')
        raise ex
def __init__(self, config_file_path: str = None):
    """
    Config object constructor

    Args:
        config_file_path: Path for a configuration file
    """
    config_parser = configparser.ConfigParser()

    # Resolve the configuration source; self.file_path keeps the path that
    # was actually read, or None when falling back to the built-in defaults
    if config_file_path is None:
        LOG.warning(
            f'Configuration file not provided. Falling back to default values'
        )
        self.file_path = None
    else:
        # ConfigParser.read returns the list of files successfully parsed
        file_list = config_parser.read(config_file_path)
        if len(file_list) == 0:
            LOG.warning(
                f'File {config_file_path} not found. Falling back to default values'
            )
            self.file_path = None
        else:
            self.file_path = config_file_path

    #
    # GE configuration parameters
    #
    self.max_runs = self._validate_config_argument(GE, MAX_RUNS, 4, config_parser)
    self.success_threshold = self._validate_config_argument(
        GE, SUCCESS_THRESHOLD, 0.8, config_parser)
    self.population_size = self._validate_config_argument(
        GE, POPULATION_SIZE, 10, config_parser)
    self.max_generations = self._validate_config_argument(
        GE, MAX_GENERATIONS, 3, config_parser)
    self.codon_length = self._validate_config_argument(
        GE, CODON_LENGTH, 8, config_parser)
    self.num_codons_per_individual = self._validate_config_argument(
        GE, CODONS_X_INDIVIDUAL, 4, config_parser)
    # Derived value: total genotype length in bits/codon units
    self.dna_length = self.codon_length * self.num_codons_per_individual
    self.mutation_probability = self._validate_config_argument(
        GE, MUTATION_PROBABILITY, 0.5, config_parser)
    self.offspring_max_size_factor = self._validate_config_argument(
        GE, OFFSPRING_FACTOR, 3.5, config_parser)
    self.mating_probability = self._validate_config_argument(
        GE, MATING_PROBABILITY, 0.9, config_parser)
    self.k_value = self._validate_config_argument(GE, K_VALUE, 3, config_parser)

    #
    # GE configuration methods (wrapped into their respective enums)
    #
    self.selection_type = SelectionType(
        self._validate_config_argument(GE, SELECTION_TYPE, 0, config_parser))
    self.recombination_type = RecombinationType(
        self._validate_config_argument(GE, RECOMBINATION_TYPE, 0, config_parser))
    self.replacement_type = ReplacementType(
        self._validate_config_argument(GE, REPLACEMENT_TYPE, 0, config_parser))
    self.fitness_function_type = FitnessType(
        self._validate_config_argument(GE, FITNESS_FUNCTION_TYPE, 1, config_parser))

    #
    # BNF Grammar Generation configuration options
    #
    self.features_per_token = self._validate_config_argument(
        DGG, FEATURES_X_TOKEN, 1, config_parser)
    self.use_boolean_features = self._validate_config_argument(
        DGG, USE_BOOLEAN_FEATURES, False, config_parser)
    self.use_custom_attributes = self._validate_config_argument(
        DGG, USE_CUSTOM_ATTRIBUTES, False, config_parser)
    self.use_uniques = self._validate_config_argument(
        DGG, USE_UNIQUES, True, config_parser)
    self.use_grammar_operators = self._validate_config_argument(
        DGG, USE_GRAMMAR_OPERATORS, False, config_parser)
    self.use_token_wildcard = self._validate_config_argument(
        DGG, USE_TOKEN_WILDCARD, False, config_parser)
    self.use_extended_pattern_syntax = \
        self._validate_config_argument(DGG, USE_EXTENDED_PATTERN_SYNTAX, False, config_parser)

    #
    # Configuration validation: grammar operators and extended pattern
    # syntax are mutually exclusive; this may switch the latter off
    #
    self._check_xps_op_restriction()

    #
    # IO
    #
    self.report_path = \
        self._validate_config_argument(IO, REPORT_PATH, '/tmp/patternomatic_report.txt', config_parser)
    self.report_format = ReportFormat(
        self._validate_config_argument(IO, REPORT_FORMAT, 0, config_parser))

    LOG.info(f'Configuration instance: {self}')
def clear_instance(self):
    """ For testing purposes, destroy Singleton instance """
    LOG.debug('Removing config object!')
    # Assign before deleting so the attribute is guaranteed to exist in this
    # object's own namespace; del then removes it, letting lookups fall back
    # to the class-level default.
    # NOTE(review): whether this actually resets the singleton depends on
    # where _instance is stored (metaclass vs instance) — confirm against
    # the singleton metaclass' __call__.
    self._instance = None
    del self._instance
def __call__(cls, config_file_path: str = None) -> Config:
    """
    Singleton gate: constructs the Config instance on first call and returns
    the cached instance on every later call (a later config_file_path
    argument is ignored once the instance exists).

    Args:
        config_file_path: Optional configuration file path forwarded to the
            Config constructor on first instantiation

    Returns: The single shared Config instance
    """
    if cls._instance is None:
        LOG.debug('Creating config object!')
        # Delegate real construction to type.__call__, which runs __init__
        cls._instance = super().__call__(config_file_path)
    return cls._instance