def test_get_hypothesis_from_debug_string(self):
    """Rebuild a hypothesis from a logged dump and print its energy figures.

    The dump below is handed to ``get_hypothesis_from_log_string``; the
    resulting hypothesis is logged again, and then its energy and its most
    recent energy signature are printed.
    """
    from simulations import french_two_rules as french_simulation
    self.initialise_simulation(french_simulation)

    # Verbatim text of a previously logged hypothesis (HMM, rule set, energy).
    logged_dump = """ HMM: states ['q1', 'q2'], transitions {'q1': ['q2'], 'q2': ['qf'], 'q0': ['q1']}, emissions {'q1': ['arb', 'tab', 'kup', 'yrl', 'purp', 'filt', 'romp', 'byl', 'dart', 'mord', 'kuverk', 'prut', 'kylt', 'amur', 'klop', 'film', 'kurb', 'kapt', 'tabl', 'krab', 'karaf', 'parl', 'provok', 'filtr', 'klad', 'kuverkl', 'purpr', 'odor', 'arbr', 'furyr', 'burk', 'kupl', 'batir', 'rompr', 'mordr', 'dartr'], 'q2': ['mal', 'iv', 'ε', 'fad', 'puri', 'byvab', 'kif', 'timid', 'abil']} HMM: q0: ['q1'] q1: ['q2'], ['amur', 'arb', 'arbr', 'batir', 'burk', 'byl', 'dart', 'dartr', 'film', 'filt', 'filtr', 'furyr', 'kapt', 'karaf', 'klad', 'klop', 'krab', 'kup', 'kupl', 'kurb', 'kuverk', 'kuverkl', 'kylt', 'mord', 'mordr', 'odor', 'parl', 'provok', 'prut', 'purp', 'purpr', 'romp', 'rompr', 'tab', 'tabl', 'yrl'] q2: ['qf'], ['abil', 'byvab', 'fad', 'iv', 'kif', 'mal', 'puri', 'timid', 'ε'] q0->q1->q2->qf Rule Set: transducer_generated: [] --> [{'back': '+', 'center': '+', 'high': '-', 'low': '-'}] / [{'cons': '+'}, {'strident': '-'}, {'MB': True}]__[{'cons': '+'}] obligatory: False | ε --> ['e'] / [['p', 'd', 'v', 't', 'k', 'l', 'r', 'm', 'f', 'b'], ['p', 'm', 'd', 'e', 't', 'k', 'a', 'i', 'l', 'r', 'u', 'o', 'y', 'b'], ['B']]__[['p', 'd', 'v', 't', 'k', 'l', 'r', 'm', 'f', 'b']] obligatory: False [] --> [{'back': '-', 'liquid': '+', 'strident': '-', 'voice': '+'}] / [{'cons': '-', 'lateral': '-'}, {'back': '-', 'center': '-', 'liquid': '-', 'voice': '+'}, {'MB': True}]__[] obligatory: False | ε --> ['l', 'r'] / [['e', 'a', 'i', 'u', 'o', 'y'], ['d', 'b', 'i', 'm', 'y', 'v'], ['B']]__[] obligatory: False [] --> [{'lateral': '+'}] / [{'MB': True}]__[{'MB': True}, {'lateral': '-', 'voice': '+'}, {'voice': '+'}] obligatory: False | ε --> ['l'] / [['B']]__[['B'], ['o', 'd', 'b', 'e', 'a', 'i', 'r', 'u', 'm', 'y', 'v'], ['m', 
'd', 'v', 'e', 'a', 'i', 'l', 'r', 'u', 'o', 'y', 'b']] obligatory: False not transducer_generated: Energy: 250,819.10989435515 (data_by_grammar: 230,883.2703113556, hmm: 19,624.83958299955, rule_set: 311.0) """

    parsed = get_hypothesis_from_log_string(logged_dump)
    log_hypothesis(parsed)

    energy = parsed.get_energy()
    print(energy)
    print(parsed.get_recent_energy_signature())
def test_get_random_hypothesis(self):
    """Generate one random hypothesis for a tiny word list and log it.

    Both HMM and rule evolution are switched on in ``self.configurations``
    and the English-plural segment table is loaded before the hypothesis is
    drawn.
    """
    for flag in ("EVOLVE_HMM", "EVOLVE_RULES"):
        self.configurations[flag] = True
    self.initialise_segment_table("plural_english_segment_table.txt")

    words = ['kats', 'dogz', 'kat', 'dog']
    log_hypothesis(Hypothesis.get_random_hypothesis(words))
def run(self):
    """Run the whole evolution on this island and publish its best hypothesis.

    The initial population is evaluated, its elite kept and the hall of fame
    updated.  Then ``ga_config.TOTAL_GENERATIONS`` generations are produced;
    after each one the crossover and mutation probabilities are multiplied
    by their cooling rates.  Finally the first hall-of-fame entry is logged
    and, when a result queue is set, put on it without blocking as an
    ``(island_name, hypothesis)`` pair.  A full queue is logged as an error
    rather than raised.  The log is finalized in every case.
    """
    # Generation zero: score and report the starting population.
    self.evaluate_population()
    self.keep_elite()
    self.hall_of_fame.update(self.population)
    self.debug_generation()

    for _unused in range(ga_config.TOTAL_GENERATIONS):
        self.make_generation()
        self.generation += 1
        self.debug_generation()
        # Cool down the variation operators once per generation.
        self.cxpb *= ga_config.CROSSOVER_COOLING_RATE
        self.mutpb *= ga_config.MUTATION_COOLING_RATE

    champion = self.hall_of_fame[0]
    self.logger.info(f"** {self.island_name} final best hypothesis: ")
    log_hypothesis(champion, self.logger.info)

    try:
        queue = self.result_queue
        if queue:
            queue.put((self.island_name, champion), block=False)
    except Full:
        self.logger.error(f'{self.island_name}: Result queue full')

    self.logger.finalize_log()
def collect_all_island_results(self):
    """Pick the lowest-energy hypothesis among the collected island results.

    After ``self.flush_queue()`` is called, every ``(island, hypothesis)``
    pair in ``self.final_queue`` is logged and its energy compared against
    the best seen so far.  The winner is stored in ``self.best_energy`` and
    ``self.best_hypothesis`` (``inf`` / ``None`` when there were no results).
    Islands in ``first_island_idx..last_island_idx`` that delivered nothing
    are reported in a final "missing" log line.

    A result whose island name is not (or no longer) expected -- a duplicate
    or an out-of-range name -- is still considered for the best hypothesis;
    it is logged instead of raising ``ValueError`` from ``list.remove``.
    """
    best_energy = float("inf")
    best_hypothesis = None

    self.flush_queue()
    self.logger.info("Looking at {}/{} results".format(
        len(self.final_queue), self.local_simulation_num_islands))

    # Names of the islands that have not reported yet, in index order.
    pending_islands = [
        f'island_{i}'
        for i in range(self.first_island_idx, self.last_island_idx + 1)
    ]

    for island, hypothesis in self.final_queue:
        self.logger.info('{} best hypothesis:'.format(island))
        if island in pending_islands:
            pending_islands.remove(island)
        else:
            # Do not let one odd entry discard all the other results.
            self.logger.info(
                f'{island}: unexpected or duplicate result')
        log_hypothesis(hypothesis, self.logger.info)

        energy = hypothesis.get_energy()
        if energy < best_energy:
            best_energy = energy
            best_hypothesis = hypothesis

    self.best_energy = best_energy
    self.best_hypothesis = best_hypothesis

    if self.best_hypothesis:
        self.logger.info('*Best hypothesis from all islands:*')
        log_hypothesis(self.best_hypothesis, self.logger.info)

    if pending_islands:
        self.logger.info(
            f'{len(pending_islands)} missing: {pending_islands}')
def debug_generation(self):
    """Record statistics for the current generation and run periodic upkeep.

    The hall of fame is updated, the compiled statistics (if a stats object
    is set) are appended to the logbook, and a stats record extended with
    the generation number and the stringified best hypothesis is passed to
    the logger.  Depending on the intervals in ``ga_config`` the rule-set
    cache may then be cleared, the population dumped, and the leading
    hall-of-fame hypotheses written to the debug log.
    """
    self.hall_of_fame.update(self.population)

    if self.stats:
        stats_record = self.stats.compile(self.population)
    else:
        stats_record = {}

    generation = self.generation
    self.logbook.record(gen=generation,
                        nevals=self.new_individuals_in_generation,
                        **stats_record)
    self.logger.info(self.logbook.stream)

    stats_record.update(
        generation=generation,
        best_hypothesis=hypothesis_to_string(self.hall_of_fame[0]),
    )
    self.logger.log_stats_record(stats_record, self.island_number)

    past_first_generation = generation > 0

    if (past_first_generation
            and generation % ga_config.CLEAR_RULE_SET_CACHE_INTERVAL == 0):
        RuleSet.clear_caching()

    # A non-positive interval disables population dumps.
    if (ga_config.DUMP_ALL_POPULATION_EVERY_N_GENERATIONS > 0
            and generation % ga_config.DUMP_ALL_POPULATION_EVERY_N_GENERATIONS == 0
            and past_first_generation):
        self.dump_population()

    if generation % ga_config.HALL_OF_FAME_DEBUG_INTERVAL != 0:
        return

    debug = self.logger.debug
    debug(f'\n\n**** {self.island_name} top '
          f'{ga_config.HALL_OF_FAME_HYPOTHESES} hypothesis:****\n')
    for rank in range(1, ga_config.HALL_OF_FAME_HYPOTHESES + 1):
        try:
            entry = self.hall_of_fame[rank - 1]
            debug(f'** #{rank} **')
            log_hypothesis(entry, debug)
            debug('\n')
        except IndexError:
            # Fewer entries than requested: stop at the first missing rank.
            break
def log_all_population(self):
    """Write every hypothesis of the current population to the debug log.

    The dump is framed by a header line carrying the generation number and
    a fixed footer line.  Each hypothesis is preceded by its position in
    ``self.population`` and followed by a blank entry.
    """
    debug = self.logger.debug

    debug("\n\n**** Population dump: Generation {} ****\n".format(
        self.generation))

    for position, hypothesis in enumerate(self.population):
        debug('Hypothesis {}\n'.format(position))
        log_hypothesis(hypothesis, debug)
        debug('\n')

    # The footer has no placeholder, so it is logged as a plain literal
    # (the former ``.format(self.generation)`` call had no effect).
    debug("**** End population dump ****")
def finalize_simulation(self):
    """Dump the population, report the best hypothesis and close the log.

    The first hall-of-fame entry is logged.  It is put on the result queue
    (non-blocking, as an ``(island_name, hypothesis)`` pair) only when the
    generation counter has reached ``self.simulation_total_generations``
    and a result queue is set.  The island's log is finalized afterwards.
    """
    self.dump_population()

    champion = self.hall_of_fame[0]
    self.logger.info(f"{self.island_name} best hypothesis: ")
    log_hypothesis(champion, self.logger.info)

    simulation_complete = self.generation >= self.simulation_total_generations
    if simulation_complete and self.result_queue:
        self.result_queue.put((self.island_name, champion), block=False)

    self.logger.finalize_log(self.island_number)
def init_target_hypothesis(self):
    """Build the simulation's target hypothesis, log it and return it.

    ``self.simulation.target_tuple`` supplies the HMM description at index 0
    (deep-copied before use) and the flat rule list at index 1.

    Returns:
        A ``(target_hypothesis, target_energy)`` pair.
    """
    spec = self.simulation.target_tuple

    rule_set = RuleSet.load_form_flat_list(spec[1])
    hmm = HMM(deepcopy(spec[0]))
    target = Hypothesis.create_hypothesis(hmm, rule_set)
    energy = target.get_energy()

    info = self.logger.info
    info('Target hypothesis:')
    log_hypothesis(target, info)
    info(f'Target energy: {energy}')
    info('Target hypothesis energy signature: {}'.format(
        target.get_recent_energy_signature()))

    return target, energy
def test_representative_random_hypothesis_rate(self):
    """Count the random hypotheses drawn until one's energy is not infinite.

    Checks how often a random hypothesis represents the data: hypotheses are
    generated (and logged) one after another until ``get_energy()`` returns
    something other than ``inf``.  The number of attempts is printed and the
    last hypothesis is logged once more.
    """
    self.initialise_segment_table("plural_english_segment_table.txt")
    words = ['kats', 'dogz', 'kat', 'dog']

    infinite = float("inf")
    attempts = 0
    while True:
        candidate = Hypothesis.get_random_hypothesis(words)
        energy = candidate.get_energy()
        log_hypothesis(candidate)
        attempts += 1
        if energy != infinite:
            break

    print("Total hypotheses generated: ", attempts)
    print()
    log_hypothesis(candidate)
def collect_all_island_results(self):
    """Wait for one result per island and keep the lowest-energy hypothesis.

    Exactly ``self.local_simulation_num_islands`` blocking reads are made
    from ``self.result_queue``; each ``(island, hypothesis)`` pair is
    logged.  The winner is stored in ``self.best_energy`` and
    ``self.best_hypothesis`` (``inf`` / ``None`` when nothing was read) and
    logged again when it is truthy.
    """
    lowest_energy = float("inf")
    winner = None

    for _unused in range(self.local_simulation_num_islands):
        # Blocks until the next island delivers its result.
        island, candidate = self.result_queue.get(block=True)
        self.logger.info(f'{island} best hypothesis:')
        log_hypothesis(candidate, self.logger.info)

        candidate_energy = candidate.get_energy()
        if candidate_energy < lowest_energy:
            lowest_energy, winner = candidate_energy, candidate

    self.best_energy = lowest_energy
    self.best_hypothesis = winner

    if not self.best_hypothesis:
        return
    self.logger.info('*Best hypothesis from all islands:*')
    log_hypothesis(self.best_hypothesis, self.logger.info)
def debug_generation(self):
    """Record statistics for the current generation and run periodic upkeep.

    The hall of fame is updated, the compiled statistics (if a stats object
    is set) are appended to the logbook, and a stats record extended with
    the generation number and the stringified best hypothesis is passed to
    the logger.  Depending on the intervals in ``ga_config`` the population
    may then be dumped, the key-value cache flushed, the rule-set and
    bracket-rule-transducer caches cleared, the whole population logged,
    and the leading hall-of-fame hypotheses written to the debug log.
    """
    self.hall_of_fame.update(self.population)

    if self.stats:
        stats_record = self.stats.compile(self.population)
    else:
        stats_record = {}

    generation = self.generation
    self.logbook.record(gen=generation,
                        nevals=self.new_individuals_in_generation,
                        **stats_record)
    self.logger.info(self.logbook.stream)

    stats_record.update(
        generation=generation,
        best_hypothesis=hypothesis_to_string(self.hall_of_fame[0]),
    )
    self.logger.log_stats_record(stats_record, self.island_number)

    # Dumps are skipped on the generation the run started from.
    if (generation != self.initial_generation
            and generation % ga_config.DUMP_POPULATION_INTERVAL == 0):
        self.dump_population()

    past_first_generation = generation > 0

    if (past_first_generation
            and generation % ga_config.CLEAR_KEY_VALUE_CACHE_INTERVAL == 0):
        self.cache.flush()

    if (past_first_generation
            and generation % ga_config.CLEAR_TRANSDUCERS_CACHE_INTERVAL == 0):
        from bracket_rule_transducer import BracketRuleTransducer
        self.logger.info("Clearing rule set cache...")
        RuleSet.clear_caching()
        self.logger.info("Clearing bracket rule transducers cache...")
        BracketRuleTransducer.clear_caching()

    # A non-positive interval disables full population logging.
    if (ga_config.LOG_POPULATION_INTERVAL > 0
            and generation % ga_config.LOG_POPULATION_INTERVAL == 0
            and past_first_generation):
        self.log_all_population()

    if generation % ga_config.HALL_OF_FAME_DEBUG_INTERVAL != 0:
        return

    debug = self.logger.debug
    debug(f'\n\n**** {self.island_name} top '
          f'{ga_config.HALL_OF_FAME_HYPOTHESES} hypothesis:****\n')
    for rank in range(1, ga_config.HALL_OF_FAME_HYPOTHESES + 1):
        try:
            entry = self.hall_of_fame[rank - 1]
            debug(f'** #{rank} **')
            log_hypothesis(entry, debug)
            debug('\n')
        except IndexError:
            # Fewer entries than requested: stop at the first missing rank.
            break
def start(self):
    """Run one simulation per parameter combination and log all results.

    For every combination the i-th value is assigned to the ``ga_config``
    attribute named by ``GRID[i][0]``.  The simulation is then run; a truthy
    final hypothesis is logged and recorded in ``self.results`` as
    ``[params, energy]``, anything else is logged as a failed combination.
    The accumulated results are logged as JSON at the end.
    """
    logger.info('GRID SEARCH ')

    for run_number, combination in enumerate(self.all_combinations, start=1):
        params_in_use = {}
        for position, value in enumerate(combination):
            name = GRID[position][0]
            # Override the setting on the already-imported config module.
            setattr(modules['ga_config'], name, value)
            params_in_use[name] = value

        logger.info(
            f'GRID SEARCH Starting simulation {run_number}/'
            f'{len(self.all_combinations)} with params:\n{params_in_use}')

        outcome = self.run_simulation()
        if not outcome:
            logger.error(
                'GRID SEARCH failed - simulation failed with current params '
                '(probably param combination is invalid). '
                'Trying next combination on grid...')
            continue

        self.results.append([params_in_use, outcome.get_energy()])
        logger.info('GRID SEARCH Ended simulation with final hypothesis:')
        log_hypothesis(outcome, logger.info)

    summary = json.dumps(self.results)
    logger.info(f'GRID SEARCH Final results:\n{summary}')
def test_crossover_random(self):
    """Cross two random GA hypotheses and print parents and offspring.

    Rule and HMM evolution are enabled with at most three rules, two
    ``GAHypothesis`` instances are built from the same word list, and the
    result of ``GeneticAlgorithm._crossover`` is logged alongside them.
    """
    self.initialise_segment_table("dag_zook_segments_new.txt")

    # Bare stems followed by their -so/-zo, -ko/-go and -to/-do forms.
    words = [
        'kat', 'dot', 'dag', 'kod',
        'katso', 'dotso', 'dagzo', 'kodzo',
        'katko', 'dotko', 'daggo', 'kodgo',
        'katto', 'dotto', 'dagdo', 'koddo',
    ]

    settings = (
        ('EVOLVE_RULES', True),
        ('EVOLVE_HMM', True),
        ('MAX_NUMBER_OF_RULES', 3),
    )
    for key, value in settings:
        config[key] = value

    parent_1 = GAHypothesis(words)
    parent_2 = GAHypothesis(words)
    for title, parent in (('\nH1\n', parent_1), ('\nH2\n', parent_2)):
        print(title)
        log_hypothesis(parent)

    children = GeneticAlgorithm._crossover(parent_1, parent_2)
    offspring_1, offspring_2 = children
    for title, child in (('\nOFFSPRING 1\n', offspring_1),
                         ('\nOFFSPRING 2\n', offspring_2)):
        print(title)
        log_hypothesis(child)