def update_stats(individuals, end):
    """
    Update all stats in the stats dictionary.

    Time stats are only refreshed while the run is still in progress.
    Population, genome, codon, tree and (single-objective) fitness stats are
    refreshed on every call. When params["NOVELTY"] is set and the run has
    ended, average novelty over a random sample of the cache is stored both
    in ``stats`` and in the last entry of ``trackers.stats_list``.

    :param individuals: A population of individuals.
    :param end: Boolean flag for indicating the end of an evolutionary run.
    :return: Nothing.
    """

    # NOTE(review): block nesting was reconstructed from a source whose
    # indentation had been lost. Only the time stats are assumed to sit under
    # `if not end:`; the `if end:` branch further down would otherwise be
    # unreachable.
    if not end:
        # Time Stats
        trackers.time_list.append(time() - stats['time_adjust'])
        stats['time_taken'] = trackers.time_list[-1] - trackers.time_list[-2]
        stats['total_time'] = trackers.time_list[-1] - trackers.time_list[0]

    # Population Stats
    stats['total_inds'] = params['POPULATION_SIZE'] * (stats['gen'] + 1)
    stats['runtime_error'] = len(trackers.runtime_error_cache)
    if params['CACHE']:
        stats['unique_inds'] = len(trackers.unique_ind_tracker)
        stats['unused_search'] = 100 - stats['unique_inds'] / \
            stats['total_inds'] * 100

    # Genome Stats
    genome_lengths = [len(i.genome) for i in individuals]
    stats['max_genome_length'] = np.nanmax(genome_lengths)
    stats['ave_genome_length'] = np.nanmean(genome_lengths)
    stats['min_genome_length'] = np.nanmin(genome_lengths)

    # Used Codon Stats
    codons = [i.used_codons for i in individuals]
    stats['max_used_codons'] = np.nanmax(codons)
    stats['ave_used_codons'] = np.nanmean(codons)
    stats['min_used_codons'] = np.nanmin(codons)

    # Tree Depth Stats
    depths = [i.depth for i in individuals]
    stats['max_tree_depth'] = np.nanmax(depths)
    stats['ave_tree_depth'] = np.nanmean(depths)
    stats['min_tree_depth'] = np.nanmin(depths)

    # Tree Node Stats
    nodes = [i.nodes for i in individuals]
    stats['max_tree_nodes'] = np.nanmax(nodes)
    stats['ave_tree_nodes'] = np.nanmean(nodes)
    stats['min_tree_nodes'] = np.nanmin(nodes)

    # Per-generation novelty stats are deliberately not calculated: they are
    # not used for the current research and are not worth the time. Only the
    # end-of-run summary below is computed.
    if params["NOVELTY"] and end:
        import random
        from representation.individual import Individual

        # Total Novelty Stats
        n = novelty()

        # (distance algorithm, stats key) pairs, evaluated in this order for
        # every sampled individual.
        measures = (
            ("output", "nov_output_total"),
            ("genotype", "nov_genotype_total"),
            ("levi", "nov_phenotype_total"),
            ("ast", "nov_ast_total"),
            ("derivation", "nov_derivation_total"),
        )
        totals = {key: 0 for _, key in measures}

        # Sample a tenth of the cache, but at least 1000 and at most 10000
        # entries, and never more than the cache actually holds.
        cache_size = len(trackers.cache)
        individual_dics = list(trackers.cache.values())
        sample_size = min(max(1000, cache_size // 10), 10000)
        sample_size = min(cache_size, sample_size)
        ind_sample = random.sample(individual_dics, sample_size)

        for ind_dic in ind_sample:
            # Rebuild an individual from its cached record.
            ind = Individual(ind_dic["genome"], None, False)
            ind.fitness = ind_dic["fitness"]
            ind.phenotype = ind_dic["phenotype"]
            ind.AST = ind_dic["AST"]
            ind.derivation = ind_dic["derivation"]
            ind.test_cases = ind_dic["output_cases"]

            for algorithm, key in measures:
                # Clear any stored novelty before each measure so that a
                # value from the previous algorithm is not reused.
                ind.novelty = np.nan
                totals[key] += n.evaluate_distance(ind, algorithm)
            ind.novelty = np.nan

        # An empty cache yields an empty sample; report NaN ("no data")
        # instead of dividing by zero.
        if sample_size:
            averages = {key: total / sample_size
                        for key, total in totals.items()}
        else:
            averages = {key: np.nan for key in totals}

        stats.update(averages)

        # Change the last generation stats in the stats list too
        final_stats = trackers.stats_list[-1]
        final_stats.update(averages)

    if not hasattr(params['FITNESS_FUNCTION'], 'multi_objective'):
        # Fitness Stats
        fitnesses = [i.fitness for i in individuals]
        stats['ave_fitness'] = np.nanmean(fitnesses, axis=0)
        stats['best_fitness'] = trackers.best_ever.fitness
def load_population(target):
    """
    Given a target folder, read all files in the folder and load/parse
    solutions found in each file.

    Each ``.txt`` file may contain a ``Genotype:`` line and/or a
    ``Phenotype:`` line, with the value on the line that follows. A file
    with neither marker is read in its entirety as a phenotype. Files with a
    genotype are mapped directly to an individual; the rest are handed to
    the GE LR parser via params['REVERSE_MAPPING_TARGET'].

    :param target: A target folder stored in the "seeds" folder.
    :return: A list of all parsed individuals stored in the target folder.
    :raises Exception: If the seeds or target folder does not exist, if a
        stored genotype cannot be evaluated, or if a stored genotype does not
        map to the stored phenotype.
    """

    # Set path for seeds folder
    path_1 = path.join(getcwd(), "..", "seeds")

    if not path.isdir(path_1):
        # Seeds folder does not exist.
        s = "scripts.seed_PonyGE2.load_population\n" \
            "Error: `seeds` folder does not exist in root directory."
        raise Exception(s)

    path_2 = path.join(path_1, target)

    if not path.isdir(path_2):
        # Target folder does not exist.
        s = "scripts.seed_PonyGE2.load_population\n" \
            "Error: target folder " + target + \
            " does not exist in seeds directory."
        raise Exception(s)

    # Get list of all target individuals in the target folder.
    target_inds = [i for i in listdir(path_2) if i.endswith(".txt")]

    # Initialize empty list for seed individuals.
    seed_inds = []

    for ind in target_inds:
        # Loop over all target individuals.

        # Get full file path.
        file_name = path.join(path_2, ind)

        # Initialise None data for ind info.
        genotype, phenotype = None, None

        # Open file.
        with open(file_name, "r") as f:
            # Read file.
            raw_content = f.read()

        # Split file into lines.
        content = raw_content.split("\n")

        # Check if genotype is already saved in file.
        if "Genotype:" in content:
            # Get index location of genotype.
            gen_idx = content.index("Genotype:") + 1

            # The marker can be the very last line of the file; treat the
            # missing value as empty text so the error below is raised
            # instead of an IndexError.
            raw_genotype = content[gen_idx] if gen_idx < len(content) else ""

            # Get the genotype.
            try:
                # NOTE(review): eval() executes arbitrary code from the seed
                # file. Only load seed files from trusted sources; consider
                # ast.literal_eval if genotypes are always plain literals.
                genotype = eval(raw_genotype)

            except Exception as err:
                s = "scripts.seed_PonyGE2.load_population\n" \
                    "Error: Genotype from file " + file_name + \
                    " not recognized: " + raw_genotype
                raise Exception(s) from err

        # Check if phenotype (target string) is already saved in file.
        if "Phenotype:" in content:
            # Get index location of phenotype.
            phen_idx = content.index("Phenotype:") + 1

            # Get the phenotype.
            phenotype = content[phen_idx]

            # TODO: Current phenotype is read in as single-line only. Split
            # is performed on "\n", meaning phenotypes that span multiple
            # lines will not be parsed correctly. This must be fixed in
            # later editions.

        elif "Genotype:" not in content:
            # There is no explicit genotype or phenotype in the target
            # file, read in entire file as phenotype.
            phenotype = raw_content

        if genotype:
            # Generate individual from genome.
            ind = Individual(genotype, None)

            if phenotype and ind.phenotype != phenotype:
                s = "scripts.seed_PonyGE2.load_population\n" \
                    "Error: Specified genotype from file " + file_name + \
                    " doesn't map to same phenotype. Check the specified " \
                    "grammar to ensure all is correct: " + \
                    params['GRAMMAR_FILE']
                raise Exception(s)

        else:
            # Set target for GE LR Parser.
            params['REVERSE_MAPPING_TARGET'] = phenotype

            # Parse target phenotype.
            ind = GE_LR_parser.main()

        # Add new ind to the list of seed individuals.
        seed_inds.append(ind)

    return seed_inds
def evaluate_distance(self, ind: Individual, novelty_alg: str = "levi",
                      max_comparisons: int = 100) -> float:
    """Compare an individual with a random sample of cached individuals.

    Scales very poorly without a max number of comparisons, as the cache is
    constantly growing.

    If ``ind.novelty`` already holds a non-NaN value it is returned
    unchanged; otherwise the computed average distance is stored on
    ``ind.novelty`` before being returned.

    :param ind: An individual to be evaluated
    :param novelty_alg: algorithm to be used; one of "geno"/"genotype",
        "hamming", "levi"/"levenshtein"/"pheno"/"phenotype", "ast",
        "derivation", "fitness" or "output"
    :param max_comparisons: The upper bound on the number of comparisons
        to run
    :return: The novelty of the individual, larger number represents larger
        novelty. 0 is returned when there is nothing to compare against.
    :raises NotImplementedError: If ``novelty_alg`` is not recognised (only
        detected once a comparison is actually attempted).
    """
    # Reuse a previously computed value; callers reset it to NaN to force
    # re-evaluation.
    if not np.isnan(ind.novelty):
        return ind.novelty

    size_cache = len(cache)
    if size_cache == 0:
        # If cache is empty, doesn't matter what is returned since every
        # individual will reach this point and thus will all have the same
        # novelty. Also, cache should never be empty.
        return 0

    # Bound the number of comparisons
    number_comparisons = min(size_cache, max_comparisons)

    # random.sample requires a sequence (dict views are rejected on
    # Python 3.11+), so materialise the cached phenotypes first.
    choices = sample(list(cache), number_comparisons)

    total_novelty = 0
    for other_phenotype in choices:
        # If comparing to itself, don't count it
        if other_phenotype == ind.phenotype:
            number_comparisons -= 1
            continue

        # Want hamming distance of genotype
        if novelty_alg in ("geno", "genotype"):
            other_geno = cache[other_phenotype]["genome"]
            smaller_size = min(len(ind.genome), len(other_geno))
            # With an empty genome there are no positions to compare; count
            # the pair as distance 0 instead of dividing by zero.
            if smaller_size:
                mismatches = sum(
                    1 for mine, theirs in zip(ind.genome, other_geno)
                    if mine != theirs)
                total_novelty += mismatches / smaller_size

        # Compute hamming distance of phenotype
        elif novelty_alg == 'hamming':
            smaller_size = min(len(ind.phenotype), len(other_phenotype))
            total_novelty += hdistance(ind.phenotype[:smaller_size],
                                       other_phenotype[:smaller_size])

        # Compute the normalized levenshtein distance
        elif novelty_alg in ("levi", "levenshtein", "pheno", "phenotype"):
            longest = max(len(ind.phenotype), len(other_phenotype))
            total_novelty += ldistance(ind.phenotype,
                                       other_phenotype) / longest

        # Compute distance of flat AST trees
        elif novelty_alg == "ast":
            other_ind = cache[other_phenotype]
            total_novelty += self.compare_tree_dicts(ind.AST,
                                                     other_ind["AST"])

        # Compute distance of flat derivation trees
        elif novelty_alg == "derivation":
            other_ind = cache[other_phenotype]
            total_novelty += self.compare_tree_dicts(
                ind.derivation, other_ind["derivation"])

        elif novelty_alg == "fitness":
            other_ind = cache[other_phenotype]
            total_novelty += abs(ind.fitness - other_ind["fitness"])

        elif novelty_alg == "output":
            # Sum of (a + b) % 2 over paired test-case entries.
            other_cases = cache[other_phenotype]["output_cases"]
            total_novelty += sum(
                (case + other_cases[idx]) % 2
                for idx, case in enumerate(ind.test_cases))

        else:
            raise NotImplementedError(novelty_alg +
                                      " has not been implemented")

    if number_comparisons <= 0:
        # Every sampled entry was the individual itself (or no comparisons
        # were allowed): nothing to average, so report zero novelty as in
        # the empty-cache case above.
        return 0

    ind.novelty = total_novelty / number_comparisons
    return ind.novelty