def get_random_input(self, block_material: BlockMaterial, req_dtype, _min=None, _max=None, exclude=None):
    '''
    Search the genome of block_material, over node indices in [_min, _max),
    for a node whose output data type matches req_dtype.

    Parameters
    ----------
    block_material : BlockMaterial
        genome being searched; used for dtype lookup and logging ids.
    req_dtype :
        data type the chosen node's output must equal.
    _min : int, optional
        inclusive lower bound; defaults to -1*self.input_count (block inputs).
    _max : int, optional
        exclusive upper bound; defaults to self.main_count.
    exclude : list, optional
        node indices that must not be returned.

    Returns
    -------
    int or None
        a matching node index, or None if we failed to find a matching input.
    '''
    if exclude is None:  # avoid the mutable-default-argument pitfall
        exclude = []
    ezLogging.debug("%s - Inside get_random_input; req_dtype: %s, _min: %s, _max: %s, exclude: %s"
                    % (block_material.id, req_dtype, _min, _max, exclude))
    if _min is None:
        _min = -1 * self.input_count
    if _max is None:
        _max = self.main_count

    choices = np.arange(_min, _max)
    if len(exclude) > 0:
        # one vectorized mask instead of a python loop of np.delete calls
        choices = choices[~np.isin(choices, exclude)]

    if len(choices) == 0:
        ezLogging.warning("%s - Eliminated all possible input nodes with exclude: %s"
                          % (block_material.id, exclude))
        return None

    # exhaustively try each choice, in a random order, until dtypes match
    poss_inputs = np.random.choice(a=choices, size=len(choices), replace=False)
    for input_index in poss_inputs:
        input_dtype = self.get_node_dtype(block_material, input_index, "output")
        ezLogging.debug("%s - trying to match index %i with %s to %s"
                        % (block_material.id, input_index, input_dtype, req_dtype))
        if req_dtype == input_dtype:
            return input_index

    # none of the poss_inputs worked, failed to find matching input
    ezLogging.warning("%s - None of the input nodes matched for req_dtype: %s, exclude: %s, min: %s, max: %s"
                      % (block_material.id, req_dtype, exclude, _min, _max))
    return None
def mutate_single_argvalue(mutant_material: BlockMaterial, block_def): #: BlockDefinition):
    '''
    Mutate the value stored at an arg index, rather than swapping to a
    different arg index of the same type (see mutate_single_argindex).

    Arg indices are visited in a random order; every visited arg gets
    mutated in place, and we stop as soon as an *active* arg has been
    mutated, at which point the genome is flagged for re-evaluation.
    '''
    ezLogging.info("%s - Inside mutate_single_argvalue" % (mutant_material.id))
    if len(mutant_material.active_args) == 0:
        # no active args -> nothing meaningful to mutate
        ezLogging.warning("%s - No active args to mutate" % (mutant_material.id))
        return

    # randomly reorder all arg indices
    shuffled_indices = rnd.choice(np.arange(block_def.arg_count),
                                  size=block_def.arg_count,
                                  replace=False)
    for arg_index in shuffled_indices:
        mutant_material.args[arg_index].mutate()
        ezLogging.info("%s - Mutated node %i; new arg value: %s"
                       % (mutant_material.id, arg_index, mutant_material.args[arg_index]))
        if arg_index in mutant_material.active_args:
            # an active arg finally mutated; genome output can change
            ezLogging.debug("%s - Mutated node %i - active" % (mutant_material.id, arg_index))
            mutant_material.need_evaluate = True
            break
        ezLogging.debug("%s - Mutated node %i - inactive" % (mutant_material.id, arg_index))
def get_random_arg(self, req_dtype, exclude=None):
    '''
    Similar to get_random_input: find an arg_index whose arg type matches
    req_dtype.

    Parameters
    ----------
    req_dtype :
        required arg type, compared against self.arg_types.
    exclude : list, optional
        arg indices that must not be returned.

    Returns
    -------
    int or None
        a randomly chosen matching arg index, or None if none match.
    '''
    if exclude is None:  # avoid the mutable-default-argument pitfall
        exclude = []
    ezLogging.debug("%s-%s - Inside get_random_arg; req_dtype: %s, exclude: %s"
                    % (None, self.nickname, req_dtype, exclude))
    # every arg index with a matching type that isn't excluded
    choices = [arg_index for arg_index, arg_type in enumerate(self.arg_types)
               if (arg_type == req_dtype) and (arg_index not in exclude)]
    if len(choices) == 0:
        ezLogging.warning("%s-%s - Eliminated all possible arg values for req_dtype: %s, exclude: %s"
                          % (None, self.nickname, req_dtype, exclude))
        return None
    return rnd.choice(choices)
def mutate_single_argindex(mutant_material: BlockMaterial, block_def): #: BlockDefinition):
    '''
    Pick a node, pick one of its args, and swap that arg for a *different*
    arg index in .args carrying the same arg type.

    Only nodes that actually take args are candidates; they are tried in a
    random order and we stop at the first swap on an active node, flagging
    the genome for re-evaluation.
    '''
    ezLogging.info("%s - Inside mutate_single_argindex" % (mutant_material.id))
    if len(mutant_material.active_args) == 0:
        # no active args -> nothing meaningful to mutate
        ezLogging.warning("%s - No active args to mutate" % (mutant_material.id))
        return

    # need to find those nodes with 'args' filled
    #weights = [] # option to sample node_index by the number of args for each node
    candidates = [node_index for node_index in range(block_def.main_count)
                  if len(mutant_material[node_index]["args"]) > 0]
    # randomly reorder the candidate nodes
    candidates = rnd.choice(candidates, size=len(candidates), replace=False)
    for node_index in candidates:
        node_arg_count = len(mutant_material[node_index]["args"])
        ith_arg = rnd.choice(np.arange(node_arg_count))
        current_arg = mutant_material[node_index]["args"][ith_arg]
        arg_dtype = block_def.get_node_dtype(mutant_material, node_index, "args")[ith_arg]
        new_arg = block_def.get_random_arg(arg_dtype, exclude=[current_arg])
        if new_arg is None:
            # failed to find a same-typed replacement; try another node
            continue
        mutant_material[node_index]["args"][ith_arg] = new_arg
        ezLogging.info("%s - Mutated node %i; ori arg index: %i, new arg index: %i"
                       % (mutant_material.id, node_index, current_arg, new_arg))
        if node_index in mutant_material.active_nodes:
            # an active node finally mutated; genome output can change
            ezLogging.debug("%s - Mutated node %i - active" % (mutant_material.id, node_index))
            mutant_material.need_evaluate = True
            break
        ezLogging.debug("%s - Mutated node %i - inactive" % (mutant_material.id, node_index))
def get_random_ftn(self, req_dtype=None, exclude=[], return_all=False):
    '''
    Similar to get_random_input but returns a function/primitive that, if
    given, will output something with the same data type as req_dtype.

    Parameters
    ----------
    req_dtype : optional
        required output dtype of the primitive; None accepts any primitive.
    exclude : list, optional
        primitives that must not be returned. NOTE: never mutated here, so
        the shared mutable default is tolerated for signature compatibility.
    return_all : bool
        if True, return ALL matching primitives in a weighted random order;
        otherwise return one weighted random sample.

    Returns
    -------
    primitive, np.ndarray of primitives, or None
        None only if exclude (plus the dtype filter) eliminated every
        operator. This assumes the user has included primitives that output
        data types that we would want to see in our genome.
    '''
    ezLogging.debug("%s-%s - Inside get_random_ftn; req_dtype: %s, exclude: %s, return_all: %s"
                    % (None, self.nickname, req_dtype, exclude, return_all))
    choices = np.array(self.operators)
    weights = np.array(self.operator_weights)
    for val in exclude:
        # note: have to delete from weights first because we use choices to get right index
        weights = np.delete(weights, np.where(choices==val))
        choices = np.delete(choices, np.where(choices==val))

    # now check the output dtypes match
    if req_dtype is not None:
        delete = [ith_choice for ith_choice, choice in enumerate(choices)
                  if self.operator_dict[choice]["output"] != req_dtype]
        weights = np.delete(weights, delete)
        choices = np.delete(choices, delete)

    if len(choices) == 0:
        # we have somehow eliminated all possible options
        ezLogging.warning("%s-%s - Eliminated all available operators for req_dtype: %s, and excluding: %s"
                          % (None, self.nickname, req_dtype, exclude))
        return None

    total = weights.sum()
    if total <= 0:
        # every surviving operator has zero weight; fall back to uniform so
        # rnd.choice doesn't divide by zero below
        weights = np.full(len(choices), 1.0 / len(choices))
    elif abs(total - 1.0) > 1e-9:
        # we must have removed some values...normalize.
        # BUGFIX: the old check only renormalized when the sum dropped below
        # 1 - 1e-3, so rnd.choice could still raise "probabilities do not
        # sum to 1" for sums slightly below 1 or anywhere above 1.
        weights = weights / total

    if return_all:
        return rnd.choice(choices, size=len(choices), replace=False, p=weights)
    else:
        return rnd.choice(choices, p=weights)
def adjust_pop_size(self, problem: ProblemDefinition_Abstract, multiples_of: List[int]):
    '''
    Force problem.pop_size to be a common multiple of every int in
    multiples_of, so downstream code doesn't break. For example, tournament
    selection typically needs the population to be a multiple of 2 or 4,
    and with mpi we want a multiple of the number of nodes for convenience.

    Searches downward from the current pop size first; if that bottoms out
    at or below zero, restarts from the original size searching upward.
    '''
    if not multiples_of:
        # BUGFIX: with an empty list the loop below never set satisfied=True
        # and spun forever; nothing to be a multiple of means nothing to do
        return

    original_size = problem.pop_size
    possible_size = original_size  # ints are immutable; deepcopy was unnecessary
    satisfied = False
    direction = "down"
    while not satisfied:
        # loop until we have a pop_size that is a multiple of all ints in the list
        for mult in multiples_of:
            if possible_size % mult != 0:
                # not a multiple...not satisfied; nudge toward a multiple of mult
                if direction == "down":
                    possible_size -= possible_size % mult
                else:
                    possible_size += (mult - possible_size % mult)
                satisfied = False
                break
            else:
                # it is a multiple...so far, we are satisfied
                satisfied = True
        if possible_size <= 0:
            # then we failed going down...restart from original size going up
            # BUGFIX: replaced a leftover debug print() with proper logging
            ezLogging.debug("adjust_pop_size bottomed out at %i; retrying upward" % possible_size)
            satisfied = False
            direction = "up"
            possible_size = original_size

    if possible_size != original_size:
        ezLogging.warning(
            "Changing problem's population size from %i to %i to be multiples of %s"
            % (original_size, possible_size, multiples_of))
    problem.pop_size = possible_size
def check_convergence(self, universe):
    '''
    Flip universe.converged to True once we hit the generation limit or the
    best first-objective (rmse) score drops below the threshold.
    '''
    GENERATION_LIMIT = 5
    SCORE_MIN = 1e-1

    # only going to look at the first objective value which is rmse
    best_row = universe.pop_fitness_scores[:, 0].argmin()
    best_scores = universe.pop_fitness_scores[best_row, :-1]
    ezLogging.warning("Checking Convergence - generation %i, best score: %s"
                      % (universe.generation, best_scores))

    if universe.generation >= GENERATION_LIMIT:
        ezLogging.warning("TERMINATING...reached generation limit.")
        universe.converged = True
    if best_scores[0] < SCORE_MIN:
        ezLogging.warning("TERMINATING...reached minimum scores.")
        universe.converged = True
def check_convergence(self, universe):
    """
    Check whether this universe's run should stop, setting
    universe.converged to True when either condition below is met.

    :param universe: object with pop_fitness_scores (2D array: one row per
                     individual, one column per objective -- assumed; TODO
                     confirm against caller), generation (int), and
                     converged (bool).
    :return: None; communicates by setting universe.converged.
    """
    GENERATION_LIMIT = 2 #50
    SCORE_MIN = 1 - 1e-10
    # only going to look at the 2nd objective value which is f1
    min_firstobjective_index = universe.pop_fitness_scores[:, 1].argmin()
    min_firstobjective = universe.pop_fitness_scores[
        min_firstobjective_index, :]
    ezLogging.warning(
        "Checking Convergence - generation %i, best score: %s" %
        (universe.generation, min_firstobjective))
    if universe.generation >= GENERATION_LIMIT:
        ezLogging.warning("TERMINATING...reached generation limit.")
        universe.converged = True
    # NOTE(review): the row above was selected by argmin over column 1 (the
    # "2nd objective"/f1 per the comment), but the test below reads element
    # [0] of that row -- confirm whether [1] was intended here.
    if np.abs(min_firstobjective[0]) > SCORE_MIN:
        ezLogging.warning("TERMINATING...reached minimum scores.")
        universe.converged = True
def main(problem_filename: str, problem_output_directory: str = None, seed: int = 0, loglevel: int = logging.WARNING):
    '''
    Entry point: run problem.number_universe evolutionary runs of the given
    problem file, each in its own output subdirectory with its own log file
    and derived seed.

    Parameters
    ----------
    problem_filename : str
        name of a .py file in ./problems that defines a Problem class.
    problem_output_directory : str, optional
        where all output goes; a fresh temp directory is created if omitted.
        BUGFIX: the old default `tempfile.mkdtemp()` was evaluated at import
        time, so the directory already existed and the
        os.makedirs(..., exist_ok=False) below raised FileExistsError on the
        default path (and each mpi rank got a different directory).
    seed : int
        base random seed; each universe derives its own seed from it.
    loglevel : int
        python logging level for the custom ezLogging logger.
    '''
    node_rank = MPI.COMM_WORLD.Get_rank()  # which node are we on if mpi, else always 0
    node_size = MPI.COMM_WORLD.Get_size()  # how many nodes are we using if mpi, else always 1

    # want to make sure that creation of files, folders, and logging is not
    # duplicated if using mpi. the following is always true if not using mpi
    if node_rank == 0:
        if problem_output_directory is None:
            # lazily create the default output dir (mkdtemp also creates it,
            # so no makedirs needed)
            problem_output_directory = tempfile.mkdtemp()
        else:
            os.makedirs(problem_output_directory, exist_ok=False)
        # copy problem file over to problem_output_directory; this way we
        # know for sure which version of the problem file resulted in the output
        src = join(dirname(realpath(__file__)), "problems", problem_filename)
        dst = join(problem_output_directory, problem_filename)
        shutil.copyfile(src, dst)
    # make sure every rank agrees on the output directory (matters when the
    # default temp dir was created by rank 0 only); trivial when node_size==1
    problem_output_directory = MPI.COMM_WORLD.bcast(problem_output_directory, root=0)

    # create custom logging.logger for this node
    log_formatter = ezLogging.logging_setup(loglevel)
    if loglevel < logging.WARNING:  # true only for DEBUG or INFO
        log_handler_2stdout = ezLogging.logging_2stdout(log_formatter)
    else:
        log_handler_2stdout = None

    # set the seed before importing problem.
    # NOTE will set another seed when we start the universe
    ezLogging.warning("Setting seed, for file imports, to %i" % (seed))
    np.random.seed(seed)
    random.seed(seed)  # shouldn't be using 'random' module but setting seed jic

    problem_module = __import__(problem_filename[:-3])  # remove the '.py' from filename
    problem = problem_module.Problem()
    from codes.universe import UniverseDefinition, MPIUniverseDefinition

    log_handler_2file = None  # just initializing
    for ith_universe in range(problem.number_universe):
        # set new output directory
        universe_output_directory = os.path.join(problem_output_directory,
                                                 "univ%04d" % ith_universe)
        if node_rank == 0:
            os.makedirs(universe_output_directory, exist_ok=False)
        MPI.COMM_WORLD.Barrier()

        # init corresponding universe and new log file handler
        if problem.mpi:
            ezLogging_method = ezLogging.logging_2file_mpi
            universe_seed = seed + 1 + (ith_universe * node_size) + node_rank
            ThisUniverse = MPIUniverseDefinition
        else:
            ezLogging_method = ezLogging.logging_2file
            universe_seed = seed + 1 + ith_universe
            ThisUniverse = UniverseDefinition
        log_handler_2file = ezLogging_method(log_formatter,
                                             filename=os.path.join(universe_output_directory, "log.txt"))

        ezLogging.warning("Setting seed for Universe, to %i" % (universe_seed))
        np.random.seed(universe_seed)
        # BUGFIX: was random.seed(seed) -- reseeding the 'random' module with
        # the base seed every universe, inconsistent with np.random.seed above
        random.seed(universe_seed)

        ezLogging.warning("STARTING UNIVERSE %i" % ith_universe)
        universe = ThisUniverse(problem, universe_output_directory)

        # run
        start_time = time.time()
        universe.run(problem)
        ezLogging.warning("...time of universe %i: %.2f minutes"
                          % (ith_universe, (time.time() - start_time) / 60))

        # do some clean up, if we're about to start another run:
        # remove previous universe log file handler if exists
        ezLogging.logging_remove_handler(log_handler_2file)
        # TODO is there a way to track memory usage before and after here?
        del universe  # will that also delete populations? or at least gc.collect will remove it?
        gc.collect()