def store_result(self, result, context):
    if isinstance(result, (np.ndarray, la.LArray)):
        res_type = result.dtype.type
    else:
        res_type = type(result)

    if self.temporary:
        target = self.entity.temp_variables
    else:
        # we cannot store/cache self.entity.array[self.name] because the
        # array object can change (eg when enlarging it due to births)
        target = self.entity.array
        result = np.asarray(result)

        # TODO: assert type for temporary variables too
        target_type_idx = type_to_idx[target[self.name].dtype.type]
        res_type_idx = type_to_idx[res_type]
        if res_type_idx > target_type_idx:
            raise Exception(
                "trying to store %s value into '%s' field which is of "
                "type %s" % (idx_to_type[res_type_idx].__name__,
                             self.name,
                             idx_to_type[target_type_idx].__name__))

    # the whole column is updated
    target[self.name] = result

    # invalidate cache
    period = context.period
    if isinstance(period, np.ndarray):
        assert np.isscalar(period) or not period.shape
        period = int(period)
    expr_cache.invalidate(period, context.entity_name,
                          Variable(self.entity, self.name))
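
# Hedged sketch (illustration only, not part of the original module):
# `type_to_idx` and `idx_to_type` above are assumed to rank storable scalar
# types from narrowest to widest, so that a wider result can never silently
# overwrite a narrower column. A minimal stand-in ranking, assuming a
# bool < int < float ordering, could look like this:
def _demo_type_ranking():
    import numpy as np
    # hypothetical ranking, mirroring numpy's safe-casting order
    type_to_idx_demo = {np.bool_: 0, np.int64: 1, np.float64: 2}
    idx_to_type_demo = {idx: t for t, idx in type_to_idx_demo.items()}
    # storing a float result into an int column: rank 2 > rank 1, so
    # store_result() above would raise in this situation
    res_type_idx = type_to_idx_demo[np.float64]
    target_type_idx = type_to_idx_demo[np.int64]
    assert res_type_idx > target_type_idx
    assert idx_to_type_demo[res_type_idx] is np.float64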
def compute(self, context, filter=None):
    filter_value = filter
    if filter_value is None:
        # this is pretty inefficient, but remove() without a filter is not
        # common enough to bother
        filter_value = np.ones(len(context), dtype=bool)

    if not np.any(filter_value):
        return

    not_removed = ~filter_value

    entity = context.entity
    len_before = len(entity.array)

    # Shrink array & temporaries. 99% of the function time is spent here.
    entity.array.keep(not_removed)
    temp_variables = entity.temp_variables
    for name, temp_value in temp_variables.items():
        # This is brittle but there is nothing I can do about it now.
        # Ideally, we should disallow storing expressions which do not
        # result in a scalar or per-individual value (eg expressions using
        # global arrays) in entity.temp_variables. The problem is that
        # users currently have no other choice in this regard: globals are
        # not writable, and there are no global temporary variables, global
        # processes nor global macros. See issue #250.
        if (isinstance(temp_value, np.ndarray) and temp_value.ndim == 1
                and len(temp_value) == len_before):
            temp_variables[name] = temp_value[not_removed]

    # update id_to_rownum
    already_removed = entity.id_to_rownum == -1
    already_removed_indices = filter_to_indices(already_removed)
    already_removed_indices_shifted = \
        already_removed_indices - np.arange(len(already_removed_indices))

    id_to_rownum = np.arange(len_before)
    id_to_rownum -= filter_value.cumsum()
    # XXX: use np.putmask(id_to_rownum, filter_value, -1)
    id_to_rownum[filter_value] = -1
    entity.id_to_rownum = np.insert(id_to_rownum,
                                    already_removed_indices_shifted,
                                    -1)

    # this version is cleaner and slightly faster but the result is also
    # slightly wrong: it eliminates ids for dead/removed individuals at
    # the end of the array and this causes bugs in time-related functions
    # ids = entity.array['id']
    # id_to_rownum = np.full(np.max(ids) + 1, -1, dtype=int)
    # id_to_rownum[ids] = np.arange(len(ids), dtype=int)
    # entity.id_to_rownum = id_to_rownum

    if config.log_level == "processes":
        print("%d %s(s) removed (%d -> %d)"
              % (filter_value.sum(), entity.name, len_before,
                 len(entity.array)),
              end=' ')

    # TODO: in the case of remove(), we should update (take a subset of)
    # all the cache keys matching the entity, but with the current code,
    # it is most likely not worth it because the cache probably contains
    # mostly stuff we will never use.
    expr_cache.invalidate(context.period, context.entity_name)
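
# Hedged sketch (illustration only, not part of the original module): the
# id_to_rownum update above can be hard to follow. Subtracting the
# cumulative sum of the removal filter shifts every kept row index down by
# the number of rows removed before it, and removed ids get -1; np.insert
# then re-adds -1 entries for ids removed in earlier periods. A toy example
# of the core arithmetic:
def _demo_id_to_rownum_update():
    import numpy as np
    len_before = 5
    # remove rows 1 and 3
    filter_value = np.array([False, True, False, True, False])
    id_to_rownum = np.arange(len_before)
    id_to_rownum -= filter_value.cumsum()
    id_to_rownum[filter_value] = -1
    # kept ids 0, 2, 4 now point to rows 0, 1, 2 of the shrunken array
    assert id_to_rownum.tolist() == [0, -1, 1, -1, 2]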
def match_cell(idx, sorted_idx, pool_size):
    global matching_ctx

    set2_size = context_length(matching_ctx)
    if not set2_size:
        raise StopIteration

    if pool_size is not None and set2_size > pool_size:
        pool = random.sample(range(set2_size), pool_size)
        local_ctx = context_subset(matching_ctx, pool)
    else:
        local_ctx = matching_ctx.copy()

    local_ctx.update((k, set1[k][sorted_idx])
                     for k in {'__ids__'} | used_variables1)

    eval_ctx = context.clone(entity_data=local_ctx)
    set2_scores = expr_eval(score, eval_ctx)
    cell2_idx = set2_scores.argmax()

    cell1ids = local_ctx['__ids__']
    cell2ids = local_ctx['__other___ids__'][cell2_idx]

    if pool_size is not None and set2_size > pool_size:
        # transform pool-local index to set/matching_ctx index
        cell2_idx = pool[cell2_idx]

    cell1size = len(cell1ids)
    cell2size = len(cell2ids)
    nb_match = min(cell1size, cell2size)

    # we could introduce a random choice here but it is not strictly
    # necessary. In that case, it should be done in group_context
    ids1 = cell1ids[:nb_match]
    ids2 = cell2ids[:nb_match]

    result[id_to_rownum[ids1]] = ids2
    result[id_to_rownum[ids2]] = ids1

    if nb_match == cell2size:
        matching_ctx = context_delete(matching_ctx, cell2_idx)
    else:
        # other variables do not need to be modified since the cell
        # only got smaller and was not deleted
        matching_ctx['__other___ids__'][cell2_idx] = cell2ids[nb_match:]

        # FIXME: the expr gets cached for the full matching_ctx at the
        # beginning and then, when another woman with the same values is
        # found, it thinks it can reuse the expr but it breaks because it
        # does not have the correct length.
        # The current workaround is to invalidate the whole cache for the
        # current entity, but this is not the right way to go.
        # * disable the cache for matching?
        # * use a local cache so that methods after matching() can use
        #   what was in the cache before matching(). Shouldn't the cache
        #   be stored inside the context anyway?
        expr_cache.invalidate(context.period, context.entity_name)

    if nb_match < cell1size:
        set1['__ids__'][sorted_idx] = cell1ids[nb_match:]
        match_cell(idx, sorted_idx, pool_size)
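
# Hedged sketch (illustration only, not part of the original module):
# match_cell() implements one step of a greedy, score-based matching: for
# the current set-1 cell, evaluate the score expression against every
# remaining set-2 cell, pick the argmax, and pair as many individuals as
# the smaller cell allows. A toy version of that core step, with made-up
# score values:
def _demo_greedy_match_step():
    import numpy as np
    # scores of each remaining set-2 cell for the current set-1 cell
    set2_scores = np.array([0.2, 0.9, 0.5])
    cell2_idx = set2_scores.argmax()       # -> 1, the best partner cell
    cell1ids = np.array([10, 11, 12])      # 3 individuals on side 1
    cell2ids = np.array([20, 21])          # only 2 partners available
    nb_match = min(len(cell1ids), len(cell2ids))  # -> 2
    # the two leftover-handling branches in match_cell():
    # - nb_match == len(cell2ids): the set-2 cell is exhausted and deleted
    # - nb_match < len(cell1ids): the set-1 cell keeps its unmatched ids
    #   and match_cell() recurses to find them another partner cell
    assert cell2_idx == 1 and nb_match == 2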
def compute(self, context, entity_name=None, filter=None, number=None,
            **kwargs):
    if filter is not None and number is not None:
        # Having neither is allowed, though, as there can be a contextual
        # filter. Also, there is no reason to prevent the whole population
        # from giving birth, even though the usefulness of such usage
        # seems dubious.
        raise ValueError("new() 'filter' and 'number' arguments are "
                         "mutually exclusive")
    source_entity = context.entity
    if entity_name is None:
        target_entity = source_entity
    else:
        target_entity = context.entities[entity_name]

    # target context is the context where the new individuals will be
    # created
    if target_entity is source_entity:
        target_context = context
    else:
        # we do need to copy the data (.extra) because we will insert
        # into the entity.array anyway => fresh_data=True
        target_context = context.clone(fresh_data=True,
                                       entity_name=target_entity.name)

    filter_expr = self._getfilter(context, filter)
    if filter_expr is not None:
        to_give_birth = expr_eval(filter_expr, context)
        num_birth = to_give_birth.sum()
    elif number is not None:
        to_give_birth = None
        num_birth = number
    else:
        to_give_birth = np.ones(len(context), dtype=bool)
        num_birth = len(context)

    array = target_entity.array
    default_values = target_entity.fields.default_values

    id_to_rownum = target_entity.id_to_rownum
    num_individuals = len(id_to_rownum)

    children = self._initial_values(array, to_give_birth, num_birth,
                                    default_values)
    if num_birth:
        children['id'] = np.arange(num_individuals,
                                   num_individuals + num_birth)
        children['period'] = context.period

    used_variables = [v.name for v in
                      self._collect_kwargs_variables(kwargs)]
    if to_give_birth is None:
        assert not used_variables
        child_context = context.empty(num_birth)
    else:
        child_context = context.subset(to_give_birth, used_variables,
                                       filter_expr)
    for k, v in kwargs.items():
        if k not in array.dtype.names:
            print("WARNING: {} is unknown, ignoring it!".format(k))
            continue
        children[k] = expr_eval(v, child_context)

    add_individuals(target_context, children)

    expr_cache.invalidate(context.period, context.entity_name)

    # result is the ids of the new individuals corresponding to the
    # source entity
    if to_give_birth is not None:
        result = np.full(context_length(context), -1, dtype=int)
        if source_entity is target_entity:
            extra_bools = np.zeros(num_birth, dtype=bool)
            to_give_birth = np.concatenate((to_give_birth, extra_bools))
        # Note that np.place is a bit faster, but is currently buggy
        # when working with columns of structured arrays.
        # See https://github.com/numpy/numpy/issues/2462
        result[to_give_birth] = children['id']
        return result
    else:
        return None
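
# Hedged sketch (illustration only, not part of the original module): the
# result mapping at the end of compute() returns, for each source row, the
# id of the child it produced (or -1). When parent and child live in the
# same entity, the boolean filter is padded with False for the freshly
# appended rows so it matches the enlarged array. A toy example:
def _demo_new_result_mapping():
    import numpy as np
    # 4 source rows; rows 1 and 3 give birth, producing hypothetical
    # child ids 4 and 5
    to_give_birth = np.array([False, True, False, True])
    child_ids = np.array([4, 5])
    num_birth = len(child_ids)
    # when parent and child share the entity, the array has already been
    # enlarged, so both result and the filter cover old + new rows
    result = np.full(len(to_give_birth) + num_birth, -1, dtype=int)
    padded = np.concatenate((to_give_birth,
                             np.zeros(num_birth, dtype=bool)))
    result[padded] = child_ids
    assert result.tolist() == [-1, 4, -1, 5, -1, -1]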