Example #1
    def store_result(self, result, context):
        if isinstance(result, np.ndarray):
            res_type = result.dtype.type
        else:
            res_type = type(result)

        if self.temporary:
            target = self.entity.temp_variables
        else:
            # we cannot store/cache self.entity.array[self.name] because the
            # array object can change (e.g. when enlarging it due to births)
            target = self.entity.array

            # TODO: assert type for temporary variables too
            target_type_idx = type_to_idx[target[self.name].dtype.type]
            res_type_idx = type_to_idx[res_type]
            if res_type_idx > target_type_idx:
                raise Exception(
                    "trying to store %s value into '%s' field which is of "
                    "type %s" % (idx_to_type[res_type_idx].__name__, self.name,
                                 idx_to_type[target_type_idx].__name__))

        # the whole column is updated
        target[self.name] = result

        # invalidate cache
        period = context.period
        if isinstance(period, np.ndarray):
            assert np.isscalar(period) or not period.shape
            period = int(period)
        expr_cache.invalidate(period, context.entity_name,
                              Variable(self.entity, self.name))
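
The type check above rejects storing a "wider" result into a "narrower" field. Below is a minimal standalone sketch of the same idea; check_storable is a made-up helper and the promotion ladder is hypothetical (the real type_to_idx/idx_to_type mappings are defined elsewhere in the code base and may differ):

    import numpy as np

    # hypothetical promotion order: bool < int < float
    idx_to_type = [np.bool_, np.int64, np.float64]
    type_to_idx = {t: i for i, t in enumerate(idx_to_type)}

    def check_storable(res_type, target_type):
        # refuse to narrow: e.g. a float result cannot go into an int field
        if type_to_idx[res_type] > type_to_idx[target_type]:
            raise TypeError("trying to store %s value into field of type %s"
                            % (res_type.__name__, target_type.__name__))

    check_storable(np.int64, np.float64)    # ok: int fits in a float field
    # check_storable(np.float64, np.int64)  # would raise TypeError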
Example #2
    def store_result(self, result, context):
        if result is None:
            return

        if isinstance(result, np.ndarray):
            res_type = result.dtype.type
        else:
            res_type = type(result)

        if self.temporary:
            target = self.entity.temp_variables
        else:
            # we cannot store/cache self.entity.array[self.name] because the
            # array object can change (e.g. when enlarging it due to births)
            target = self.entity.array

            # TODO: assert type for temporary variables too
            target_type_idx = type_to_idx[target[self.name].dtype.type]
            res_type_idx = type_to_idx[res_type]
            if res_type_idx > target_type_idx:
                raise Exception(
                    "trying to store %s value into '%s' field which is of "
                    "type %s" % (idx_to_type[res_type_idx].__name__, self.name, idx_to_type[target_type_idx].__name__)
                )

        # the whole column is updated
        target[self.name] = result

        # invalidate cache
        period = context.period
        if isinstance(period, np.ndarray):
            assert np.isscalar(period) or not period.shape
            period = int(period)
        expr_cache.invalidate(period, context.entity_name, Variable(self.entity, self.name))
Example #3
    def compute(self, context, filter=None):
        filter_value = filter
        if filter_value is None:
            # this is pretty inefficient, but remove() without a filter is
            # not common enough to be worth optimizing
            filter_value = np.ones(len(context), dtype=bool)

        if not np.any(filter_value):
            return

        not_removed = ~filter_value

        entity = context.entity
        len_before = len(entity.array)

        # Shrink array & temporaries. 99% of the function time is spent here.
        entity.array.keep(not_removed)
        temp_variables = entity.temp_variables
        for name, temp_value in temp_variables.items():
            if isinstance(temp_value, np.ndarray) and temp_value.shape:
                temp_variables[name] = temp_value[not_removed]

        # update id_to_rownum
        already_removed = entity.id_to_rownum == -1
        already_removed_indices = filter_to_indices(already_removed)
        already_removed_indices_shifted = \
            already_removed_indices - np.arange(len(already_removed_indices))

        id_to_rownum = np.arange(len_before)
        id_to_rownum -= filter_value.cumsum()
        # XXX: use np.putmask(id_to_rownum, filter_value, -1)
        id_to_rownum[filter_value] = -1
        entity.id_to_rownum = np.insert(id_to_rownum,
                                        already_removed_indices_shifted,
                                        -1)
        # this version is cleaner and slightly faster but the result is also
        # slightly wrong: it eliminates ids for dead/removed individuals at
        # the end of the array and this causes bugs in time-related functions
#        ids = entity.array['id']
#        id_to_rownum = np.empty(np.max(ids) + 1, dtype=int)
#        id_to_rownum.fill(-1)
#        id_to_rownum[ids] = np.arange(len(ids), dtype=int)
#        entity.id_to_rownum = id_to_rownum
        if config.log_level == "processes":
            print("%d %s(s) removed (%d -> %d)"
                  % (filter_value.sum(), entity.name, len_before,
                     len(entity.array)),
                  end=' ')

        # TODO: in the case of remove(), we should update (take a subset of) all
        # the cache keys matching the entity, but with the current code,
        # it is most likely not worth it because the cache probably contains
        # mostly stuff we will never use.
        expr_cache.invalidate(context.period, context.entity_name)
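
The id_to_rownum bookkeeping is the subtle part: surviving rows are renumbered by subtracting the running count of removals, and ids removed in earlier periods are re-inserted as -1 at their original positions (the arange subtraction converts positions in the full id space into positions in the compacted array, which is what np.insert expects). A self-contained illustration with hypothetical values, using np.flatnonzero in place of the filter_to_indices helper:

    import numpy as np

    # the mapping before this removal: 8 ids, ids 1 and 4 already removed,
    # so the entity array currently holds 6 rows
    old_id_to_rownum = np.array([0, -1, 1, 2, -1, 3, 4, 5])
    filter_value = np.array([False, True, False, False, True, False])

    # renumber surviving rows: each removal shifts later rows up by one
    id_to_rownum = np.arange(len(filter_value))
    id_to_rownum -= filter_value.cumsum()
    id_to_rownum[filter_value] = -1
    # -> [ 0 -1  1  2 -1  3]

    # re-insert -1 entries for the ids removed in earlier periods
    already_removed_indices = np.flatnonzero(old_id_to_rownum == -1)  # [1 4]
    shifted = already_removed_indices - np.arange(len(already_removed_indices))
    print(np.insert(id_to_rownum, shifted, -1))                       # shifted = [1 3]
    # -> [ 0 -1 -1  1 -1  2 -1  3]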
Example #4
    def compute(self, context, filter=None):
        filter_value = filter
        if filter_value is None:
            # this is pretty inefficient, but remove() without a filter is
            # not common enough to be worth optimizing
            filter_value = np.ones(len(context), dtype=bool)

        if not np.any(filter_value):
            return

        not_removed = ~filter_value

        entity = context.entity
        len_before = len(entity.array)

        # Shrink array & temporaries. 99% of the function time is spent here.
        entity.array.keep(not_removed)
        temp_variables = entity.temp_variables
        for name, temp_value in temp_variables.items():
            if isinstance(temp_value, np.ndarray) and temp_value.shape:
                temp_variables[name] = temp_value[not_removed]

        # update id_to_rownum
        already_removed = entity.id_to_rownum == -1
        already_removed_indices = filter_to_indices(already_removed)
        already_removed_indices_shifted = \
            already_removed_indices - np.arange(len(already_removed_indices))

        id_to_rownum = np.arange(len_before)
        id_to_rownum -= filter_value.cumsum()
        # XXX: use np.putmask(id_to_rownum, filter_value, -1)
        id_to_rownum[filter_value] = -1
        entity.id_to_rownum = np.insert(id_to_rownum,
                                        already_removed_indices_shifted, -1)
        # this version is cleaner and slightly faster but the result is also
        # slightly wrong: it eliminates ids for dead/removed individuals at
        # the end of the array and this causes bugs in time-related functions
        #        ids = entity.array['id']
        #        id_to_rownum = np.full(np.max(ids) + 1, -1, dtype=int)
        #        id_to_rownum[ids] = np.arange(len(ids), dtype=int)
        #        entity.id_to_rownum = id_to_rownum
        if config.log_level == "processes":
            print("%d %s(s) removed (%d -> %d)" %
                  (filter_value.sum(), entity.name, len_before,
                   len(entity.array)),
                  end=' ')

        # TODO: in the case of remove(), we should update (take a subset of) all
        # the cache keys matching the entity, but with the current code,
        # it is most likely not worth it because the cache probably contains
        # mostly stuff we will never use.
        expr_cache.invalidate(context.period, context.entity_name)
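
The commented-out variant is "slightly wrong" for a concrete reason: sizing the mapping with np.max(ids) + 1 silently drops the entries for removed ids that lie past the largest surviving id, so those ids no longer resolve to -1. A tiny demonstration with hypothetical values:

    import numpy as np

    ids = np.array([0, 2, 3])    # suppose ids 1, 4 and 5 have been removed
    id_to_rownum = np.full(np.max(ids) + 1, -1, dtype=int)
    id_to_rownum[ids] = np.arange(len(ids), dtype=int)
    print(len(id_to_rownum))     # 4: ids 4 and 5 have no entry at all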
Example #5
    def compute(self, context, entity_name=None, filter=None, number=None,
                **kwargs):
        if filter is not None and number is not None:
            # Having neither is allowed, though, as there can be a contextual
            # filter. Also, there is no reason to prevent the whole
            # population from giving birth, even though the usefulness of
            # such usage seems dubious.
            raise ValueError("new() 'filter' and 'number' arguments are "
                             "mutually exclusive")
        source_entity = context.entity
        if entity_name is None:
            target_entity = source_entity
        else:
            target_entity = context.entities[entity_name]

        # target context is the context where the new individuals will be
        # created
        if target_entity is source_entity:
            target_context = context
        else:
            # we do need to copy the data (.extra) because we will insert into
            # the entity.array anyway => fresh_data=True
            target_context = context.clone(fresh_data=True,
                                           entity_name=target_entity.name)

        filter_expr = self._getfilter(context, filter)
        if filter_expr is not None:
            to_give_birth = expr_eval(filter_expr, context)
            num_birth = to_give_birth.sum()
        elif number is not None:
            to_give_birth = None
            num_birth = number
        else:
            to_give_birth = np.ones(len(context), dtype=bool)
            num_birth = len(context)

        array = target_entity.array

        id_to_rownum = target_entity.id_to_rownum
        num_individuals = len(id_to_rownum)

        children = self._initial_values(array, to_give_birth, num_birth)
        if num_birth:
            children['id'] = np.arange(num_individuals,
                                       num_individuals + num_birth)
            children['period'] = context.period

            used_variables = [v.name for v in
                              self._collect_kwargs_variables(kwargs)]
            if to_give_birth is None:
                assert not used_variables
                child_context = context.empty(num_birth)
            else:
                child_context = context.subset(to_give_birth, used_variables,
                                               filter_expr)
            for k, v in kwargs.items():
                children[k] = expr_eval(v, child_context)

        add_individuals(target_context, children)

        expr_cache.invalidate(context.period, context.entity_name)

        # result is the ids of the new individuals corresponding to the source
        # entity
        if to_give_birth is not None:
            result = np.empty(context_length(context), dtype=int)
            result.fill(-1)
            if source_entity is target_entity:
                extra_bools = np.zeros(num_birth, dtype=bool)
                to_give_birth = np.concatenate((to_give_birth, extra_bools))
            # Note that np.place is a bit faster, but is currently buggy when
            # working with columns of structured arrays.
            # See https://github.com/numpy/numpy/issues/2462
            result[to_give_birth] = children['id']
            return result
        else:
            return None
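
The final block maps each newborn's id back to the source row that produced it, with -1 for rows that did not give birth. A minimal sketch of that step, with hypothetical values:

    import numpy as np

    to_give_birth = np.array([False, True, False, True])
    children_ids = np.array([10, 11])    # ids assigned to the two newborns

    result = np.empty(len(to_give_birth), dtype=int)
    result.fill(-1)
    result[to_give_birth] = children_ids
    print(result)                        # [-1 10 -1 11]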
Example #6
        def match_cell(idx, sorted_idx, pool_size):
            global matching_ctx

            set2_size = context_length(matching_ctx)
            if not set2_size:
                raise StopIteration

            if pool_size is not None and set2_size > pool_size:
                pool = random.sample(range(set2_size), pool_size)
                local_ctx = context_subset(matching_ctx, pool)
            else:
                local_ctx = matching_ctx.copy()

            local_ctx.update((k, set1[k][sorted_idx])
                             for k in {'__ids__'} | used_variables1)

            eval_ctx = context.clone(entity_data=local_ctx)
            set2_scores = expr_eval(score, eval_ctx)
            cell2_idx = set2_scores.argmax()

            cell1ids = local_ctx['__ids__']
            cell2ids = local_ctx['__other___ids__'][cell2_idx]

            if pool_size is not None and set2_size > pool_size:
                # transform pool-local index to set/matching_ctx index
                cell2_idx = pool[cell2_idx]

            cell1size = len(cell1ids)
            cell2size = len(cell2ids)
            nb_match = min(cell1size, cell2size)

            # we could introduce a random choice here but it is not really
            # necessary. If we did, it should be done in group_context
            ids1 = cell1ids[:nb_match]
            ids2 = cell2ids[:nb_match]

            result[id_to_rownum[ids1]] = ids2
            result[id_to_rownum[ids2]] = ids1
            
            if nb_match == cell2size:
                matching_ctx = context_delete(matching_ctx, cell2_idx)
            else:
                # other variables do not need to be modified since the cell
                # only got smaller and was not deleted
                matching_ctx['__other___ids__'][cell2_idx] = cell2ids[nb_match:]

            # FIXME: the expr gets cached for the full matching_ctx at the
            # beginning, and then when another woman with the same values is
            # found, it thinks it can reuse the expr, but it breaks because
            # it does not have the correct length.

            # the current workaround is to invalidate the whole cache for the
            # current entity but this is not the right way to go.
            # * disable the cache for matching?
            # * use a local cache so that methods after matching() can use
            # what was in the cache before matching(). Shouldn't the cache be
            # stored inside the context anyway?
            expr_cache.invalidate(context.period, context.entity_name)

            if nb_match < cell1size:
                set1['__ids__'][sorted_idx] = cell1ids[nb_match:]
                match_cell(idx, sorted_idx, pool_size)
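
The core of match_cell is: optionally restrict the candidates to a random pool, pick the candidate with the highest score, then map the pool-local winner back to an index into the full candidate set. A toy standalone version of that selection step (hypothetical scores; the real code evaluates a score expression against the context):

    import numpy as np

    rng = np.random.default_rng(0)
    scores = np.array([0.2, 0.9, 0.4, 0.7, 0.1])   # one score per candidate

    pool = rng.choice(len(scores), size=3, replace=False)
    local_best = scores[pool].argmax()    # index within the pool
    best_idx = pool[local_best]           # index within the full set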
Example #7
    def compute(self,
                context,
                entity_name=None,
                filter=None,
                number=None,
                **kwargs):
        if filter is not None and number is not None:
            # Having neither is allowed, though, as there can be a contextual
            # filter. Also, there is no reason to prevent the whole
            # population from giving birth, even though the usefulness of
            # such usage seems dubious.
            raise ValueError("new() 'filter' and 'number' arguments are "
                             "mutually exclusive")
        source_entity = context.entity
        if entity_name is None:
            target_entity = source_entity
        else:
            target_entity = context.entities[entity_name]

        # target context is the context where the new individuals will be
        # created
        if target_entity is source_entity:
            target_context = context
        else:
            # we do need to copy the data (.extra) because we will insert into
            # the entity.array anyway => fresh_data=True
            target_context = context.clone(fresh_data=True,
                                           entity_name=target_entity.name)

        filter_expr = self._getfilter(context, filter)
        if filter_expr is not None:
            to_give_birth = expr_eval(filter_expr, context)
            num_birth = to_give_birth.sum()
        elif number is not None:
            to_give_birth = None
            num_birth = number
        else:
            to_give_birth = np.ones(len(context), dtype=bool)
            num_birth = len(context)

        array = target_entity.array
        default_values = target_entity.fields.default_values

        id_to_rownum = target_entity.id_to_rownum
        num_individuals = len(id_to_rownum)

        children = self._initial_values(array, to_give_birth, num_birth,
                                        default_values)
        if num_birth:
            children['id'] = np.arange(num_individuals,
                                       num_individuals + num_birth)
            children['period'] = context.period

            used_variables = [
                v.name for v in self._collect_kwargs_variables(kwargs)
            ]
            if to_give_birth is None:
                assert not used_variables
                child_context = context.empty(num_birth)
            else:
                child_context = context.subset(to_give_birth, used_variables,
                                               filter_expr)
            for k, v in kwargs.items():
                if k not in array.dtype.names:
                    print("WARNING: {} is unknown, ignoring it!".format(k))
                    continue
                children[k] = expr_eval(v, child_context)

        add_individuals(target_context, children)

        expr_cache.invalidate(context.period, context.entity_name)

        # result is the ids of the new individuals corresponding to the source
        # entity
        if to_give_birth is not None:
            result = np.full(context_length(context), -1, dtype=int)
            if source_entity is target_entity:
                extra_bools = np.zeros(num_birth, dtype=bool)
                to_give_birth = np.concatenate((to_give_birth, extra_bools))
            # Note that np.place is a bit faster, but is currently buggy when
            # working with columns of structured arrays.
            # See https://github.com/numpy/numpy/issues/2462
            result[to_give_birth] = children['id']
            return result
        else:
            return None
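
One small difference from Example #5 is worth noting: np.full(n, -1, dtype=int) replaces the two-step np.empty followed by fill, producing an identical array:

    import numpy as np

    a = np.empty(5, dtype=int)
    a.fill(-1)
    b = np.full(5, -1, dtype=int)
    assert (a == b).all()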