Exemplo n.º 1
0
    def compute(self, context, bool_expr):
        """Return, for each individual, the number of consecutive periods
        (ending at the current period) for which *bool_expr* was True.

        Walks backward in time from the previous period down to the
        entity's base period, stopping early once no individual is still
        "running" (i.e. still True in every period seen so far).
        """
        entity = context.entity

        baseperiod = entity.base_period
        period = context.period - 1
        value = expr_eval(bool_expr, context)

        # using a full int so that the "store" type check works
        # (np.int was removed in NumPy 1.24 -- use the builtin int instead)
        result = value.astype(int)
        res_size = len(entity.array)
        last_period_true = np.full(res_size, period + 1, dtype=int)

        id_to_rownum = context.id_to_rownum
        still_running = value.copy()
        while np.any(still_running) and period >= baseperiod:
            ids, values = self.value_for_period(bool_expr,
                                                period,
                                                context,
                                                fill=None)
            # missing[row] is True when the individual had no value at all
            # for this period
            missing = np.ones(res_size, dtype=bool)
            period_value = np.zeros(res_size, dtype=bool)
            if len(ids):
                value_rows = id_to_rownum[ids]
                safe_put(missing, value_rows, False)
                safe_put(period_value, value_rows, values)

            # individuals True up to now AND True this period
            value = still_running & period_value
            result += value * (last_period_true - period)

            # keep running if True this period OR if the value is missing
            still_running &= period_value | missing
            last_period_true[period_value] = period
            period -= 1
        return result
Exemplo n.º 2
0
    def evaluate(self, context):
        """Sum *self.expr* over all past periods (from the previous period
        down to the entity's base period) for each individual.

        Periods where an individual has no value for the expression
        contribute 0 to the sum.
        """
        entity = context['__entity__']

        baseperiod = entity.base_period
        period = context['period'] - 1
        expr = self.expr

        # booleans are summed as ints; otherwise keep the expression's type
        typemap = {bool: int, int: int, float: float}
        res_type = typemap[dtype(expr, context)]
        res_size = len(entity.array)

        sum_values = np.zeros(res_size, dtype=res_type)
        id_to_rownum = context.id_to_rownum
        while period >= baseperiod:
            ids, values = entity.value_for_period(expr, period, context,
                                                  fill=None)

            # filter out lines which are present because there was a value for
            # that individual at that period but not for that column
            acceptable_rows = hasvalue(values)
            acceptable_ids = ids[acceptable_rows]
            if len(acceptable_ids):
                acceptable_values = values[acceptable_rows]

                value_rows = id_to_rownum[acceptable_ids]

                # np.float was removed in NumPy 1.24 -- use builtin float
                period_value = np.zeros(res_size, dtype=float)
                safe_put(period_value, value_rows, acceptable_values)

                sum_values += period_value
            period -= 1
        return sum_values
Exemplo n.º 3
0
    def evaluate(self, context):
        """Return, for each individual, the number of consecutive periods
        (ending at the current period) for which *self.expr* was True.

        Walks backward in time from the previous period down to the
        entity's base period, stopping early once no individual is still
        "running".
        """
        entity = context['__entity__']

        baseperiod = entity.base_period
        period = context['period'] - 1
        bool_expr = self.expr
        value = expr_eval(bool_expr, context)

        # using a full int so that the "store" type check works
        # (np.int was removed in NumPy 1.24 -- use the builtin int instead)
        result = value.astype(int)
        res_size = len(entity.array)
        last_period_true = np.full(res_size, period + 1, dtype=int)

        id_to_rownum = context.id_to_rownum
        still_running = value.copy()
        while np.any(still_running) and period >= baseperiod:
            ids, values = entity.value_for_period(bool_expr, period, context,
                                                  fill=None)
            # missing[row] is True when the individual had no value at all
            # for this period
            missing = np.ones(res_size, dtype=bool)
            period_value = np.zeros(res_size, dtype=bool)
            if len(ids):
                value_rows = id_to_rownum[ids]
                safe_put(missing, value_rows, False)
                safe_put(period_value, value_rows, values)

            # individuals True up to now AND True this period
            value = still_running & period_value
            result += value * (last_period_true - period)

            # keep running if True this period OR if the value is missing
            still_running &= period_value | missing
            last_period_true[period_value] = period
            period -= 1
        return result
Exemplo n.º 4
0
    def compute(self, context, expr):
        """Sum *expr* over all past periods (from the previous period down
        to the entity's base period) for each individual.

        Periods where an individual has no value for the expression
        contribute 0 to the sum.
        """
        entity = context.entity

        baseperiod = entity.base_period
        period = context.period - 1

        # booleans are summed as ints; otherwise keep the expression's type
        typemap = {bool: int, int: int, float: float}
        res_type = typemap[getdtype(expr, context)]
        res_size = len(entity.array)

        sum_values = np.zeros(res_size, dtype=res_type)
        id_to_rownum = context.id_to_rownum
        while period >= baseperiod:
            ids, values = self.value_for_period(expr,
                                                period,
                                                context,
                                                fill=None)

            # filter out lines which are present because there was a value for
            # that individual at that period but not for that column
            acceptable_rows = hasvalue(values)
            acceptable_ids = ids[acceptable_rows]
            if len(acceptable_ids):
                acceptable_values = values[acceptable_rows]

                value_rows = id_to_rownum[acceptable_ids]

                # np.float was removed in NumPy 1.24 -- use builtin float
                period_value = np.zeros(res_size, dtype=float)
                safe_put(period_value, value_rows, acceptable_values)

                sum_values += period_value
            period -= 1
        return sum_values
Exemplo n.º 5
0
    def fill_missing_values(self, ids, values, context, filler='auto'):
        '''Expand past-period *values* (aligned with *ids*) to the current
        period's array size, filling individuals with no past value with
        *filler* (by default, the type-appropriate missing value).

        ids: ids present in past period
        context: current period context
        '''
        # 'is' compares object identity, not equality -- the original
        # "filler is 'auto'" only worked via CPython string interning
        # (and is a SyntaxWarning since Python 3.8). The isinstance guard
        # keeps non-string fillers (e.g. numpy scalars) on the False path.
        if isinstance(filler, str) and filler == 'auto':
            filler = get_missing_value(values)
        result = np.empty(context_length(context), dtype=values.dtype)
        result.fill(filler)
        if len(ids):
            id_to_rownum = context.id_to_rownum
            # if there was more objects in the past than in the current
            # period. Currently, remove() keeps old ids, so this never
            # happens, but if we ever change remove(), we'll need to add
            # such a check everywhere we use id_to_rownum
            # invalid_ids = ids > len(id_to_rownum)
            # if np.any(invalid_ids):
            #     fix ids
            rows = id_to_rownum[ids]
            safe_put(result, rows, values)
        return result
Exemplo n.º 6
0
    def compute(self, context, expr):
        """Average *expr* over past periods, per individual.

        Each observed value is weighted by the number of periods it covers
        (the gap until the next, more recent, observed value); the sum of
        weighted values is divided by the total number of covered periods.
        """
        entity = context.entity

        baseperiod = entity.base_period
        period = context.period - 1

        res_size = len(entity.array)

        # np.int/np.float were removed in NumPy 1.24 -- use the builtins
        num_values = np.zeros(res_size, dtype=int)
        # current period
        last_period_wh_value = np.full(res_size, context.period, dtype=int)

        sum_values = np.zeros(res_size, dtype=float)
        id_to_rownum = context.id_to_rownum
        while period >= baseperiod:
            ids, values = self.value_for_period(expr,
                                                period,
                                                context,
                                                fill=None)

            # filter out lines which are present because there was a value for
            # that individual at that period but not for that column
            acceptable_rows = hasvalue(values)
            acceptable_ids = ids[acceptable_rows]
            if len(acceptable_ids):
                acceptable_values = values[acceptable_rows]

                value_rows = id_to_rownum[acceptable_ids]

                has_value = np.zeros(res_size, dtype=bool)
                safe_put(has_value, value_rows, True)

                period_value = np.zeros(res_size, dtype=float)
                safe_put(period_value, value_rows, acceptable_values)

                # weight by the number of periods since the last observation
                num_values += has_value * (last_period_wh_value - period)
                sum_values += period_value
                last_period_wh_value[has_value] = period
            period -= 1
        # NOTE(review): individuals with no observed value divide 0 by 0 and
        # yield nan (with a numpy warning) -- presumably intentional; confirm
        return sum_values / num_values
Exemplo n.º 7
0
    def evaluate(self, context):
        """Average *self.expr* over past periods, per individual.

        Each observed value is weighted by the number of periods it covers
        (the gap until the next, more recent, observed value); the sum of
        weighted values is divided by the total number of covered periods.
        """
        entity = context['__entity__']

        baseperiod = entity.base_period
        period = context['period'] - 1
        expr = self.expr

        res_size = len(entity.array)

        # np.int/np.float were removed in NumPy 1.24 -- use the builtins
        num_values = np.zeros(res_size, dtype=int)
        # initialized to the current period
        last_period_wh_value = np.full(res_size, context['period'], dtype=int)

        sum_values = np.zeros(res_size, dtype=float)
        id_to_rownum = context.id_to_rownum
        while period >= baseperiod:
            ids, values = entity.value_for_period(expr, period, context,
                                                  fill=None)

            # filter out lines which are present because there was a value for
            # that individual at that period but not for that column
            acceptable_rows = hasvalue(values)
            acceptable_ids = ids[acceptable_rows]
            if len(acceptable_ids):
                acceptable_values = values[acceptable_rows]

                value_rows = id_to_rownum[acceptable_ids]

                has_value = np.zeros(res_size, dtype=bool)
                safe_put(has_value, value_rows, True)

                period_value = np.zeros(res_size, dtype=float)
                safe_put(period_value, value_rows, acceptable_values)

                # weight by the number of periods since the last observation
                num_values += has_value * (last_period_wh_value - period)
                sum_values += period_value
                last_period_wh_value[has_value] = period
            period -= 1
        # NOTE(review): individuals with no observed value divide 0 by 0 and
        # yield nan (with a numpy warning) -- presumably intentional; confirm
        return sum_values / num_values
Exemplo n.º 8
0
    def optimize_processes(self):
        """
        Common subexpression elimination

        NOTE(review): this body appears to be a merge-conflict artifact
        (see the ``<<<<<<< HEAD`` marker below). It is the body of
        ``fill_missing_values`` pasted under an unrelated name and it
        references ``filler``, ``values``, ``context`` and ``ids``, none of
        which are defined in this scope, so calling this method raises
        NameError. It should be restored from version control rather than
        patched in place; left byte-identical here, annotated only.
        """
#<<<<<<< HEAD

        # BUG(review): 'filler' is undefined here; also 'is' on a string
        # literal is identity, not equality (see fill_missing_values)
        if filler is 'auto':
            filler = get_missing_value(values)
        result = np.empty(context_length(context), dtype=values.dtype)
        result.fill(filler)
        if len(ids):
            id_to_rownum = context.id_to_rownum
            # if there was more objects in the past than in the current
            # period. Currently, remove() keeps old ids, so this never
            # happens, but if we ever change remove(), we'll need to add
            # such a check everywhere we use id_to_rownum
#            invalid_ids = ids > len(id_to_rownum)
#            if np.any(invalid_ids):
#                fix ids
            rows = id_to_rownum[ids]
            safe_put(result, rows, values)
        return result
Exemplo n.º 9
0
    def fill_missing_values(ids, values, context, filler='auto'):
        """Expand past-period *values* (aligned with *ids*) to the current
        period's array size, filling individuals with no past value with
        *filler* (by default, the type-appropriate default value).

        ids: ids present in past period
        values: values in past period
        context: current period context
        """
        # 'is' compares object identity, not equality -- the original
        # "filler is 'auto'" only worked via CPython string interning
        # (and is a SyntaxWarning since Python 3.8). The isinstance guard
        # keeps non-string fillers (e.g. numpy scalars) on the False path.
        if isinstance(filler, str) and filler == 'auto':
            filler = get_default_value(values)
        result = np.full(context_length(context), filler, dtype=values.dtype)
        if len(ids):
            id_to_rownum = context.id_to_rownum
            # if there was more objects in the past than in the current
            # period. Currently, remove() keeps old ids, so this never
            # happens, but if we ever change remove(), we'll need to add
            # such a check everywhere we use id_to_rownum
            # invalid_ids = ids > len(id_to_rownum)
            # if np.any(invalid_ids):
            #     fix ids
            rows = id_to_rownum[ids]
            safe_put(result, rows, values)
        return result
Exemplo n.º 10
0
    def compute(self, context, bool_expr):
        """Return, for each individual, the number of consecutive periods
        (ending at the current period) for which *bool_expr* was True.

        Variant which looks up the previous period through the context's
        'periods'/'period_idx' arrays instead of simple arithmetic.
        """
        entity = context.entity

        baseperiod = entity.base_period
        lag_idx = context['period_idx'] - 1
        period = context['periods'][lag_idx]
        # NOTE(review): the original re-assigned bool_expr = self.expr here,
        # which silently discarded the bool_expr argument; use the argument,
        # as the other compute() implementation does
        value = expr_eval(bool_expr, context)

        # using a full int so that the "store" type check works
        # (np.int was removed in NumPy 1.24 -- use the builtin int instead)
        result = value.astype(int)
        res_size = len(entity.array)
        last_period_true = np.full(res_size, period + 1, dtype=int)

        id_to_rownum = context.id_to_rownum
        still_running = value.copy()

        print( 'Warning : duration works only with year0 so far')
        while np.any(still_running) and period >= baseperiod:
            ids, values = self.value_for_period(bool_expr, period, context,
                                                fill=None)
            # missing[row] is True when the individual had no value at all
            # for this period
            missing = np.ones(res_size, dtype=bool)
            period_value = np.zeros(res_size, dtype=bool)
            if len(ids):
                value_rows = id_to_rownum[ids]
                safe_put(missing, value_rows, False)
                safe_put(period_value, value_rows, values)

            # individuals True up to now AND True this period
            value = still_running & period_value
            result += value * (last_period_true - period)

            # keep running if True this period OR if the value is missing
            still_running &= period_value | missing
            last_period_true[period_value] = period
            period -= 1

        return result
Exemplo n.º 11
0
File: data.py Project: gvk489/liam2
def merge_subset_in_array(output, id_to_rownum, subset, first=False,
                          default_values=None):
    """Merge the structured array *subset* into *output*, matching rows by
    the 'id' field through *id_to_rownum*.

    Returns the merged array; when dtypes and lengths already match,
    *subset* itself is returned unchanged.
    """
    if default_values is None:
        default_values = {}

    # identical column layout: either a full replacement or a row scatter
    if subset.dtype == output.dtype:
        if len(subset) == len(output):
            return subset
        safe_put(output, id_to_rownum[subset['id']], subset)
        return output

    output_names = output.dtype.names
    subset_names = subset.dtype.names
    names_to_copy = set(subset_names) & set(output_names)

    # same rows, different columns: copy shared columns in place
    if len(subset) == len(output):
        for fname in names_to_copy:
            output[fname] = subset[fname]
        return output

    rownums = id_to_rownum[subset['id']]
    # TODO: this is a gross approximation, more research is needed to get
    # a better threshold. It might also depend on "first".
    if not (len(names_to_copy) > len(output_names) / 2):
        # few shared columns: scatter each column separately
        for fname in names_to_copy:
            safe_put(output[fname], rownums, subset[fname])
        return output

    # many shared columns: stage whole rows, then scatter them at once
    if first:
        staging = np.empty(len(subset), dtype=output.dtype)
        for fname in set(output_names) - set(subset_names):
            fallback = default_values.get(fname, None)
            staging[fname] = get_default_value(staging[fname], fallback)
    else:
        staging = output[rownums]
        # Note that all rows which correspond to rownums == -1 have
        # wrong values (they have the value of the last row) but it is
        # not necessary to correct them since they will not be copied
        # back into output_array.
        # np.putmask(staging, rownums == -1, missing_row)
    for fname in names_to_copy:
        staging[fname] = subset[fname]
    safe_put(output, rownums, staging)
    return output
Exemplo n.º 12
0
def merge_subset_in_array(output, id_to_rownum, subset, first=False):
    """Merge the structured array *subset* into *output*, matching rows by
    the 'id' field through *id_to_rownum*.

    Returns the merged array; when dtypes and lengths already match,
    *subset* itself is returned unchanged.
    """
    # identical column layout: either a full replacement or a row scatter
    if subset.dtype == output.dtype:
        if len(subset) == len(output):
            return subset
        safe_put(output, id_to_rownum[subset['id']], subset)
        return output

    output_names = output.dtype.names
    subset_names = subset.dtype.names
    names_to_copy = set(subset_names) & set(output_names)

    # same rows, different columns: copy shared columns in place
    if len(subset) == len(output):
        for fname in names_to_copy:
            output[fname] = subset[fname]
        return output

    rownums = id_to_rownum[subset['id']]
    #TODO: this is a gross approximation, more research is needed to get
    # a better threshold. It might also depend on "first".
    if not (len(names_to_copy) > len(output_names) / 2):
        # few shared columns: scatter each column separately
        for fname in names_to_copy:
            safe_put(output[fname], rownums, subset[fname])
        return output

    # many shared columns: stage whole rows, then scatter them at once
    if first:
        staging = np.empty(len(subset), dtype=output.dtype)
        for fname in set(output_names) - set(subset_names):
            staging[fname] = get_missing_value(staging[fname])
    else:
        staging = output[rownums]
        # Note that all rows which correspond to rownums == -1 have
        # wrong values (they have the value of the last row) but it is
        # not necessary to correct them since they will not be copied
        # back into output_array.
        # np.putmask(staging, rownums == -1, missing_row)
    for fname in names_to_copy:
        staging[fname] = subset[fname]
    safe_put(output, rownums, staging)
    return output
Exemplo n.º 13
0
def build_period_array(input_table, output_fields, input_rows, input_index,
                       start_period):
    """Build the entity array for *start_period* from historical data.

    For each individual ever present in a period <= start_period, take its
    most recent row from *input_table*.

    Returns a (output_array, id_to_rownum) tuple.
    """
    # .iterkeys() is Python 2 only; iterating the mapping directly works on
    # both Python 2 and 3
    periods_before = [p for p in input_rows if p <= start_period]
    if not periods_before:
        # no data before start_period at all: empty array, empty index
        id_to_rownum = np.empty(0, dtype=int)
        output_array = ColumnArray.empty(0, np.dtype(output_fields))
        return output_array, id_to_rownum

    periods_before.sort()
    # take the last period which we have data for
    target_period = periods_before[-1]

    # computing is_present
    max_id = len(input_index[target_period]) - 1
    period_id_to_rownum = None
    present_in_period = None
    is_present = np.zeros(max_id + 1, dtype=bool)
    for period in periods_before:
        period_id_to_rownum = input_index[period]
        present_in_period = period_id_to_rownum != -1
        # refcheck=False avoids a spurious ValueError when the array is
        # still referenced (consistent with the other implementation)
        present_in_period.resize(max_id + 1, refcheck=False)
        is_present |= present_in_period

    # if all individuals are present in the target period, we are done already!
    if np.array_equal(present_in_period, is_present):
        start, stop = input_rows[target_period]
        input_array = ColumnArray.from_table(input_table, start, stop)
        input_array.add_and_drop_fields(output_fields)
        return input_array, period_id_to_rownum

    # building id_to_rownum for the target period
    id_to_rownum = np.full(max_id + 1, -1, dtype=int)
    rownum = 0
    for row_id, present in enumerate(is_present):
        if present:
            id_to_rownum[row_id] = rownum
            rownum += 1

    # computing the source row for each destination row
    # we loop over the periods before start_period in reverse order
    output_array_source_rows = np.full(rownum, -1, dtype=int)
    for period in periods_before[::-1]:
        start, stop = input_rows[period]
        input_rownums = np.arange(start, stop)

        input_id_to_rownum = input_index[period]
        id_is_in_period = input_id_to_rownum != -1

        # which output rows are filled by input for this period
        output_rownums = id_to_rownum[id_is_in_period]

        # get source rows (in the global array) for individuals in this period
        source_rows = output_array_source_rows[output_rownums]

        # if their source row is already known, leave them alone
        need_update = source_rows == -1

        # global indices of rows which are not set yet (for this period)
        rows_to_update = output_rownums[need_update]

        # source row for those rows
        local_source_rows = input_rownums[need_update]

        # update the source row for those rows
        safe_put(output_array_source_rows, rows_to_update, local_source_rows)

        # all source rows known: earlier periods cannot contribute anything
        if np.all(output_array_source_rows != -1):
            break

    # reading data
    output_array = ColumnArray.from_table_coords(input_table,
                                                 output_array_source_rows)
    output_array.add_and_drop_fields(output_fields)
    return output_array, id_to_rownum
Exemplo n.º 14
0
File: data.py Project: gvk489/liam2
def build_period_array(input_table, output_fields, input_rows,
                       input_index, start_period, default_values=None):
    """Build the entity array for *start_period* from historical data.

    For each individual ever present in a period <= start_period, take its
    most recent row from *input_table*; fields missing from the input are
    filled using *default_values*.

    Returns a (output_array, id_to_rownum) tuple.
    """
    # .iterkeys() is Python 2 only; iterating the mapping directly works on
    # both Python 2 and 3
    periods_before = [p for p in input_rows if p <= start_period]
    if not periods_before:
        # no data before start_period at all: empty array, empty index
        id_to_rownum = np.empty(0, dtype=int)
        output_array = ColumnArray.empty(0, np.dtype(output_fields))
        return output_array, id_to_rownum

    periods_before.sort()
    # take the last period which we have data for
    target_period = periods_before[-1]

    # computing is_present
    max_id = len(input_index[target_period]) - 1
    period_id_to_rownum = None
    present_in_period = None
    is_present = np.zeros(max_id + 1, dtype=bool)
    for period in periods_before:
        period_id_to_rownum = input_index[period]
        present_in_period = period_id_to_rownum != -1
        present_in_period.resize(max_id + 1, refcheck=False)
        is_present |= present_in_period

    # if all individuals are present in the target period, we are done already!
    if np.array_equal(present_in_period, is_present):
        start, stop = input_rows[target_period]
        input_array = ColumnArray.from_table(input_table, start, stop)
        input_array.add_and_drop_fields(output_fields, default_values)
        return input_array, period_id_to_rownum

    # building id_to_rownum for the target period
    id_to_rownum = np.full(max_id + 1, -1, dtype=int)
    rownum = 0
    for row_id, present in enumerate(is_present):
        if present:
            id_to_rownum[row_id] = rownum
            rownum += 1

    # computing the source row for each destination row
    # we loop over the periods before start_period in reverse order
    output_array_source_rows = np.full(rownum, -1, dtype=int)
    for period in periods_before[::-1]:
        start, stop = input_rows[period]
        input_rownums = np.arange(start, stop)

        input_id_to_rownum = input_index[period]
        id_is_in_period = input_id_to_rownum != -1

        # which output rows are filled by input for this period
        output_rownums = id_to_rownum[id_is_in_period]

        # get source rows (in the global array) for individuals in this period
        source_rows = output_array_source_rows[output_rownums]

        # if their source row is already known, leave them alone
        need_update = source_rows == -1

        # global indices of rows which are not set yet (for this period)
        rows_to_update = output_rownums[need_update]

        # source row for those rows
        local_source_rows = input_rownums[need_update]

        # update the source row for those rows
        safe_put(output_array_source_rows, rows_to_update, local_source_rows)

        # all source rows known: earlier periods cannot contribute anything
        if np.all(output_array_source_rows != -1):
            break

    # reading data
    output_array = ColumnArray.from_table_coords(input_table,
                                                 output_array_source_rows)
    output_array.add_and_drop_fields(output_fields, default_values)
    return output_array, id_to_rownum