示例#1
0
    def updated_dframe(self, dataset, formula, dframe):
        """Re-evaluate this aggregation and merge the result into `dframe`.

        Column arguments are rebuilt from the aggregator's stored
        dframe, the aggregation is evaluated, and the evaluated columns
        are joined onto `dframe` after dropping the non-group columns
        they replace.
        """
        # Column arguments come from the original (stored) dframe.
        parsed = parse_columns(dataset, formula, self.name, self.dframe)
        evaluated = self.aggregation.eval(parsed)

        # Non-group columns will be replaced by the join, so drop them.
        replaced = [col for col in evaluated.columns
                    if col not in self.groups]

        stripped = dframe.drop(replaced, axis=1)
        return group_join(self.groups, evaluated, stripped)
示例#2
0
def update_calculations(record, dataset):
    """Apply the dataset's non-aggregate calculations to `record`.

    Each stored non-aggregation calculation is evaluated against a
    one-row frame built from `record`, and the resulting value is
    written back into `record` under the calculation's slugged name.
    """
    non_agg_calcs = dataset.calculations(include_aggs=False)

    if non_agg_calcs:
        one_row = DataFrame(data=record, index=[0])
        slug_for = dataset.schema.labels_to_slugs

        for calc in non_agg_calcs:
            parsed = parse_columns(dataset, calc.formula, calc.name,
                                   dframe=one_row)
            record[slug_for[calc.name]] = parsed[0][0]

    return record
示例#3
0
    def updated_dframe(self, dataset, formula, dframe):
        """Return `dframe` updated with a freshly evaluated aggregation.

        Rebuilds the column arguments from the stored dframe, evaluates
        the aggregation, and group-joins the result onto `dframe` with
        the superseded non-group columns removed.
        """
        # Build column arguments from the aggregator's original dframe.
        agg_result = self.aggregation.eval(
            parse_columns(dataset, formula, self.name, self.dframe))

        group_names = self.groups
        to_drop = [name for name in agg_result.columns
                   if name not in group_names]

        return group_join(group_names, agg_result,
                          dframe.drop(to_drop, axis=1))
def update_calculations(record, dataset):
    """Evaluate non-aggregate calculations for `record` and store results.

    Builds a single-row DataFrame from `record`, runs every stored
    non-aggregation calculation against it, and writes each computed
    value into `record` keyed by the calculation's slug.
    """
    calcs = dataset.calculations(include_aggs=False)

    if not len(calcs):
        return record

    frame = DataFrame(data=record, index=[0])
    label_map = dataset.schema.labels_to_slugs

    for calculation in calcs:
        result = parse_columns(
            dataset, calculation.formula, calculation.name, dframe=frame)
        record[label_map[calculation.name]] = result[0][0]

    return record
示例#5
0
def __create_aggregator(dataset, formula, name, groups, dframe=None):
    """Build an Aggregator for `formula` over `dataset`.

    Parses the aggregation columns, determines the formula's dependent
    columns, then re-fetches a dframe restricted to the group and
    dependent columns before constructing the Aggregator.
    """
    # TODO this should work with index eventually
    parsed_columns = parse_columns(dataset, formula, name, dframe,
                                   no_index=True)

    deps = Parser.dependent_columns(formula, dataset)
    agg = Parser.parse_aggregation(formula)

    # Restrict the fetched dframe to only the columns we need.
    select = combine_dicts({group: 1 for group in groups},
                           {dep: 1 for dep in deps})

    # With no groups/deps, keep at least MONGO_ID so a count aggregation
    # still has a column to operate on.
    args = QueryArgs(select=select or {MONGO_ID: 1})
    restricted_dframe = dataset.dframe(query_args=args,
                                       keep_mongo_keys=not select)

    return Aggregator(restricted_dframe, groups, agg, name, parsed_columns)
示例#6
0
def calculate_columns(dataset, calculations):
    """Calculate and store new columns for `calculations`.

    The new columns are joined to the calculation dframe and replace
    the dataset's observations.

    .. note::

        This can result in race-conditions when:

        - deleting ``controllers.Datasets.DELETE``
        - updating ``controllers.Datasets.POST([dataset_id])``

        Therefore, perform these actions asynchronously.

    :param dataset: The dataset to calculate for.
    :param calculations: A list of calculations.
    """
    new_cols = None

    for c in calculations:
        if c.aggregation:
            # Aggregations are persisted via an Aggregator rather than
            # joined into the observation dframe below.
            aggregator = __create_aggregator(
                dataset, c.formula, c.name, c.groups_as_list)
            aggregator.save(dataset)
        else:
            columns = parse_columns(dataset, c.formula, c.name)
            if new_cols is None:
                new_cols = DataFrame(columns[0])
            else:
                new_cols = new_cols.join(columns[0])

    if new_cols is not None:
        dataset.update_observations(new_cols)

    # Propagate the calculation to any merged child datasets.  Use a
    # plain loop: the original list comprehension built and discarded a
    # list purely for its side effects.
    for merged_dataset in dataset.merged_datasets:
        __propagate_column(merged_dataset, dataset)