Exemplo n.º 1
0
 def test_update(self):
     # For every test dataset: create it, confirm 'field' is absent, update
     # it with a nested value, then re-read it and confirm the value is
     # visible on the freshly fetched record.
     for name in self.TEST_DATASETS:
         dataset_id = self.test_dataset_ids[name]

         created = Dataset.create(dataset_id)
         self.assertFalse('field' in created)

         Dataset.update(created, {'field': {'key': 'value'}})

         # fetch again rather than trusting the object passed to update
         reloaded = Dataset.find_one(dataset_id)
         self.assertTrue('field' in reloaded)
         self.assertEqual(reloaded['field'], {'key': 'value'})
Exemplo n.º 2
0
def summarize(dataset, query, select, group):
    """
    Summarize the observations of *dataset* matching *query* and *select*.

    If *group* is ALL the overall summary is returned; otherwise the summary
    for *group* is returned in a dict keyed by *group*.  Summaries are looked
    up in the dataset's STATS entry first and only computed (and written back
    via Dataset.update) when no truthy entry for *group* is found there.

    An error dict keyed by ERROR is returned when *group* is not ALL and is
    either not a column of the observations or not of numpy object dtype.
    """
    # restrict the observations using query/select
    # (presumably a pandas DataFrame given as_df=True -- TODO confirm)
    frame = Observation.find(dataset, query, select, as_df=True)

    # grouping is only allowed on non-numeric (object dtype) columns
    # TODO check schema for valid groupby columns once included
    group_dtype = frame.dtypes.get(group)
    if group != ALL:
        if group_dtype is None or group_dtype.type != np.object_:
            return {ERROR: "group: '%s' is not categorical." % group}

    # reuse the cached summary when there is one, otherwise compute and store
    cached = dataset.get(STATS, {})
    if not cached.get(group):
        if group == ALL:
            # note: this replaces the whole cached dict with the ALL entry
            cached = {ALL: summarize_df(frame)}
        else:
            cached = summarize_with_groups(frame, cached, group)
        Dataset.update(dataset, {STATS: cached})
    summary = cached.get(group)

    if group == ALL:
        return dict_from_mongo(summary)
    return dict_from_mongo({group: summary})
Exemplo n.º 3
0
        except ParseError, err:
            # do not save record, return error
            return {ERROR: err}

        record = {
            DATASET_ID: dataset[DATASET_ID],
            cls.FORMULA: formula,
            cls.NAME: name,
        }
        cls.collection.insert(record)

        # invalidate summary ALL since we have a new column
        stats = dataset.get(STATS)
        if stats:
            del stats[ALL]
            del dataset[STATS]
            Dataset.update(dataset, {STATS: stats})

        # call remote calculate and pass calculation id
        calculate_column.delay(dataset, dframe, formula, name)
        return mongo_remove_reserved_keys(record)

    @classmethod
    def find(cls, dataset):
        """
        Return a list of the calculation records stored for *dataset*.

        The class collection is queried for records whose DATASET_ID matches
        that of *dataset*; each record found is passed through
        mongo_remove_reserved_keys before being returned.
        """
        spec = {DATASET_ID: dataset[DATASET_ID]}
        calculations = []
        for doc in cls.collection.find(spec):
            calculations.append(mongo_remove_reserved_keys(doc))
        return calculations