def test_update(self):
    """
    Check that ``Dataset.update`` stores a new field on every test dataset
    and that the field is present when the dataset is fetched again.
    """
    for name in self.TEST_DATASETS:
        dataset_id = self.test_dataset_ids[name]

        # a freshly created dataset must not carry the field yet
        created = Dataset.create(dataset_id)
        self.assertFalse('field' in created)

        Dataset.update(created, {'field': {'key': 'value'}})

        # re-read the dataset so the assertions see the stored version
        reloaded = Dataset.find_one(dataset_id)
        self.assertTrue('field' in reloaded)
        self.assertEqual(reloaded['field'], {'key': 'value'})
def summarize(dataset, query, select, group):
    """
    Summarize the observations of *dataset* narrowed by *query* and *select*.

    When *group* is ALL the overall summary is returned; otherwise the
    summary is grouped by the column *group* and returned keyed by that
    column name.  If *group* is not ALL and is either missing from the
    frame or not of object dtype, an ``{ERROR: message}`` dict is returned
    instead.
    """
    # narrow the observations down with query/select
    frame = Observation.find(dataset, query, select, as_df=True)

    # only object-typed (categorical) columns may be grouped on
    # TODO check schema for valid groupby columns once included
    column_type = frame.dtypes.get(group)
    overall = group == ALL
    if not overall:
        if column_type is None or column_type.type != np.object_:
            return {ERROR: "group: '%s' is not categorical." % group}

    # reuse the stats cached on the dataset, computing them when missing
    # NOTE(review): the cache is looked up by group only, while the frame
    # above depends on query/select -- confirm this is intended.
    cached = dataset.get(STATS, {})
    if not cached.get(group):
        if overall:
            cached = {ALL: summarize_df(frame)}
        else:
            cached = summarize_with_groups(frame, cached, group)
        Dataset.update(dataset, {STATS: cached})

    result = cached.get(group)
    if not overall:
        result = {group: result}
    return dict_from_mongo(result)
except ParseError, err:
            # do not save record, return error
            return {ERROR: err}

        record = {
            DATASET_ID: dataset[DATASET_ID],
            cls.FORMULA: formula,
            cls.NAME: name,
        }
        cls.collection.insert(record)

        # invalidate summary ALL since we have a new column
        stats = dataset.get(STATS)
        if stats:
            # NOTE(review): `del stats[ALL]` raises KeyError when stats has
            # no ALL entry -- confirm ALL is always present at this point.
            del stats[ALL]
            del dataset[STATS]
            Dataset.update(dataset, {STATS: stats})

        # call remote calculate and pass calculation id
        calculate_column.delay(dataset, dframe, formula, name)
        return mongo_remove_reserved_keys(record)

    @classmethod
    def find(cls, dataset):
        """
        Return the calculations for given *dataset*.

        Queries ``cls.collection`` for records whose DATASET_ID equals
        ``dataset[DATASET_ID]`` and returns them as a list, each record
        passed through ``mongo_remove_reserved_keys``.
        """
        return [mongo_remove_reserved_keys(record) for record in
                cls.collection.find({
                    DATASET_ID: dataset[DATASET_ID],
                })]