def create_from_list_or_dict(cls, dataset, calculations):
    calculations = to_list(calculations)

    if not len(calculations) or not isinstance(calculations, list):
        raise ArgumentError('Improper format for JSON calculations.')

    parsed_calculations = []

    # Pull out args to check JSON format
    try:
        for c in calculations:
            groups = c.get("groups")

            if not isinstance(groups, list):
                groups = [groups]

            for group in groups:
                parsed_calculations.append(
                    [c[cls.FORMULA], c[cls.NAME], group])
    except KeyError as e:
        raise ArgumentError('Required key %s not found in JSON' % e)

    calculations = [cls().save(dataset, formula, name, group)
                    for formula, name, group in parsed_calculations]

    call_async(calculate_task, calculations, dataset.clear_cache())
def create_from_list_or_dict(cls, dataset, calculations):
    calculations = to_list(calculations)

    if (not len(calculations) or not isinstance(calculations, list) or
            any(not isinstance(e, dict) for e in calculations)):
        raise ArgumentError('Improper format for JSON calculations.')

    parsed_calculations = []

    # Pull out args to check JSON format
    try:
        for c in calculations:
            groups = c.get("groups")

            if not isinstance(groups, list):
                groups = [groups]

            for group in groups:
                parsed_calculations.append(
                    [c[cls.FORMULA], c[cls.NAME], group])
    except KeyError as e:
        raise ArgumentError('Required key %s not found in JSON' % e)

    # Save instead of create so that we calculate on all at once.
    calculations = [
        cls().save(dataset, formula, name, group)
        for formula, name, group in parsed_calculations
    ]

    call_async(calculate_task, calculations, dataset.clear_cache())
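# Illustrative sketch (not from the source) of the payload shape the two
# versions above accept: a list of dicts, each carrying the keys behind
# cls.FORMULA and cls.NAME plus an optional "groups" list. The literal
# key names 'formula' and 'name' here are assumptions.
example_calculations = [
    {'formula': 'amount * 2', 'name': 'double_amount'},
    {'formula': 'sum(amount)', 'name': 'total',
     'groups': ['region', 'year']},
]
# The second entry fans out into two (formula, name, group) triples, one
# per group. The first has no "groups" key, so c.get("groups") returns
# None, which is wrapped as [None] and yields a single ungrouped triple.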
def add_observations(self, new_data):
    """Update `dataset` with `new_data`."""
    update_id = uuid.uuid4().hex
    self.add_pending_update(update_id)

    new_data = to_list(new_data)

    # fetch data before other updates
    new_dframe_raw = dframe_from_update(self, new_data)

    call_async(calculate_updates, self, new_data,
               new_dframe_raw=new_dframe_raw, update_id=update_id)
def __slugify_data(new_data, labels_to_slugs):
    slugified_data = []
    new_data = to_list(new_data)

    for row in new_data:
        # Iterate over items() (a list copy in Python 2) so the row can
        # be mutated safely; deleting keys during iteritems() raises
        # "RuntimeError: dictionary changed size during iteration".
        for key, value in row.items():
            if labels_to_slugs.get(key) and key != MONGO_ID:
                del row[key]
                row[labels_to_slugs[key]] = value

        slugified_data.append(row)

    return slugified_data
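# Worked example for __slugify_data with made-up labels and slugs. The
# direction of the mapping (human-readable label -> storage slug) is
# inferred from the name labels_to_slugs; keys equal to MONGO_ID pass
# through untouched, as do keys with no slug mapping.
labels_to_slugs = {'Amount Spent': 'amount_spent', 'Region': 'region'}
rows = [{'Amount Spent': 10, 'Region': 'north', 'extra': 1}]
# __slugify_data(rows, labels_to_slugs) would return:
# [{'amount_spent': 10, 'region': 'north', 'extra': 1}]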
def add_observations(self, new_data):
    """Update `dataset` with `new_data`."""
    update_id = uuid.uuid4().hex
    self.add_pending_update(update_id)

    new_data = to_list(new_data)

    calculator = Calculator(self)

    new_dframe_raw = calculator.dframe_from_update(
        new_data, self.schema.labels_to_slugs)
    calculator._check_update_is_valid(new_dframe_raw)

    calculator.dataset.clear_cache()

    call_async(calculator.calculate_updates, calculator, new_data,
               new_dframe_raw=new_dframe_raw, update_id=update_id)
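# Hedged usage sketch for add_observations: callers pass a row dict or a
# list of row dicts keyed by column label, assuming to_list() wraps a
# lone dict in a list. `dataset` is a hypothetical instance.
dataset.add_observations({'amount': 5, 'region': 'north'})  # single row
dataset.add_observations([{'amount': 5}, {'amount': 7}])    # batch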
def delete_columns(self, columns):
    """Delete columns `columns` from this dataset.

    :param columns: The columns to delete.
    """
    requested_columns = set(to_list(columns))
    columns = set(self.schema.keys()).intersection(requested_columns)

    if not len(columns):
        raise ArgumentError("Columns: %s not in dataset." %
                            list(requested_columns))

    Observation.delete_columns(self, columns)
    new_schema = self.schema

    # Remove the deleted columns from the schema.
    for column in columns:
        new_schema.pop(column)

    self.set_schema(new_schema, set_num_columns=True)

    return columns
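# Usage sketch for delete_columns: the return value is the subset of
# requested names that actually existed in the schema. `dataset` is a
# hypothetical instance.
deleted = dataset.delete_columns(['amount', 'no_such_column'])
# deleted == set(['amount']) if only 'amount' is in the schema; if no
# requested column exists, ArgumentError is raised instead.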
def aggregated_dataset(self, groups):
    groups = to_list(groups)
    _id = self.aggregated_datasets_dict.get(self.join_groups(groups))

    return self.find_one(_id) if _id else None
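# Sketch of looking up an aggregated dataset: groups are joined into a
# single dictionary key via join_groups, and the stored id (if any) is
# resolved with find_one. `dataset` is a hypothetical instance.
linked = dataset.aggregated_dataset('region')            # one group
linked = dataset.aggregated_dataset(['region', 'year'])  # composite key
# Returns the aggregated dataset, or None when no aggregation exists
# for that group combination.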