def distinct_count(key, dataset_name):
    '''Return the number of distinct values recorded for `key` in a dataset.

    The number is read from the per-dataset ``distincts__<dataset_name>``
    collection. If that collection does not exist yet it is created with
    ``update_distincts`` first.

    ``key``
        The key to count distinct values for. It must not contain a dot
        unless it starts with ``time.``.
    ``dataset_name``
        Name of the dataset whose distincts collection is queried.

    Returns: The number of documents in the distincts collection that
    list `key`.

    Raises: ``AssertionError`` if `key` contains a dot and does not start
    with ``time.``.
    '''
    assert ('.' not in key or key.startswith('time.')), \
        'dotted keys must start with "time.": %r' % (key,)
    collection_name = 'distincts__%s' % dataset_name
    db = mongo.db()
    if collection_name not in db.collection_names():
        # We need to create the distincts collection first
        update_distincts(dataset_name)
    # Use the same filter as ``distinct`` does on this collection. That
    # function reads the ``_id`` of every matching document, and ``_id`` is
    # unique per document, so the number of matches is the number of
    # distinct values.
    # NOTE(review): this used to filter on 'values' and throw the cursor
    # away (the function always returned None). 'value.keys' is inferred
    # from ``distinct`` - confirm against the collection's actual layout.
    return db[collection_name].find({'value.keys': key}).count()
def compute_aggregates(self):
    '''Add the additional, required data for the loaded dataset.

    This has to be the last method called when using the loader. It
    updates the distinct values of ``self.dataset`` and afterwards
    updates all of its cubes.
    '''
    log.debug("updating distinct values...")
    dataset_name = self.dataset.name
    update_distincts(dataset_name)

    log.debug("updating all cubes...")
    dataset = self.dataset
    Cube.update_all_cubes(dataset)
def distinct(key, dataset_name=None, **query):
    '''Return the distinct values of `key` among matching *Entry* objects.

    There are two ways the values are looked up:

    * If a dataset name is given, no extra query conditions are passed
      and `key` is not one of the non-aggregated keys, the values come
      from the per-dataset ``distincts__<dataset_name>`` collection.
      That collection is created with ``update_distincts`` when it is
      missing, but it is not rebuilt here when it already exists, so the
      result reflects the collection as it was last written.
    * In every other case the entries are queried directly.

    ``key``
        The key of the field for which the distinct values are returned.
    ``dataset_name``
        The name of a dataset to restrict the lookup to, or ``None`` to
        not restrict by dataset.
    ``**query``
        Parameters for an *AND* query. Only entries matching these
        conditions are considered. To restrict by dataset **don't** add
        the condition here, use *dataset_name*.

    Returns: A list of distinct values.

    Raises: ``ValueError`` if the distincts collection would be used but
    no dataset named `dataset_name` exists.
    '''
    # the same keys used in serverside_js/compute_distincts.js
    not_aggregated = ('_id', 'name', 'amount', 'classifiers', 'entities',
                      'currency')
    use_distincts_collection = (dataset_name is not None
                                and not query
                                and key not in not_aggregated)

    if not use_distincts_collection:
        # Query the entries themselves, optionally narrowed to one dataset.
        if dataset_name is not None:
            query['dataset.name'] = dataset_name
        return Entry.c.find(query).distinct(key)

    if not Dataset.c.find_one({'name': dataset_name}, as_class=dict):
        raise ValueError('Dataset "%s" does not exist' % dataset_name)

    name = 'distincts__%s' % dataset_name
    database = mongo.db()
    if name not in database.collection_names():
        # The distincts collection has to exist before it can be queried.
        update_distincts(dataset_name)
    collection = database[name]
    log.info('use distincts collection %s' % name)
    matching = collection.find({'value.keys': key})
    return matching.distinct('_id')
def used_keys(dataset_name):
    '''Return the distinct ``value`` entries of a dataset's distincts
    collection.

    The per-dataset ``distincts__<dataset_name>`` collection is created
    with ``update_distincts`` first if it does not exist yet.

    ``dataset_name``
        Name of the dataset whose distincts collection is queried.

    Returns: The result of ``distinct('value')`` on that collection.
    '''
    collection_name = 'distincts__%s' % dataset_name
    db = mongo.db()
    if collection_name not in db.collection_names():
        # We need to create the distincts collection first
        update_distincts(dataset_name)
    # The query result used to be thrown away, so the function always
    # returned None; hand it back to the caller.
    # NOTE(review): ``distinct`` filters this collection on 'value.keys';
    # if this function is meant to list key names, 'value.keys' may be
    # the intended field here as well - confirm before changing.
    return db[collection_name].distinct('value')