Exemplo n.º 1
0
def distinct_count(key, dataset_name):
    """Return how many precomputed distinct entries are recorded for ``key``.

    The lookup is made against the per-dataset ``distincts__<dataset_name>``
    collection, which is created via ``update_distincts`` if it does not
    exist yet.

    ``key``
        The field name to look up. Dotted keys are only accepted when they
        start with ``time.``.
    ``dataset_name``
        Name of the dataset whose distincts collection is queried.

    Returns: the number of documents in the distincts collection that
    match ``key``.

    Raises: ``AssertionError`` if ``key`` is a dotted key outside ``time.``.
    """
    # Kept as an assert so existing callers keep seeing AssertionError.
    assert ('.' not in key or key.startswith('time.'))
    collection_name = 'distincts__%s' % dataset_name
    db = mongo.db()
    if collection_name not in db.collection_names():
        # We need to create the distincts collection first
        update_distincts(dataset_name)
    # NOTE(review): the sibling ``distinct`` helper filters this collection
    # on 'value.keys'; confirm that 'values' is the intended field here.
    matching = db[collection_name].find({'values': key})
    # Previously the cursor was built and dropped, so None was returned.
    return matching.count()
Exemplo n.º 2
0
 def compute_aggregates(self):
     """Finalize a load by refreshing the derived data of the dataset.

     Call this as the very last step when using the loader: it adds
     additional, required data to the database by first updating the
     distinct values of ``self.dataset`` and then updating all cubes
     for it.
     """
     log.debug("updating distinct values...")
     dataset_name = self.dataset.name
     update_distincts(dataset_name)

     log.debug("updating all cubes...")
     Cube.update_all_cubes(self.dataset)
Exemplo n.º 3
0
def distinct(key, dataset_name=None, **query):
    '''Return the distinct values of `key` among the *Entry* objects
    selected by `dataset_name` and/or ``**query``.

    Two lookup strategies are used:

    * If no dataset is given, if extra query conditions are given, or if
      `key` is one of the keys that are not pre-aggregated, the *Entry*
      collection is queried directly.
    * Otherwise the per-dataset ``distincts__<dataset_name>`` collection
      is used. It is only (re)built here when it does not exist yet, so
      its content may presumably lag behind the entries in the datastore.

    ``key``
        The key of the field for which the distinct values are returned
    ``dataset_name``
        The name of a dataset, or ``None`` to look at all entries
    ``**query``
        Parameters for an *AND* query. Only entries matching these
        conditions are considered. To restrict by dataset **don't** add
        the condition here, use *dataset_name*.

    Returns: A list of distinct values.

    Raises: ``ValueError`` if the pre-aggregated lookup is chosen and no
    dataset named `dataset_name` exists.
    '''

    # the same keys used in serverside_js/compute_distincts.js
    raw_only_keys = ('_id', 'name', 'amount', 'classifiers',
                     'entities', 'currency')

    query_entries_directly = (dataset_name is None or
                              bool(query) or
                              key in raw_only_keys)

    if query_entries_directly:
        if dataset_name is not None:
            query['dataset.name'] = dataset_name
        return Entry.c.find(query).distinct(key)

    # From here on we rely on the pre-aggregated distincts of one dataset,
    # so that dataset has to exist.
    known_dataset = Dataset.c.find_one({'name': dataset_name},
                                       as_class=dict)
    if not known_dataset:
        raise ValueError('Dataset "%s" does not exist' % dataset_name)

    collection_name = 'distincts__%s' % dataset_name
    db = mongo.db()
    if collection_name not in db.collection_names():
        # We need to create the distincts collection first
        update_distincts(dataset_name)

    precomputed = db[collection_name]
    log.info('use distincts collection %s' % collection_name)
    matching = precomputed.find({'value.keys': key})
    return matching.distinct('_id')
Exemplo n.º 4
0
def used_keys(dataset_name):
    """Return the distinct ``value`` entries of a dataset's distincts collection.

    The per-dataset ``distincts__<dataset_name>`` collection is created via
    ``update_distincts`` if it does not exist yet.

    ``dataset_name``
        Name of the dataset whose distincts collection is inspected.

    Returns: the result of ``distinct('value')`` on that collection
    (presumably the keys in use for the dataset -- confirm against the
    schema written by ``update_distincts``).
    """
    collection_name = 'distincts__%s' % dataset_name
    db = mongo.db()
    if collection_name not in db.collection_names():
        # We need to create the distincts collection first
        update_distincts(dataset_name)
    # Previously the result was computed and dropped, so None was returned.
    return db[collection_name].distinct('value')