Пример #1
0
    def sample(self, owner, cube, sample_size=None, fields=None,
               date=None, query=None):
        '''
        Draws a sample of objects at random from the cube.

        Query syntax parsing is handled by `pql`.

        :param cube: cube name
        :param owner: username of cube owner
        :param sample_size: Size of the sample. If None, or if it is not
                            smaller than the number of matching objects,
                            every matching object is returned.
        :param fields: Fields that should be returned
        :param date: date (metrique date range) that should be queried
                        If date==None then the most recent versions of
                        the objects will be queried
        :param query: query used to filter sampleset
        :returns: tuple of all matching objects when no sampling takes
                  place, otherwise a list of `sample_size` objects in
                  ascending cursor order
        '''
        self.requires_read(owner, cube)
        fields = self.get_fields(owner, cube, fields)
        query = query_add_date(query, date)
        spec = parse_pql_query(query)
        _cube = self.timeline(owner, cube)
        _docs = _cube.find(spec, fields=fields)
        n = _docs.count()
        # sample_size defaults to None; comparing against it (and handing it
        # to random.sample) is an error, so treat None as "no sampling".
        if sample_size is None or n <= sample_size:
            docs = tuple(_docs)
        else:
            # random.sample already yields distinct positions; sorting them
            # keeps the result in cursor order.
            to_sample = sorted(random.sample(xrange(n), sample_size))
            docs = [_docs[i] for i in to_sample]
        return docs
Пример #2
0
    def find(self, owner, cube, query, fields=None, date=None,
             sort=None, one=False, explain=False, merge_versions=True,
             skip=0, limit=0):
        '''
        Run a pql query against a cube's timeline and return the matches.

        :param owner: username of cube owner
        :param cube: cube name
        :param query: pql query string; a falsy value is treated as ''
        :param fields: fields to return, resolved through `get_fields`
        :param date: date handed to `query_add_date` along with the query
        :param sort: sort specification, passed through `check_sort`
        :param one: return the result of `find_one` instead of a tuple
        :param explain: return the cursor's `explain()` output instead
        :param merge_versions: delegate to `_merge_versions`; switched off
                               when `date` is None, when the resolved
                               fields are None, or when `_id` is requested
        :param skip: forwarded as the `skip` keyword
        :param limit: forwarded as the `limit` keyword

        When several flags are set the precedence is:
        explain, one, merge_versions, plain find.
        '''
        self.cube_exists(owner, cube)
        self.requires_owner_read(owner, cube)

        sort = self.check_sort(sort)
        fields = self.get_fields(owner, cube, fields)

        if date is None or fields is None:
            merge_versions = False
        elif '_id' in fields and fields['_id']:
            merge_versions = False

        spec = parse_pql_query(query_add_date(query or '', date))
        timeline = self.timeline(owner, cube)
        paging = dict(skip=skip, limit=limit)

        if explain:
            cursor = timeline.find(spec, fields=fields, sort=sort, **paging)
            return cursor.explain()
        if one:
            return timeline.find_one(spec, fields=fields, sort=sort,
                                     **paging)
        if merge_versions:
            # sort is not forwarded on this path
            return self._merge_versions(timeline, spec, fields, **paging)
        return tuple(timeline.find(spec, fields=fields, sort=sort,
                                   **paging))
Пример #3
0
    def remove_objects(self, owner, cube, query, date=None):
        '''
        Remove objects (docs) matching `query` from the given cube.

        :param string owner:
            username of cube owner
        :param string cube:
            cube name
        :param query:
            either a pql query string or a list/tuple of `_id` values
        :param string date:
            metrique date(range); only applied when `query` is a string
        :returns:
            [] when `query` is empty, otherwise whatever the timeline's
            `remove(spec)` call returns
        :raises ValueError:
            when `query` is neither a string nor a list/tuple
        '''
        self.cube_exists(owner, cube)
        self.requires_owner_admin(owner, cube)
        if not query:
            return []

        if isinstance(query, basestring):
            spec = parse_pql_query(query_add_date(query, date))
        elif isinstance(query, (list, tuple)):
            # explicit ids: match them directly
            spec = {'_id': {'$in': query}}
        else:
            raise ValueError(
                'Expected query string or list of ids, got: %s' % type(query))

        timeline = self.timeline(owner, cube, admin=True)
        return timeline.remove(spec)
Пример #4
0
 def count(self, owner, cube, query, date=None):
     '''
     Count the objects in a cube that match a pql query.

     :param owner: username of cube owner
     :param cube: cube name
     :param query: pql query string; a falsy value is treated as ''
     :param date: date combined with the query by `query_add_date`
     :returns: `count()` of the find cursor, or 0 if the cursor is falsy

     A query that `pql.find` cannot parse is reported through
     `self._raise(400, ...)`.
     '''
     self.cube_exists(owner, cube)
     self.requires_owner_read(owner, cube)
     # The previous `set_default(query, '')` call discarded its result, so
     # a None query was never replaced; normalise it explicitly instead.
     query = query or ''
     logger.info('pql query: %s', query)
     try:
         spec = pql.find(query_add_date(query, date))
     except Exception as e:
         # NOTE(review): assumes self._raise raises; if it returned,
         # `spec` would be unbound below -- confirm.
         self._raise(400, "Invalid Query (%s)" % str(e))
     logger.debug('mongo query: %s', spec)
     _cube = self.timeline(owner, cube)
     docs = _cube.find(spec=spec)
     return docs.count() if docs else 0
Пример #5
0
    def find(self, owner, cube, query, fields=None, date=None,
             sort=None, one=False, explain=False, merge_versions=True,
             skip=0, limit=0):
        '''
        Wrapper around pymongo's find() command.

        Query syntax parsing is handled by `pql`.

        :param owner: username of cube owner
        :param cube: cube name
        :param query: the query in pql; a falsy value is treated as ''
        :param fields: fields that should be returned (comma-separated)
        :param date: date (metrique date range) that should be queried.
                    If date==None then the most recent versions of the
                    objects will be queried.
        :param sort: return back results sorted
        :param one: return back only the first matching object
        :param explain: return the execution plan instead of results
        :param merge_versions: merge versions where field values are equal;
                               disabled when `date` is None, when the
                               resolved fields are None, or when `_id` is
                               among the requested fields
        :param skip: number of matched results to skip and not return
        :param limit: number of matched results to return of total found

        Flag precedence: explain, then one, then merge_versions.
        '''
        self.requires_read(owner, cube)

        sort = self.check_sort(sort)
        fields = self.get_fields(owner, cube, fields)

        if date is None or fields is None:
            merge_versions = False
        elif '_id' in fields and fields['_id']:
            merge_versions = False

        pql_query = query_add_date(query or '', date)
        spec = parse_pql_query(pql_query)

        collection = self.timeline(owner, cube)
        find_opts = {'fields': fields, 'sort': sort,
                     'skip': skip, 'limit': limit}

        if explain:
            return collection.find(spec, **find_opts).explain()
        if one:
            return collection.find_one(spec, **find_opts)
        if merge_versions:
            # merge_versions ignores sort (for now)
            return self._merge_versions(collection, spec, fields,
                                        skip=skip, limit=limit)
        return tuple(collection.find(spec, **find_opts))
Пример #6
0
 def sample(self, owner, cube, sample_size=None, fields=None,
            date=None, query=None):
     '''
     Draw a random sample of objects from the cube.

     :param owner: username of cube owner
     :param cube: cube name
     :param sample_size: size of the sample. If None, or if it is not
                         smaller than the number of matching objects,
                         every matching object is returned.
     :param fields: fields that should be returned
     :param date: date combined with the query by `query_add_date`
     :param query: query used to filter the sample set
     :returns: tuple of all matching objects when no sampling takes
               place, otherwise a list of `sample_size` objects in
               ascending cursor order
     '''
     self.cube_exists(owner, cube)
     self.requires_owner_read(owner, cube)
     fields = self.get_fields(owner, cube, fields)
     query = query_add_date(query, date)
     spec = parse_pql_query(query)
     _cube = self.timeline(owner, cube)
     _docs = _cube.find(spec, fields=fields)
     n = _docs.count()
     # sample_size defaults to None; comparing against it (and handing it
     # to random.sample) is an error, so treat None as "no sampling".
     if sample_size is None or n <= sample_size:
         docs = tuple(_docs)
     else:
         # random.sample already yields distinct positions; sorting them
         # keeps the result in cursor order.
         to_sample = sorted(random.sample(xrange(n), sample_size))
         docs = [_docs[i] for i in to_sample]
     return docs
Пример #7
0
    def count(self, owner, cube, query, date=None):
        '''
        Wrapper around pymongo's find().count() command.

        Query syntax parsing is handled by `pql`.

        :param owner: username of cube owner
        :param cube: cube name
        :param query: the query in pql; a falsy value is treated as ''
        :param date: date (metrique date range) that should be queried.
                     If date==None then the most recent versions of the
                     objects will be queried.
        :returns: `count()` of the find cursor, or 0 if the cursor is falsy
        '''
        self.requires_read(owner, cube)

        # FIXME: move query logging into parse_pql_query after the
        # logging refactor
        spec = parse_pql_query(query_add_date(query or '', date))
        cursor = self.timeline(owner, cube).find(spec=spec)
        if not cursor:
            return 0
        return cursor.count()
Пример #8
0
    def distinct(self, owner, cube, field, query=None, date=None):
        '''
        Return the distinct (unique) values of a field in a cube.

        Query syntax parsing is handled by `pql`.

        :param owner: username of cube owner
        :param cube: cube name
        :param field: field to get distinct values from
        :param query: pql query to run as a pre-filter
        :param string date: metrique date(range); only used together
                            with a string `query`

        With a string `query`, distinct() is run on a find cursor for
        that query; otherwise it is run directly on the timeline and
        `date` is not applied.
        '''
        self.requires_read(owner, cube)

        if not isinstance(query, basestring):
            # no usable pre-filter: go straight to the collection
            return self.timeline(owner, cube).distinct(field)

        spec = parse_pql_query(query_add_date(query, date))
        cursor = self.timeline(owner, cube).find(spec)
        return cursor.distinct(field)
Пример #9
0
    def remove_objects(self, owner, cube, query, date=None):
        '''
        Remove the objects matching `query` from the given cube.

        :param owner: username of cube owner
        :param cube: cube name
        :param query: pql query string, or a list/tuple of `_id` values
        :param string date: metrique date(range); only applied when
                            `query` is a string
        :returns: [] when `query` is empty, otherwise whatever the
                  timeline's `remove(spec)` call returns
        :raises ValueError: when `query` is neither a string nor a
                            list/tuple
        '''
        self.requires_admin(owner, cube)
        if not query:
            return []

        if isinstance(query, basestring):
            spec = parse_pql_query(query_add_date(query, date))
        elif isinstance(query, (list, tuple)):
            # explicit ids: match them directly
            spec = {'_id': {'$in': query}}
        else:
            raise ValueError(
                'Expected query string or list of ids, got: %s' % type(query))

        timeline = self.timeline(owner, cube, admin=True)
        return timeline.remove(spec)