def sample(self, owner, cube, sample_size=None, fields=None,
           date=None, query=None):
    '''
    Draws a sample of objects at random from the cube.

    Query syntax parsing is handled by `pql`.

    :param cube: cube name
    :param owner: username of cube owner
    :param sample_size: Size of the sample; when None, every matching
                        object is returned
    :param fields: Fields that should be returned
    :param date: date (metrique date range) that should be queried
                 If date==None then the most recent versions of the
                 objects will be queried
    :param query: query used to filter sampleset
    :returns: tuple of all matching objects when the sample would cover
              the whole result set, otherwise a list of `sample_size`
              objects picked at random
    '''
    self.requires_read(owner, cube)
    fields = self.get_fields(owner, cube, fields)
    query = query_add_date(query, date)
    spec = parse_pql_query(query)
    _cube = self.timeline(owner, cube)
    _docs = _cube.find(spec, fields=fields)
    n = _docs.count()
    # sample_size defaults to None; without this guard None would reach
    # the comparison / random.sample() below and fail instead of sampling.
    if sample_size is None or n <= sample_size:
        return tuple(_docs)
    # random.sample() picks distinct positions, so no de-duplication is
    # needed; positions are fetched from the cursor in ascending order.
    to_sample = sorted(random.sample(xrange(n), sample_size))
    return [_docs[i] for i in to_sample]
def find(self, owner, cube, query, fields=None, date=None, sort=None,
         one=False, explain=False, merge_versions=True, skip=0, limit=0):
    '''
    Run a pql query against a cube and return what matched.

    :param owner: username of cube owner
    :param cube: cube name
    :param query: pql query string; a falsy value is replaced by ''
    :param fields: fields to return, resolved through `get_fields`
    :param date: date combined with the query by `query_add_date`
    :param sort: sort specification, validated by `check_sort`
    :param one: return the result of `find_one` instead of all matches
    :param explain: return the cursor's `explain()` output instead of
                    results; takes precedence over `one`
    :param merge_versions: pass the query through `_merge_versions`;
                           switched off when date is None, fields is
                           None, or `_id` is among the requested fields
    :param skip: number of matches to skip
    :param limit: maximum number of matches to return
    :returns: explain output, a single object, the merged result, or a
              tuple of matching objects, depending on the flags above
    '''
    self.cube_exists(owner, cube)
    self.requires_owner_read(owner, cube)
    sort = self.check_sort(sort)
    fields = self.get_fields(owner, cube, fields)

    # Merging is only kept when a date was given, a field set was
    # resolved and '_id' is not one of the requested fields.
    can_merge = (date is not None and fields is not None
                 and not ('_id' in fields and fields['_id']))
    if not can_merge:
        merge_versions = False

    spec = parse_pql_query(query_add_date(query or '', date))
    _cube = self.timeline(owner, cube)
    paging = {'skip': skip, 'limit': limit}

    if explain:
        cursor = _cube.find(spec, fields=fields, sort=sort, **paging)
        return cursor.explain()
    if one:
        return _cube.find_one(spec, fields=fields, sort=sort, **paging)
    if merge_versions:
        # merge_versions ignores sort (for now)
        return self._merge_versions(_cube, spec, fields, **paging)
    return tuple(_cube.find(spec, fields=fields, sort=sort, **paging))
def remove_objects(self, owner, cube, query, date=None):
    '''
    Remove the objects (docs) selected by `query` from the given cube.

    :param owner: username of cube owner
    :param cube: cube name
    :param query: pql query string, or a list/tuple of `_id` values;
                  a falsy value removes nothing
    :param string date: metrique date(range); only applied when `query`
                        is a pql string
    :returns: [] when `query` is falsy, otherwise the result of the
              collection's `remove` call
    :raises ValueError: if `query` is neither a string nor a list/tuple
    '''
    self.cube_exists(owner, cube)
    self.requires_owner_admin(owner, cube)
    if not query:
        return []

    is_pql = isinstance(query, basestring)
    if not is_pql and not isinstance(query, (list, tuple)):
        raise ValueError(
            'Expected query string or list of ids, got: %s' % type(query))

    if is_pql:
        spec = parse_pql_query(query_add_date(query, date))
    else:
        # A list/tuple is treated as explicit object ids.
        spec = {'_id': {'$in': query}}

    return self.timeline(owner, cube, admin=True).remove(spec)
def count(self, owner, cube, query, date=None):
    '''
    Count the objects in a cube that match a pql query.

    :param owner: username of cube owner
    :param cube: cube name
    :param query: pql query string; a falsy value is treated as ''
    :param date: date combined with the query by `query_add_date`
    :returns: number of matching objects
    '''
    self.cube_exists(owner, cube)
    self.requires_owner_read(owner, cube)
    # The previous `set_default(query, '')` call threw its result away,
    # so a None query was never replaced; default it explicitly.
    query = query or ''
    logger.info('pql query: %s', query)
    try:
        spec = pql.find(query_add_date(query, date))
    except Exception as e:
        # NOTE(review): presumably self._raise() raises an HTTP 400 and
        # never returns; otherwise `spec` is unbound below - confirm.
        self._raise(400, "Invalid Query (%s)" % str(e))
    logger.debug('mongo query: %s', spec)
    _cube = self.timeline(owner, cube)
    docs = _cube.find(spec=spec)
    return docs.count() if docs else 0
def find(self, owner, cube, query, fields=None, date=None, sort=None,
         one=False, explain=False, merge_versions=True, skip=0, limit=0):
    '''
    Wrapper around pymongo's find() command.

    Query syntax parsing is handled by `pql`.

    :param cube: cube name
    :param owner: username of cube owner
    :param query: The query in pql; a falsy value is replaced by ''
    :param fields: Fields that should be returned (comma-separated)
    :param date: date (metrique date range) that should be queried.
                 If date==None then the most recent versions of the
                 objects will be queried.
    :param explain: return execution plan instead of results
    :param merge_versions: merge versions where fields values equal;
                           switched off when date is None, fields is
                           None, or `_id` is among the requested fields
    :param one: return back only first matching object
    :param sort: return back results sorted
    :param skip: number of results matched to skip and not return
    :param limit: number of results matched to return of total found
    '''
    self.requires_read(owner, cube)
    sort = self.check_sort(sort)
    fields = self.get_fields(owner, cube, fields)

    # Merging needs a date, a resolved field set and no '_id' field.
    mergeable = (date is not None and fields is not None
                 and not ('_id' in fields and fields['_id']))
    if not mergeable:
        merge_versions = False

    spec = parse_pql_query(query_add_date(query or '', date))
    _cube = self.timeline(owner, cube)
    window = {'skip': skip, 'limit': limit}

    if explain:
        cursor = _cube.find(spec, fields=fields, sort=sort, **window)
        return cursor.explain()
    if one:
        return _cube.find_one(spec, fields=fields, sort=sort, **window)
    if merge_versions:
        # merge_versions ignores sort (for now)
        return self._merge_versions(_cube, spec, fields, **window)
    return tuple(_cube.find(spec, fields=fields, sort=sort, **window))
def sample(self, owner, cube, sample_size=None, fields=None,
           date=None, query=None):
    '''
    Draw a random sample of the objects in a cube that match a query.

    :param owner: username of cube owner
    :param cube: cube name
    :param sample_size: number of objects to sample; when None, every
                        matching object is returned
    :param fields: fields to return, resolved through `get_fields`
    :param date: date combined with the query by `query_add_date`
    :param query: pql query used to filter the sample set
    :returns: tuple of all matching objects when the sample would cover
              the whole result set, otherwise a list of `sample_size`
              objects picked at random
    '''
    self.cube_exists(owner, cube)
    self.requires_owner_read(owner, cube)
    fields = self.get_fields(owner, cube, fields)
    query = query_add_date(query, date)
    spec = parse_pql_query(query)
    _cube = self.timeline(owner, cube)
    _docs = _cube.find(spec, fields=fields)
    n = _docs.count()
    # sample_size defaults to None; without this guard None would reach
    # the comparison / random.sample() below and fail instead of sampling.
    if sample_size is None or n <= sample_size:
        return tuple(_docs)
    # random.sample() picks distinct positions, so no de-duplication is
    # needed; positions are fetched from the cursor in ascending order.
    to_sample = sorted(random.sample(xrange(n), sample_size))
    return [_docs[i] for i in to_sample]
def count(self, owner, cube, query, date=None):
    '''
    Wrapper around pymongo's find().count() command.

    Query syntax parsing is handled by `pql`.

    :param cube: cube name
    :param owner: username of cube owner
    :param query: The query in pql; a falsy value is replaced by ''
    :param date: date (metrique date range) that should be queried
                 If date==None then the most recent versions of the
                 objects will be queried.
    :returns: number of objects matching the query
    '''
    self.requires_read(owner, cube)
    # FIXME: logging move to parse_pql_query, after
    # logging refactor
    spec = parse_pql_query(query_add_date(query or '', date))
    cursor = self.timeline(owner, cube).find(spec=spec)
    if not cursor:
        return 0
    return cursor.count()
def distinct(self, owner, cube, field, query=None, date=None):
    '''
    Return back a distinct (unique) list of field values
    across the entire cube dataset.

    Query syntax parsing is handled by `pql`.

    :param cube: cube name
    :param owner: username of cube owner
    :param field: field to get distinct token values from
    :param query: pql query to run as a pre-filter
    :param string date: metrique date(range); only applied when a
                        query string is given

    When `query` is a string, distinct() runs on a find cursor for
    that query; otherwise it runs directly on the collection.
    '''
    self.requires_read(owner, cube)
    if not isinstance(query, basestring):
        # No pre-filter: ask the collection itself.
        return self.timeline(owner, cube).distinct(field)
    spec = parse_pql_query(query_add_date(query, date))
    return self.timeline(owner, cube).find(spec).distinct(field)
def remove_objects(self, owner, cube, query, date=None):
    '''
    Remove the objects selected by `query` from the given cube.

    :param owner: username of cube owner
    :param cube: cube name
    :param query: pql query string, or a list/tuple of `_id` values;
                  a falsy value removes nothing
    :param string date: metrique date(range); only applied when `query`
                        is a pql string
    :returns: [] when `query` is falsy, otherwise the result of the
              collection's `remove` call
    :raises ValueError: if `query` is neither a string nor a list/tuple
    '''
    self.requires_admin(owner, cube)
    if not query:
        return []

    given_pql = isinstance(query, basestring)
    if not given_pql and not isinstance(query, (list, tuple)):
        raise ValueError(
            'Expected query string or list of ids, got: %s' % type(query))

    if given_pql:
        spec = parse_pql_query(query_add_date(query, date))
    else:
        # A list/tuple is treated as explicit object ids.
        spec = {'_id': {'$in': query}}

    return self.timeline(owner, cube, admin=True).remove(spec)