def test_merge_in_place(self):
    """Exercises util.merge_in_place with no lists and with six sorted lists.

    The multi-list case passes an equality dup_fn and expects the merged,
    de-duplicated, ascending result to end up in the first list argument.
    """
    # With no arguments the helper is expected to return an empty list.
    self.assertEqual([], util.merge_in_place())

    # test massive list merge
    list1 = [0, 1, 5, 6, 8, 9, 15]
    list2 = [0, 2, 3, 5, 8, 10, 11, 17]
    list3 = [1, 4, 6, 8, 10, 15, 16]
    list4 = [-1, 19]
    list5 = [20]
    list6 = []

    util.merge_in_place(list1, list2, list3, list4, list5, list6,
                        dup_fn=lambda x, y: x == y)

    # Values present in several inputs (0, 1, 5, 6, 8, 10, 15) appear once.
    self.assertEqual(
        [-1, 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 15, 16, 17, 19, 20],
        list1)
def proximity_fetch(cls, query, center, max_results=10, max_distance=0):
    """Performs a proximity/radius fetch on the given query.

    Fetches at most <max_results> entities matching the given query,"""
    # (Excerpt: the rest of this docstring and body is not shown here.)


# Diff hunk context: @@ -169,7 +170,7 @@
def _merge_results_in_place(a, b):
    """Merges the result list b into a via util.merge_in_place.

    Items are indexable pairs: element [1] is the value the merge orders by
    and element [0] is an object exposing key() (presumably a datastore
    entity paired with its distance -- confirm against the caller).

    Args:
      a: The list that is merged into.
      b: The list merged together with a.
    """
    util.merge_in_place(
        a, b,
        cmp_fn=lambda x, y: cmp(x[1], y[1]),
        # key must be *called*: comparing the bound methods themselves
        # (x[0].key == y[0].key) compares method objects, not key values, so
        # the same entity fetched twice would never be seen as a duplicate.
        dup_fn=lambda x, y: x[0].key() == y[0].key())
def bounding_box_fetch(query, bbox, max_results=1000, cost_function=None):
    """Performs a bounding box fetch on the given query.

    Fetches entities matching the given query with an additional filter
    matching only those entities that are inside of the given rectangular
    bounding box.

    Args:
      query: A db.Query on entities of this kind that should be additionally
          filtered by bounding box and subsequently fetched.
      bbox: A geotypes.Box indicating the bounding box to filter entities by.
      max_results: An optional int indicating the maximum number of desired
          results.
      cost_function: An optional function that accepts two arguments:
          * num_cells: the number of cells to search
          * resolution: the resolution of each cell to search
          and returns the 'cost' of querying against this number of cells
          at the given resolution.

    Returns:
      The fetched entities whose location lies inside bbox (edges included).
      The max_results cut is applied before this in-memory filter, so fewer
      than max_results entities may be returned.

    Raises:
      Any exceptions that google.appengine.ext.db.Query.fetch() can raise.
    """
    # TODO(romannurik): Check for GqlQuery.
    results = []

    if cost_function is None:
        cost_function = default_cost_function
    query_geocells = geocell.best_bbox_search_cells(bbox, cost_function)

    if query_geocells:
        query_orderings = query._Query__orderings
        if query_orderings:
            # NOTE(romannurik): since 'IN' queries seem broken in App Engine,
            # manually search each geocell and then merge results in-place
            cell_results = [copy.deepcopy(query)
                            .filter('location_geocells =', search_cell)
                            .fetch(max_results)
                            for search_cell in query_geocells]

            # Manual in-memory sort on the query's defined ordering.
            def _ordering_fn(ent1, ent2):
                for prop, direction in query_orderings:
                    prop_cmp = cmp(getattr(ent1, prop), getattr(ent2, prop))
                    if prop_cmp != 0:
                        # direction == 1 keeps the comparison as is; any
                        # other direction value reverses it.
                        return prop_cmp if direction == 1 else -prop_cmp
                return -1  # Default ent1 < ent2.

            # Duplicates aren't possible so don't provide a dup_fn.
            util.merge_in_place(cmp_fn=_ordering_fn, *cell_results)
            results = cell_results[0][:max_results]
        else:
            # NOTE: We can't pass in max_results because of non-uniformity of
            # the search.
            # NOTE(review): unlike the ordered branch above, this filters
            # `query` itself rather than a deep copy -- confirm callers do not
            # reuse the query object afterwards.
            results = (query
                       .filter('location_geocells IN', query_geocells)
                       .fetch(1000))[:max_results]

    if DEBUG:
        # query_geocells may be falsy here (the branch above was skipped);
        # `or ()` keeps len() from raising if it is None.
        logging.info('bbox query looked in %d geocells',
                     len(query_geocells or ()))

    # In-memory filter.
    return [entity for entity in results
            if bbox.south <= entity.location.lat <= bbox.north
            and bbox.west <= entity.location.lon <= bbox.east]
def _merge_results_in_place(a, b):
    """Merges the result list b into a via util.merge_in_place.

    Each item is an indexable pair: element [1] is the value items are
    ordered by, and element [0] exposes key(), which identifies duplicates
    (presumably an entity paired with its distance -- confirm with caller).

    Args:
      a: The list that is merged into.
      b: The list merged together with a.
    """
    def _by_second_element(left, right):
        # Order purely on the second element of each pair.
        return cmp(left[1], right[1])

    def _same_key(left, right):
        # Two pairs are duplicates when their first elements share a key.
        return left[0].key() == right[0].key()

    util.merge_in_place(a, b, cmp_fn=_by_second_element, dup_fn=_same_key)
def bounding_box_fetch(query, bbox, max_results=1000, cost_function=None):
    """Performs a bounding box fetch on the given query.

    Fetches entities matching the given query with an additional filter
    matching only those entities that are inside of the given rectangular
    bounding box.

    Args:
      query: A db.Query on entities of this kind that should be additionally
          filtered by bounding box and subsequently fetched.
      bbox: A geotypes.Box indicating the bounding box to filter entities by.
      max_results: An optional int indicating the maximum number of desired
          results.
      cost_function: An optional function that accepts two arguments:
          * num_cells: the number of cells to search
          * resolution: the resolution of each cell to search
          and returns the 'cost' of querying against this number of cells
          at the given resolution.

    Returns:
      The fetched entities whose location lies inside bbox (edges included).
      The max_results cut is applied before this in-memory filter, so fewer
      than max_results entities may be returned.

    Raises:
      Any exceptions that google.appengine.ext.db.Query.fetch() can raise.
    """
    log.info("bounding_box_fetch started")
    # NOTE(review): query.count() presumably costs an extra datastore round
    # trip purely for this log line -- consider guarding it behind DEBUG.
    log.info("Query count is %d" % query.count())

    # TODO(romannurik): Check for GqlQuery.
    results = []

    if cost_function is None:
        cost_function = default_cost_function
    query_geocells = geocell.best_bbox_search_cells(bbox, cost_function)

    if query_geocells:
        log.info("query_geocells has some value")
        query_orderings = query._Query__orderings
        if query_orderings:
            log.info("query_orderings has some value")
            # NOTE(romannurik): since 'IN' queries seem broken in App Engine,
            # manually search each geocell and then merge results in-place
            # NOTE(hturner): 'IN' is not broken. For each item in the list, a
            # separate subquery is performed. GQL only allows 30 total
            # subqueries for any single GQL query, and the location_geocells
            # often contains more than 30 items. Hence the 'IN' queries are
            # not working for those locations. Additionally, cursors cannot
            # be used with 'IN' queries, so that may be another issue.
            # We can probably fix this. We are duplicating a ton of data.
            # Ex: 9,9a,9a3,9a37,9a378,9a3787,
            # Note that if we could just store the 'final' geocell and remove
            # one char at a time, we get the same result. Note that a left to
            # right comparison of the chars would work, while just searching
            # for if the query cell is contained in the final cell would not
            cell_results = [copy.deepcopy(query)
                            .filter('location_geocells =', search_cell)
                            .fetch(max_results)
                            for search_cell in query_geocells]

            # Manual in-memory sort on the query's defined ordering.
            def _ordering_fn(ent1, ent2):
                for prop, direction in query_orderings:
                    prop_cmp = cmp(getattr(ent1, prop), getattr(ent2, prop))
                    if prop_cmp != 0:
                        # direction == 1 keeps the comparison as is; any
                        # other direction value reverses it.
                        return prop_cmp if direction == 1 else -prop_cmp
                return -1  # Default ent1 < ent2.

            # Duplicates aren't possible so don't provide a dup_fn.
            util.merge_in_place(cmp_fn=_ordering_fn, *cell_results)
            results = cell_results[0][:max_results]
        else:
            # NOTE: We can't pass in max_results because of non-uniformity of
            # the search.
            # The message is built by implicit concatenation; a backslash
            # continuation inside the literal leaked into the logged text.
            log.info("query_orderings has no value, therefore we are using "
                     "GQL to perform the filtering")
            log.info("query_geocells are '%s'" % str(query_geocells))
            # NOTE(review): this filters `query` itself rather than a deep
            # copy as the ordered branch does -- confirm callers do not reuse
            # the query object afterwards.
            results = (query
                       .filter('location_geocells IN', query_geocells)
                       .fetch(1000))[:max_results]
            log.info("Returning %d results" % len(results))
    else:
        log.info("query_geocells was None, no results being returned")

    if DEBUG:
        # query_geocells is falsy on the no-results path; `or ()` keeps
        # len() from raising if it is None.
        log.info('bbox query looked in %d geocells'
                 % len(query_geocells or ()))

    # In-memory filter.
    return [entity for entity in results
            if bbox.south <= entity.location.lat <= bbox.north
            and bbox.west <= entity.location.lon <= bbox.east]