Пример #1
0
    def test_merge_in_place(self):
        """Exercises util.merge_in_place with no lists and with six lists."""
        # Called without any lists, the merge is expected to yield [].
        self.assertEqual([], util.merge_in_place())

        # Merge several overlapping sorted lists, including an empty one.
        list1 = [0, 1, 5, 6, 8, 9, 15]
        list2 = [0, 2, 3, 5, 8, 10, 11, 17]
        list3 = [1, 4, 6, 8, 10, 15, 16]
        list4 = [-1, 19]
        list5 = [20]
        list6 = []

        # dup_fn marks equal values as duplicates of one another.
        util.merge_in_place(list1, list2, list3, list4, list5, list6,
                            dup_fn=lambda x, y: x == y)

        # The sorted, de-duplicated merge is expected in the first list.
        self.assertEqual(
            [-1, 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 15, 16, 17, 19, 20],
            list1)
Пример #2
0
  def proximity_fetch(cls, query, center, max_results=10, max_distance=0):
     """Performs a proximity/radius fetch on the given query.
 
     Fetches at most <max_results> entities matching the given query,"""
@@ -169,7 +170,7 @@
     def _merge_results_in_place(a, b):
       """Hands a and b to util.merge_in_place with pair-aware callbacks.

       Items are indexed as pairs: element [1] drives the ordering and
       element [0] supplies the `key` attribute used to spot duplicates.
       """
       def _compare_second(left, right):
         # Order two items by their second element.
         return cmp(left[1], right[1])

       def _same_key(left, right):
         # NOTE(review): `key` is compared as an attribute, not called --
         # confirm whether the entities expose `key` as a property or a
         # method.
         return left[0].key == right[0].key

       util.merge_in_place(a, b, cmp_fn=_compare_second, dup_fn=_same_key)
Пример #3
0
    def test_merge_in_place(self):
        """Exercises util.merge_in_place with no lists and with six lists."""
        # Called without any lists, the merge is expected to yield [].
        self.assertEqual([], util.merge_in_place())

        # Merge several overlapping sorted lists, including an empty one.
        list1 = [0, 1, 5, 6, 8, 9, 15]
        list2 = [0, 2, 3, 5, 8, 10, 11, 17]
        list3 = [1, 4, 6, 8, 10, 15, 16]
        list4 = [-1, 19]
        list5 = [20]
        list6 = []

        # dup_fn marks equal values as duplicates of one another.
        util.merge_in_place(list1,
                            list2,
                            list3,
                            list4,
                            list5,
                            list6,
                            dup_fn=lambda x, y: x == y)

        # The sorted, de-duplicated merge is expected in the first list.
        self.assertEqual(
            [-1, 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 15, 16, 17, 19, 20], list1)
Пример #4
0
  def bounding_box_fetch(query, bbox, max_results=1000,
                         cost_function=None):
    """Performs a bounding box fetch on the given query.

    Fetches entities matching the given query with an additional filter
    matching only those entities that are inside of the given rectangular
    bounding box.

    Args:
      query: A db.Query on entities of this kind that should be additionally
          filtered by bounding box and subsequently fetched.
      bbox: A geotypes.Box indicating the bounding box to filter entities by.
      max_results: An optional int indicating the maximum number of desired
          results.
      cost_function: An optional function that accepts two arguments:
          * num_cells: the number of cells to search
          * resolution: the resolution of each cell to search
          and returns the 'cost' of querying against this number of cells
          at the given resolution.

    Returns:
      The fetched entities whose location lies within bbox (edges inclusive).

    Raises:
      Any exceptions that google.appengine.ext.db.Query.fetch() can raise.
    """
    # TODO(romannurik): Check for GqlQuery.
    if cost_function is None:
      cost_function = default_cost_function
    query_geocells = geocell.best_bbox_search_cells(bbox, cost_function)

    # Stays empty when no geocells were selected for the search.
    results = []

    if query_geocells:
      query_orderings = query._Query__orderings
      if query_orderings:
        # NOTE(romannurik): since 'IN' queries seem broken in App Engine,
        # manually search each geocell and then merge results in-place.
        # Each cell gets its own deep copy so the caller's query is not
        # filtered by this branch.
        cell_results = [copy.deepcopy(query)
            .filter('location_geocells =', search_cell)
            .fetch(max_results) for search_cell in query_geocells]

        # Manual in-memory sort on the query's defined ordering.
        def _ordering_fn(ent1, ent2):
          for prop, direction in query_orderings:
            prop_cmp = cmp(getattr(ent1, prop), getattr(ent2, prop))
            if prop_cmp != 0:
              # A direction of 1 keeps the natural order; anything else
              # reverses it.
              return prop_cmp if direction == 1 else -prop_cmp

          return -1  # Default ent1 < ent2.

        # Duplicates aren't possible so don't provide a dup_fn.
        util.merge_in_place(cmp_fn=_ordering_fn, *cell_results)
        results = cell_results[0][:max_results]
      else:
        # NOTE: We can't pass in max_results because of non-uniformity of the
        # search.
        # NOTE(review): the fixed fetch(1000) caps the result count even
        # when max_results is larger, and this branch filters the caller's
        # query object directly (no copy) -- confirm both are intended.
        results = (query
            .filter('location_geocells IN', query_geocells)
            .fetch(1000))[:max_results]

    if DEBUG:
      # query_geocells may be falsy here (possibly None), so guard len().
      logging.info('bbox query looked in %d geocells',
                   len(query_geocells or []))

    # In-memory filter.
    return [entity for entity in results if
        bbox.south <= entity.location.lat <= bbox.north and
        bbox.west <= entity.location.lon <= bbox.east]
Пример #5
0
 def _merge_results_in_place(a, b):
   """Hands a and b to util.merge_in_place with pair-aware callbacks.

   Items are indexed as pairs: element [1] drives the ordering and the
   key() of element [0] is used to spot duplicates.
   """
   def _compare_second(left, right):
     # Order two items by their second element.
     return cmp(left[1], right[1])

   def _same_key(left, right):
     # Two items are duplicates when their first elements share a key().
     return left[0].key() == right[0].key()

   util.merge_in_place(a, b, cmp_fn=_compare_second, dup_fn=_same_key)
Пример #6
0
  def bounding_box_fetch(query, bbox, max_results=1000,
                         cost_function=None):
    """Performs a bounding box fetch on the given query.

    Fetches entities matching the given query with an additional filter
    matching only those entities that are inside of the given rectangular
    bounding box.

    Args:
      query: A db.Query on entities of this kind that should be additionally
          filtered by bounding box and subsequently fetched.
      bbox: A geotypes.Box indicating the bounding box to filter entities by.
      max_results: An optional int indicating the maximum number of desired
          results.
      cost_function: An optional function that accepts two arguments:
          * num_cells: the number of cells to search
          * resolution: the resolution of each cell to search
          and returns the 'cost' of querying against this number of cells
          at the given resolution.

    Returns:
      The fetched entities whose location lies within bbox (edges inclusive).

    Raises:
      Any exceptions that google.appengine.ext.db.Query.fetch() can raise.
    """
    log.info("bounding_box_fetch started")
    # NOTE(review): query.count() is called only to produce this log line --
    # confirm the extra call is acceptable.
    log.info("Query count is %d" % query.count())
    # TODO(romannurik): Check for GqlQuery.
    if cost_function is None:
      cost_function = default_cost_function
    query_geocells = geocell.best_bbox_search_cells(bbox, cost_function)

    # Stays empty when no geocells were selected for the search.
    results = []

    if query_geocells:
      log.info("query_geocells has some value")
      query_orderings = query._Query__orderings
      if query_orderings:
        log.info("query_orderings has some value")
        # NOTE(romannurik): since 'IN' queries seem broken in App Engine,
        # manually search each geocell and then merge results in-place

        # NOTE(hturner): 'IN' is not broken. For each item in the list, a
        # separate subquery is performed. GQL only allows 30 total subqueries
        # for any single GQL query, and the location_geocells often contains
        # more than 30 items. Hence the 'IN' queries are not working for those
        # locations. Additionally, cursors cannot be used with 'IN' queries, so
        # that may be another issue.

        # We can probably fix this. We are duplicating a ton of
        # data. Ex: 9,9a,9a3,9a37,9a378,9a3787,
        # Note that if we could just store the 'final' geocell and remove one
        # char at a time, we get the same result. Note that a left to right
        # comparison of the chars would work, while just searching for if the
        # query cell is contained in the final cell would not

        # Each cell gets its own deep copy so the caller's query is not
        # filtered by this branch.
        cell_results = [copy.deepcopy(query)
          .filter('location_geocells =', search_cell)
          .fetch(max_results) for search_cell in query_geocells]

        # Manual in-memory sort on the query's defined ordering.
        def _ordering_fn(ent1, ent2):
          for prop, direction in query_orderings:
            prop_cmp = cmp(getattr(ent1, prop), getattr(ent2, prop))
            if prop_cmp != 0:
              # A direction of 1 keeps the natural order; anything else
              # reverses it.
              return prop_cmp if direction == 1 else -prop_cmp

          return -1  # Default ent1 < ent2.

        # Duplicates aren't possible so don't provide a dup_fn.
        util.merge_in_place(cmp_fn=_ordering_fn, *cell_results)
        results = cell_results[0][:max_results]
      else:
        # NOTE: We can't pass in max_results because of non-uniformity of the
        # search.
        # Adjacent literals keep the message on one line without embedding
        # source indentation in the logged text.
        log.info("query_orderings has no value, therefore we are using GQL "
                 "to perform the filtering")
        log.info("query_geocells are '%s'" % str(query_geocells))
        # NOTE(review): the fixed fetch(1000) caps the result count even
        # when max_results is larger, and this branch filters the caller's
        # query object directly (no copy) -- confirm both are intended.
        results = (query
            .filter('location_geocells IN', query_geocells)
            .fetch(1000))[:max_results]
        log.info("Returning %d results" % len(results))
    else:
      log.info("query_geocells was None, no results being returned")

    if DEBUG:
      # query_geocells may be falsy here (possibly None), so guard len().
      log.info('bbox query looked in %d geocells' % len(query_geocells or []))

    # In-memory filter.
    return [entity for entity in results if
        bbox.south <= entity.location.lat <= bbox.north and
        bbox.west <= entity.location.lon <= bbox.east]