Exemplo n.º 1
0
 def testKeyRangesFromList(self):
   """Build KeyRanges from a list of three KeyRange objects and verify it."""
   source_ranges = [
       key_range.KeyRange(db.Key.from_path("TestEntity", entity_id))
       for entity_id in (1, 2, 3)
   ]
   # The factory receives its own copy of the list; the original list is
   # what the result is compared against.
   built = key_ranges.KeyRangesFactory.create_from_list(source_ranges[:])
   self._assertEqualsAndSerialize(source_ranges, built)
Exemplo n.º 2
0
 def testSplitNSByScatter_LotsOfData(self):
     """Split lots of data for each shard."""
     ns = "google"

     def asc_range(start_name, end_name):
         # Build one ascending KeyRange in `ns`; a None name leaves that
         # side of the range open. The start is inclusive only when set.
         start = (None if start_name is None
                  else testutil.key(start_name, namespace=ns))
         end = (None if end_name is None
                else testutil.key(end_name, namespace=ns))
         return key_range.KeyRange(key_start=start,
                                   key_end=end,
                                   direction="ASC",
                                   include_start=start_name is not None,
                                   include_end=False,
                                   namespace=ns,
                                   _app=self.appid)

     testutil._create_entities(range(100),
                               {"80": 80, "50": 50, "30": 30, "10": 10},
                               ns=ns)
     expected = [
         asc_range(None, "30"),
         asc_range("30", "80"),
         asc_range("80", None),
     ]
     self._assertEquals_splitNSByScatter(3, expected, ns=ns)
Exemplo n.º 3
0
 def testSplitNSByScatter_NotEnoughData2(self):
     """Splits should not intersect, if there's not enough data for each."""

     def asc_range(start_name, end_name):
         # Build one ascending KeyRange in the empty namespace; a None name
         # leaves that side open. The start is inclusive only when set.
         start = None if start_name is None else testutil.key(start_name)
         end = None if end_name is None else testutil.key(end_name)
         return key_range.KeyRange(key_start=start,
                                   key_end=end,
                                   direction="ASC",
                                   include_start=start_name is not None,
                                   include_end=False,
                                   namespace="",
                                   _app=self.appid)

     testutil._create_entities(range(10), {"2": 2, "4": 4})
     # Four shards are requested but only three ranges are expected; the
     # fourth slot is None.
     expected = [
         asc_range(None, "2"),
         asc_range("2", "4"),
         asc_range("4", None),
         None,
     ]
     self._assertEquals_splitNSByScatter(4, expected)
Exemplo n.º 4
0
  def testKeyRangesFromNSRange(self):
    """Build KeyRanges from a NamespaceRange and expect one range per namespace."""
    namespaces = ["1", "3", "5"]
    self.create_entities_in_multiple_ns(namespaces)
    kranges = key_ranges.KeyRangesFactory.create_from_ns_range(
        namespace_range.NamespaceRange("0", "5", _app=self.app))

    # One whole-namespace KeyRange for each namespace that got entities.
    expected = [
        key_range.KeyRange(namespace=ns, _app=self.app)
        for ns in ("1", "3", "5")
    ]
    self._assertEqualsAndSerialize(expected, kranges)
Exemplo n.º 5
0
    def _split_input_from_namespace(cls, app, namespace, entity_kind_name,
                                    shard_count):
        """Return KeyRange objects. Helper for _split_input_from_params.

        Samples keys of the kind ordered by the ``__scatter__`` property and
        uses them as split points for contiguous ascending key ranges.

        Args:
          app: app id passed to the datastore query and to every KeyRange.
          namespace: namespace to split.
          entity_kind_name: entity kind name; shortened with
            util.get_short_name before querying.
          shard_count: requested number of shards.

        Returns:
          A list of key_range.KeyRange objects. The list holds a single range
          covering the whole namespace when shard_count is 1 or when the
          query returns no keys.
        """

        raw_entity_kind = util.get_short_name(entity_kind_name)

        if shard_count == 1:
            return [key_range.KeyRange(namespace=namespace, _app=app)]

        ds_query = datastore.Query(kind=raw_entity_kind,
                                   namespace=namespace,
                                   _app=app,
                                   keys_only=True)
        ds_query.Order("__scatter__")
        random_keys = ds_query.Get(shard_count * cls._OVERSAMPLING_FACTOR)
        if not random_keys:
            return [key_range.KeyRange(namespace=namespace, _app=app)]
        random_keys.sort()
        split_points_count = shard_count - 1
        if len(random_keys) > split_points_count:
            # Thin the oversampled keys down to evenly spaced split points.
            # Floor division keeps the index an int on Python 3 as well.
            random_keys = [
                random_keys[len(random_keys) * i // split_points_count]
                for i in range(split_points_count)
            ]

        # Every range carries _app=app, matching the single-range returns
        # above, so all ranges produced here target the same application.
        key_ranges = []

        # Open-ended range before the first split point.
        key_ranges.append(
            key_range.KeyRange(key_start=None,
                               key_end=random_keys[0],
                               direction=key_range.KeyRange.ASC,
                               include_start=False,
                               include_end=False,
                               namespace=namespace,
                               _app=app))

        # Half-open ranges [split_i, split_i+1) between adjacent points.
        for i in range(0, len(random_keys) - 1):
            key_ranges.append(
                key_range.KeyRange(key_start=random_keys[i],
                                   key_end=random_keys[i + 1],
                                   direction=key_range.KeyRange.ASC,
                                   include_start=True,
                                   include_end=False,
                                   namespace=namespace,
                                   _app=app))

        # Open-ended range from the last split point onwards.
        key_ranges.append(
            key_range.KeyRange(key_start=random_keys[-1],
                               key_end=None,
                               direction=key_range.KeyRange.ASC,
                               include_start=True,
                               include_end=False,
                               namespace=namespace,
                               _app=app))

        return key_ranges
Exemplo n.º 6
0
    def _iter_ns_range(self):
        """Iterates over self._ns_range, delegating to self._iter_key_range()."""
        while True:
            if self._current_key_range is None:
                query = self._ns_range.make_datastore_query()
                namespace_result = query.Get(1)
                if not namespace_result:
                    break

                namespace = namespace_result[0].name() or ""
                self._current_key_range = key_range.KeyRange(
                    namespace=namespace, _app=self._ns_range.app)

            for key, o in self._iter_key_range(
                    copy.deepcopy(self._current_key_range)):
                self._current_key_range.advance(key)
                yield o

            if (self._ns_range.is_single_namespace
                    or self._current_key_range.namespace
                    == self._ns_range.namespace_end):
                break
            self._ns_range = self._ns_range.with_start_after(
                self._current_key_range.namespace)
            self._current_key_range = None
Exemplo n.º 7
0
    def next(self):
        """Return a KeyRange for the next namespace from self._iter.

        Returns:
          A key_range.KeyRange covering the namespace just read, built with
          the app of the namespace range that was active for this call.

        Raises:
          StopIteration: if the namespace range has already been exhausted,
            or when self._iter itself is exhausted.
        """
        if self._ns_range is None:
            raise StopIteration()

        self._last_ns = self._iter.next()
        # Keep a reference to the range before it may be cleared below;
        # its app is still needed to build the returned KeyRange.
        current_ns_range = self._ns_range
        if self._last_ns == current_ns_range.namespace_end:
            # Last namespace reached: later calls raise StopIteration.
            self._ns_range = None
        return key_range.KeyRange(namespace=self._last_ns,
                                  _app=current_ns_range.app)
Exemplo n.º 8
0
    def __next__(self):
        """Return a KeyRange for the next namespace from self._iter.

        Raises:
          StopIteration: if the namespace range has already been exhausted,
            or when self._iter itself is exhausted.
        """
        if self._ns_range is None:
            raise StopIteration()

        namespace = next(self._iter)
        self._last_ns = namespace
        # Hold on to the range: it may be cleared below but its app is
        # still needed for the returned KeyRange.
        active_range = self._ns_range
        reached_end = namespace == active_range.namespace_end
        if reached_end:
            self._ns_range = None
        return key_range.KeyRange(namespace=namespace, _app=active_range.app)
 def _create_iter(self, iter_cls, entity_kind):
     """Create a key-ranges iterator with one KeyRange per namespace.

     Args:
       iter_cls: iterator class handed to the range iterator factory.
       entity_kind: entity kind path; its short name is used as the
         query's entity kind.

     Returns:
       The iterator built by
       db_iters.RangeIteratorFactory.create_key_ranges_iterator.
     """
     per_namespace = []
     for ns in self.namespaces:
         per_namespace.append(key_range.KeyRange(namespace=ns))
     combined = key_ranges.KeyRangesFactory.create_from_list(per_namespace)

     spec = model.QuerySpec(entity_kind=util.get_short_name(entity_kind),
                            batch_size=10,
                            filters=self.filters,
                            model_class_path=entity_kind)
     return db_iters.RangeIteratorFactory.create_key_ranges_iterator(
         combined, spec, iter_cls)
Exemplo n.º 10
0
    def _split_input_from_params(cls, app, namespaces, entity_kind_name,
                                 params, shard_count):
        """Split input via the parent class, with a one-reader fallback.

        Returns:
          The readers produced by the parent implementation when there are
          any; otherwise a list with a single reader that covers every
          given namespace with one whole-namespace KeyRange each.
        """
        parent = super(ConsistentKeyReader, cls)
        readers = parent._split_input_from_params(app, namespaces,
                                                  entity_kind_name, params,
                                                  shard_count)
        if readers:
            return readers

        # The parent produced nothing: fall back to one reader over all
        # namespaces.
        whole_ns_ranges = []
        for namespace in namespaces:
            whole_ns_ranges.append(
                key_range.KeyRange(namespace=namespace, _app=app))
        return [cls(entity_kind_name, whole_ns_ranges)]
    def _create_iter(self, iter_cls, entity_kind):
        """Create an iterator over the inclusive key range "0" to "999".

        Args:
          iter_cls: iterator class; called with the key range and query spec.
          entity_kind: entity kind path; its short name is used for the
            boundary keys and for the query's entity kind.

        Returns:
          iter_cls(krange, query_spec).
        """

        def boundary_key(key_name):
            # Key named `key_name` of the short kind in this namespace.
            return db.Key.from_path(util.get_short_name(entity_kind),
                                    key_name,
                                    namespace=self.namespace)

        lower = boundary_key("0")
        upper = boundary_key("999")
        krange = key_range.KeyRange(lower,
                                    upper,
                                    include_start=True,
                                    include_end=True,
                                    namespace=self.namespace)

        spec = model.QuerySpec(entity_kind=util.get_short_name(entity_kind),
                               batch_size=10,
                               filters=self.filters,
                               model_class_path=entity_kind)
        return iter_cls(krange, spec)
    def _split_ns_by_scatter(cls, shard_count, namespace, raw_entity_kind,
                             app):
        """Split a namespace by scatter index into key_range.KeyRange.

        TODO: Power this with key_range.KeyRange.compute_split_points.

        Args:
          shard_count: number of shards.
          namespace: namespace name to split. str.
          raw_entity_kind: low level datastore API entity kind.
          app: app id in str.

        Returns:
          A list of key_range.KeyRange objects. If there are not enough
          entities to split into the requested number of shards, the
          returned list will contain KeyRanges ordered lexicographically
          with any Nones appearing at the end.
        """

        def asc_range(start, end, include_start):
            # One ascending range in this namespace; the end is never
            # included.
            return key_range.KeyRange(key_start=start,
                                      key_end=end,
                                      direction=key_range.KeyRange.ASC,
                                      include_start=include_start,
                                      include_end=False,
                                      namespace=namespace,
                                      _app=app)

        if shard_count == 1:
            # A single shard gets the whole namespace.
            return [key_range.KeyRange(namespace=namespace, _app=app)]

        scatter_query = datastore.Query(kind=raw_entity_kind,
                                        namespace=namespace,
                                        _app=app,
                                        keys_only=True)
        scatter_query.Order("__scatter__")
        oversampling_factor = 32
        sampled = scatter_query.Get(shard_count * oversampling_factor)

        if not sampled:
            # No keys came back: one whole-namespace range, then None
            # padding up to shard_count entries.
            padded = [key_range.KeyRange(namespace=namespace, _app=app)]
            padded.extend([None] * (shard_count - 1))
            return padded

        sampled.sort()

        if len(sampled) >= shard_count:
            # Enough keys were sampled: reduce them to the split points.
            sampled = cls._choose_split_points(sampled, shard_count)

        # Open range before the first point, half-open ranges between
        # adjacent points, open range after the last point.
        k_ranges = [asc_range(None, sampled[0], False)]
        for lower, upper in zip(sampled[:-1], sampled[1:]):
            k_ranges.append(asc_range(lower, upper, True))
        k_ranges.append(asc_range(sampled[-1], None, True))

        missing = shard_count - len(k_ranges)
        if missing > 0:
            # Fewer ranges than shards: pad the tail with None.
            k_ranges.extend([None] * missing)
        return k_ranges
Exemplo n.º 13
0
class DatastoreInputReader(InputReader):
    """Represents a range in query results.

    DatastoreInputReader yields model instances from the entities in a given
    key range. Iterating over DatastoreInputReader changes its range past
    consumed entries.

    The class shouldn't be instantiated directly. Use the split_input class
    method instead.
    """

    # Default number of entities fetched per datastore query.
    _BATCH_SIZE = 50

    # NOTE(review): not referenced in this class body; presumably an upper
    # bound on the shard count enforced elsewhere -- confirm.
    _MAX_SHARD_COUNT = 256

    # Parameter names; not read inside this class body.
    ENTITY_KIND_PARAM = "entity_kind"
    KEYS_ONLY_PARAM = "keys_only"
    BATCH_SIZE_PARAM = "batch_size"
    KEY_RANGE_PARAM = "key_range"

    def __init__(self, entity_kind, key_ranges, batch_size=_BATCH_SIZE):
        """Create new DatastoreInputReader object.

        This is internal constructor. Use split_input instead.

        Args:
          entity_kind: entity kind as string.
          key_ranges: a sequence of key_range.KeyRange instances to process.
          batch_size: size of read batch as int.
        """
        self._entity_kind = entity_kind
        # Stored reversed so the range being processed is always the last
        # element and can be dropped with pop().
        self._key_ranges = list(reversed(key_ranges))
        self._batch_size = int(batch_size)

    def __iter__(self):
        """Create a generator for model instances for entities.

        Iterating through entities moves query range past the consumed
        entities.

        Yields:
          next model instance.
        """
        while self._current_key_range is not None:
            while True:
                query = self._current_key_range.make_ascending_query(
                    util.for_name(self._entity_kind))
                results = query.fetch(limit=self._batch_size)

                if not results:
                    # Nothing left in this range: move on to the next one.
                    self._advance_key_range()
                    break

                for model_instance in results:
                    key = model_instance.key()

                    # Advance before yielding so the stored range already
                    # excludes this entity while the generator is suspended.
                    self._current_key_range.advance(key)
                    yield model_instance

    @property
    def _current_key_range(self):
        """The key range being processed, or None when none are left."""
        if self._key_ranges:
            return self._key_ranges[-1]
        return None

    def _advance_key_range(self):
        """Drop the current key range, if there is one."""
        if self._key_ranges:
            self._key_ranges.pop()

    @classmethod
    def _split_input_from_namespace(cls, app, namespace, entity_kind_name,
                                    shard_count):
        """Return KeyRange objects. Helper for _split_input_from_params.

        Finds the first and last key of the kind in the namespace and
        repeatedly splits the range between them.

        Args:
          app: app id passed to the datastore query and the KeyRange.
          namespace: namespace to split.
          entity_kind_name: entity kind name; shortened with
            util.get_short_name before querying.
          shard_count: requested number of shards; determines how many
            rounds of splitting are performed.

        Returns:
          A list of key_range.KeyRange objects, or an empty list when no
          entity of the kind could be retrieved.
        """

        raw_entity_kind = util.get_short_name(entity_kind_name)

        ds_query = datastore.Query(kind=raw_entity_kind,
                                   namespace=namespace,
                                   _app=app,
                                   keys_only=True)
        ds_query.Order("__key__")
        first_entity_key_list = ds_query.Get(1)
        if not first_entity_key_list:
            logging.warning("Could not retrieve an entity of type %s.",
                            raw_entity_kind)
            return []
        first_entity_key = first_entity_key_list[0]
        ds_query.Order(("__key__", datastore.Query.DESCENDING))
        try:
            # Single-element unpacking of the one-result fetch.
            last_entity_key, = ds_query.Get(1)
        except db.NeedIndexError as e:
            # The descending query failed for lack of an index: fall back
            # to a guessed end key.
            logging.warning(
                "Cannot create accurate approximation of keyspace, "
                "guessing instead. Please address this problem: %s", e)
            last_entity_key = key_range.KeyRange.guess_end_key(
                raw_entity_kind, first_entity_key)
        full_keyrange = key_range.KeyRange(first_entity_key,
                                           last_entity_key,
                                           None,
                                           True,
                                           True,
                                           namespace=namespace,
                                           _app=app)
        key_ranges = [full_keyrange]
        # floor(log2(shard_count)) rounds; each round replaces every range
        # with the result of its split_range(1) call.
        number_of_half_splits = int(math.floor(math.log(shard_count, 2)))
        for _ in range(0, number_of_half_splits):
            new_ranges = []
            for r in key_ranges:
                new_ranges += r.split_range(1)
            key_ranges = new_ranges
        return key_ranges
Exemplo n.º 14
0
    def testToKeyRangesByShard_UnevenNamespaces(self):
        """Check per-shard key ranges when namespaces hold unequal key sets."""

        def asc_range(ns, start_name, end_name):
            # Build one ascending KeyRange in `ns`; a None name leaves that
            # side of the range open. The start is inclusive only when set.
            start = (None if start_name is None
                     else testutil.key(start_name, namespace=ns))
            end = (None if end_name is None
                   else testutil.key(end_name, namespace=ns))
            return key_range.KeyRange(key_start=start,
                                      key_end=end,
                                      direction="ASC",
                                      include_start=start_name is not None,
                                      include_end=False,
                                      namespace=ns,
                                      _app=self.appid)

        namespaces = [str(i) for i in range(3)]
        testutil._create_entities(range(10), {"5": 5}, namespaces[0])
        testutil._create_entities(range(10), {"5": 5, "6": 6}, namespaces[1])
        testutil._create_entities(range(10), {"5": 5, "6": 6, "7": 7},
                                  namespaces[2])
        shards = 3

        expected = [
            # shard 1
            asc_range("0", None, "5"),
            asc_range("1", None, "5"),
            asc_range("2", None, "6"),
            # shard 2
            asc_range("0", "5", None),
            asc_range("1", "5", "6"),
            asc_range("2", "6", "7"),
            # shard 3
            asc_range("1", "6", None),
            asc_range("2", "7", None),
        ]
        kranges_by_shard = self.reader_cls._to_key_ranges_by_shard(
            self.appid, namespaces, shards,
            model.QuerySpec(entity_kind="TestEntity"))
        self.assertEquals(shards, len(kranges_by_shard))

        # Compare as sorted flat lists, ignoring how ranges are grouped
        # per shard.
        expected.sort()
        results = [krange for kranges in kranges_by_shard
                   for krange in kranges]
        results.sort()
        self.assertEquals(expected, results)