def split_input(cls, job_config):
        """Split the job input into one reader per (property, namespace) range.

        When the query filters are not suitable for property-range sharding
        (as decided by ``property_range.should_shard_by_property_range``),
        the split is delegated to the parent class.

        Args:
          job_config: job configuration; its ``input_reader_params`` and
            ``shard_count`` attributes are read.

        Returns:
          A list of ``cls`` instances, each wrapping one property-range
          iterator; the parent class's result when delegating; or None when
          no namespace was requested and no namespace keys were found.
        """
        reader_params = job_config.input_reader_params
        num_shards = job_config.shard_count
        spec = cls._get_query_spec(reader_params)

        shard_by_property = property_range.should_shard_by_property_range(
            spec.filters)
        if not shard_by_property:
            return super(ModelDatastoreInputReader,
                         cls).split_input(job_config)

        base_range = property_range.PropertyRange(spec.filters,
                                                  spec.model_class_path)
        prop_ranges = base_range.split(num_shards)

        if spec.ns:
            # A namespace was given explicitly: pair every property range
            # with its own copy of a range covering just that namespace.
            pinned = namespace_range.NamespaceRange(
                namespace_start=spec.ns,
                namespace_end=spec.ns,
                _app=spec.app)
            namespace_ranges = [copy.copy(pinned) for _ in prop_ranges]
        else:
            # Ask for one key more than the threshold so that "more than
            # the threshold" can be told apart from "exactly the threshold".
            found_keys = namespace_range.get_namespace_keys(
                spec.app, cls.MAX_NAMESPACES_FOR_KEY_SHARD + 1)
            if not found_keys:
                return
            if len(found_keys) > cls.MAX_NAMESPACES_FOR_KEY_SHARD:
                # Many namespaces: shard by namespace and give every
                # namespace range a copy of the unsplit property range.
                namespace_ranges = namespace_range.NamespaceRange.split(
                    n=num_shards,
                    contiguous=False,
                    can_query=lambda: True,
                    _app=spec.app)
                prop_ranges = [copy.copy(base_range)
                               for _ in namespace_ranges]
            else:
                # Few namespaces: keep the property-range split and pair
                # each piece with a namespace range built from the app only.
                namespace_ranges = [
                    namespace_range.NamespaceRange(_app=spec.app)
                    for _ in prop_ranges
                ]

        assert len(prop_ranges) == len(namespace_ranges)

        # Build every iterator first, then wrap each one in a reader.
        range_iterators = []
        for prop, ns in zip(prop_ranges, namespace_ranges):
            range_iterators.append(
                db_iters.RangeIteratorFactory.create_property_range_iterator(
                    prop, ns, spec))
        return [cls(iterator) for iterator in range_iterators]
 def testSplitWithNoNamespacesInDatastoreWithContiguous(self):
     """Contiguous 10-way split with no namespaces created by the test.

     The expected result is a single NamespaceRange built from the app id
     alone.
     """
     expected = [namespace_range.NamespaceRange(_app=self.app_id)]
     actual = namespace_range.NamespaceRange.split(
         10, contiguous=True, can_query=lambda: True, _app=self.app_id)
     self.assertEqual(expected, actual)
 def testSplitWithoutQueriesWithContiguous(self):
     """Contiguous 4-way split when ``can_query`` always answers False."""
     # (namespace_start, namespace_end) of each expected range, in order.
     bounds = [('', 'abc'), ('ac', 'bb'), ('bba', 'caa'), ('cab', 'ccc')]
     expected = [
         namespace_range.NamespaceRange(
             namespace_start=start, namespace_end=end, _app=self.app_id)
         for start, end in bounds
     ]
     actual = namespace_range.NamespaceRange.split(
         4, contiguous=True, can_query=lambda: False, _app=self.app_id)
     self.assertEqual(expected, actual)
    def testQueryPaging(self):
        """Iterating a NamespaceRange walks namespace keys page by page.

        ``datastore.Query`` is replaced by mox mocks. Three queries are
        recorded, with cursors None, 'c1' and 'c2', each run with
        ``limit=3``; together they hand back 3 + 3 + 1 keys, and the test
        asserts that iterating the range yields 7 items.
        """
        # Every datastore.Query(...) constructed below is a recorded mock
        # expectation rather than a real query.
        self.mox.StubOutClassWithMocks(datastore, 'Query')
        ns_range = namespace_range.NamespaceRange(namespace_start='a',
                                                  namespace_end='b',
                                                  _app=self.app_id)
        ns_kind = '__namespace__'
        ns_key = lambda ns: db.Key.from_path(ns_kind, ns)
        # Key filters covering the namespaces 'a' through 'b' inclusive.
        filters = {'__key__ >= ': ns_key('a'), '__key__ <= ': ns_key('b')}

        def ExpectQuery(cursor):
            """Record an expected keys-only namespace query for ``cursor``."""
            return datastore.Query(ns_kind,
                                   filters=filters,
                                   keys_only=True,
                                   cursor=cursor,
                                   _app=self.app_id)

        # Page 1: no cursor, a full page of 3 keys, then cursor 'c1'.
        query = ExpectQuery(None)
        query.Run(limit=3).AndReturn([ns_key(ns) for ns in ['a', 'aa', 'aaa']])
        query.GetCursor().AndReturn('c1')

        # Page 2: resumes from 'c1', another full page, then cursor 'c2'.
        query = ExpectQuery('c1')
        query.Run(limit=3).AndReturn(
            [ns_key(ns) for ns in ['aab', 'ab', 'ac']])
        query.GetCursor().AndReturn('c2')

        # Page 3: resumes from 'c2' and returns a single key; no GetCursor
        # call is recorded for this page.
        query = ExpectQuery('c2')
        query.Run(limit=3).AndReturn([ns_key('b')])

        # Switch from recording expectations to replaying them.
        self.mox.ReplayAll()
        self.assertEqual(7, len(list(ns_range)))
Exemplo n.º 5
0
  def testKeyRangesFromNSRange(self):
    """Key ranges built from a namespace range: one KeyRange per namespace."""
    populated = ["1", "3", "5"]
    self.create_entities_in_multiple_ns(populated)

    source_range = namespace_range.NamespaceRange("0", "5", _app=self.app)
    actual = key_ranges.KeyRangesFactory.create_from_ns_range(source_range)

    # Spelled out independently of the fixture list above so the
    # expectation does not depend on what the helper does with it.
    expected = [
        key_range.KeyRange(namespace=name, _app=self.app)
        for name in ("1", "3", "5")
    ]
    self._assertEqualsAndSerialize(expected, actual)
    def testSplitWithOnlyDefaultNamespaceWithContiguous(self):
        """Contiguous 10-way split after creating only in namespace ''.

        The expected result is a single range running from '' to 'ccc'.
        """
        self.CreateInNamespace('')

        whole_range = namespace_range.NamespaceRange(
            namespace_start='', namespace_end='ccc', _app=self.app_id)
        actual = namespace_range.NamespaceRange.split(
            10, contiguous=True, can_query=lambda: True, _app=self.app_id)
        self.assertEqual([whole_range], actual)
 def _create_iter(self, entity_kind):
     """Create a property-range iterator for ``entity_kind``.

     The iterator is built from this fixture's ``filters`` and a namespace
     range running from the first to the last entry of ``namespaces``.

     Args:
       entity_kind: used as the model class path; its short name (via
         ``util.get_short_name``) is used as the query's entity kind.

     Returns:
       Whatever ``RangeIteratorFactory.create_property_range_iterator``
       returns for the assembled property range, namespace range and spec.
     """
     spec = model.QuerySpec(
         entity_kind=util.get_short_name(entity_kind),
         batch_size=10,
         filters=self.filters,
         model_class_path=entity_kind)
     prop_range = property_range.PropertyRange(self.filters, entity_kind)
     first_ns = self.namespaces[0]
     last_ns = self.namespaces[-1]
     span = namespace_range.NamespaceRange(first_ns, last_ns)
     return db_iters.RangeIteratorFactory.create_property_range_iterator(
         prop_range, span, spec)
    def testSplitWithInfiniteQueriesLargerSplitThanNamespaces(self):
        """Non-contiguous split into 10 ranges with only 6 namespaces.

        The expected result is one single-namespace range per namespace
        created.
        """
        created = ('a', 'aa', 'aab', 'b', 'bac', 'cca')
        for name in created:
            self.CreateInNamespace(name)

        # Each expected range starts and ends on the same namespace.
        expected = [
            namespace_range.NamespaceRange(namespace_start=name,
                                           namespace_end=name)
            for name in created
        ]
        actual = namespace_range.NamespaceRange.split(
            10, contiguous=False, can_query=lambda: True)
        self.assertEqual(expected, actual)
    def testSplitWithInfiniteQueriesSmallerSplitThanNamespaces(self):
        """Non-contiguous split into 3 ranges with 6 namespaces."""
        # The data is contiguous even though contiguity is not under test.
        # Because the mid-point is rounded down, 'aa' and 'aba' are left out
        # so that each range's start and end match exactly and every range
        # holds 2 items.
        for name in ('a', 'aaa', 'aab', 'aac', 'ab', 'abb'):
            self.CreateInNamespace(name)

        # (namespace_start, namespace_end) of each expected range, in order.
        pairs = [('a', 'aaa'), ('aab', 'aac'), ('ab', 'abb')]
        expected = [
            namespace_range.NamespaceRange(namespace_start=start,
                                           namespace_end=end)
            for start, end in pairs
        ]
        actual = namespace_range.NamespaceRange.split(
            3, contiguous=False, can_query=lambda: True)
        self.assertEqual(expected, actual)
Exemplo n.º 10
0
  def _split_input_from_params(cls, app, namespaces, entity_kind_name,
                               params, shard_count):
    """Split like the parent class, but never return an empty result.

    Args:
      app: forwarded unchanged to the parent implementation.
      namespaces: forwarded unchanged to the parent implementation.
      entity_kind_name: forwarded to the parent and used for the fallback
        reader.
      params: forwarded unchanged to the parent implementation.
      shard_count: forwarded to the parent; also passed as ``batch_size``
        to the fallback reader.

    Returns:
      The parent's readers when that result is truthy; otherwise a
      one-element list holding a reader over a default NamespaceRange.
    """
    found = super(ConsistentKeyReader, cls)._split_input_from_params(
        app, namespaces, entity_kind_name, params, shard_count)
    if found:
      return found

    # At least one namespace range is always produced because:
    # a) there might be unapplied entities
    # b) it simplifies mapper code
    # NOTE(review): batch_size receives shard_count and the NamespaceRange
    # is built without `app` -- both look unintended; confirm against the
    # reader's constructor before changing.
    fallback = cls(entity_kind_name,
                   key_ranges=None,
                   ns_range=namespace_range.NamespaceRange(),
                   batch_size=shard_count)
    return [fallback]
    def testSplitWithInfiniteQueriesWithContiguous(self):
        """Contiguous split into 10 ranges with 6 namespaces created."""
        for name in ('a', 'aa', 'aab', 'b', 'bac', 'cca'):
            self.CreateInNamespace(name)

        # (namespace_start, namespace_end) of each expected range, in order.
        pairs = [
            ('', 'a'),
            ('aa', 'aaa'),
            ('aab', 'acc'),
            ('b', 'bab'),
            ('bac', 'cc'),
            ('cca', 'ccc'),
        ]
        expected = [
            namespace_range.NamespaceRange(namespace_start=start,
                                           namespace_end=end)
            for start, end in pairs
        ]
        actual = namespace_range.NamespaceRange.split(
            10, contiguous=True, can_query=lambda: True)
        self.assertEqual(expected, actual)
 def testFromJsonObjectWithApp(self):
     """from_json_object with an 'app' entry equals a range built with _app."""
     expected = namespace_range.NamespaceRange('a', 'b', _app='myapp')
     json_form = {
         'namespace_start': 'a',
         'namespace_end': 'b',
         'app': 'myapp',
     }
     restored = namespace_range.NamespaceRange.from_json_object(json_form)
     self.assertEqual(expected, restored)
 def testToJsonObject(self):
     """to_json_object of a range built without an app has only the bounds."""
     expected = {'namespace_start': 'a', 'namespace_end': 'b'}
     serialized = namespace_range.NamespaceRange('a', 'b').to_json_object()
     self.assertEqual(expected, serialized)