Exemplo n.º 1
0
    def test_unique_put(self):
        """Check Unique numbering through get_next and raw-entity increments."""
        # The datastore must not contain any Unique entities at the start.
        self.assertFalse(Unique().query().fetch(limit=None))

        first_kind, second_kind = "foo", "baz"
        first_base, second_base = "bar", "qux"

        # Blocking calls, in order: the same (kind, string) pair is expected
        # to count upwards, while a new string or a new kind starts at 1.
        blocking_cases = (
            (first_kind, first_base, "1"),
            (first_kind, first_base, "2"),
            (first_kind, second_base, "1"),
            (second_kind, first_base, "1"),
        )
        for kind, base, suffix in blocking_cases:
            generated = Unique.get_next(kind, base)
            self.assertEqual(base + suffix, generated)

        # Raw entity without an explicit shard: numbering continues after the
        # two get_next calls made above for the same pair.
        counter = Unique.get_raw_entity(first_kind, first_base)
        for expected in (3, 4):
            self.assertEqual(expected, counter.get_and_increment())

        # Same pair again, this time passing the shard in by hand.
        shard = UniqueShard.get_or_create(first_kind, first_base)
        sharded_counter = Unique.get_raw_entity(
            first_kind, first_base, shard=shard)
        for expected in (5, 6):
            self.assertEqual(expected, sharded_counter.get_and_increment())
Exemplo n.º 2
0
    def accept_multi(self, students): #pylint: disable=too-many-locals, too-many-branches, too-many-statements
        """
        Assign a generated label to every student that does not have one yet.

        This is about ten thousand kinds of overkill for a typical weekly run,
        but we're hitting a bottleneck during the initial indexing process.
        Shifting control from the model to here and making everything async
        increases the complexity, but the performance gains make it
        an acceptable tradeoff.

        The work is done in phases so that every batch of datastore calls is
        issued before any result of that batch is awaited:

        1. look up the shard for each raw string,
        2. create the shards that were not found,
        3. look up the Unique entity for each raw string,
        4. create the entities that were not found,
        5. increment each entity and write the label onto the student,
        6. append the entities to ``self.exporter``.

        Shards are tracked per shard id and entities per raw string.  Students
        that share a raw string therefore share a single entity object, no
        matter whether it was fetched or newly created, and a missing
        shard/entity is put exactly once instead of once per student.

        Args:
            students: iterable of student objects.  A student whose
                ``self.student_label()`` attribute is already truthy is
                skipped.  Every other student gets that attribute set to the
                generated string and ``cache_is_dirty`` set to True.

        Returns:
            None.
        """

        # First off, can we ignore any of the students that were provided?
        # An existing attr means that data was already found in the student cache.
        pending = [
            student for student in students
            if not getattr(student, self.student_label())
        ]

        # Both lists are aligned with ``pending``; safe to zip them together.
        raw_strings = []
        shard_ids = []

        # Phase 1: async pull any shards that already exist (one lookup per id).
        shard_futures = {}
        for student in pending:
            raw_str = self.generate(student)
            shard_id = UniqueShard.get_id(self.kind, raw_str)
            raw_strings.append(raw_str)
            shard_ids.append(shard_id)
            if shard_id not in shard_futures:
                shard_futures[shard_id] = UniqueShard.get_by_id_async(shard_id)

        # Phase 2: cash in all shard futures and async put any shard that
        # wasn't found.  Keyed by shard id so a missing shard is only put once
        # even when several students map onto it.
        shards = {}
        shard_puts = []
        for shard_id in shard_ids:
            if shard_id in shards:
                continue
            shard = shard_futures[shard_id].get_result()
            shards[shard_id] = shard
            if not shard:
                shard_puts.append(
                    (shard_id, UniqueShard(id=shard_id).put_async()))
        # Drop the futures so they can be collected before the next phase.
        del shard_futures

        # get_async futures return an entity, but put_async futures return a
        # key instead.  Re-fetch through the key so new shards end up in the
        # same shape as the ones that already existed.
        shard_gets = []
        for shard_id, put_future in shard_puts:
            shard_gets.append((shard_id, put_future.get_result().get_async()))
        for shard_id, get_future in shard_gets:
            shards[shard_id] = get_future.get_result()
        del shard_puts, shard_gets

        # Phase 3: async pull any entities that exist (one lookup per string).
        entity_futures = {}
        for raw_str, shard_id in zip(raw_strings, shard_ids):
            if raw_str not in entity_futures:
                entity_futures[raw_str] = Unique.get_by_id_async(
                    raw_str, parent=shards[shard_id].key)

        # Phase 4: cash in all entity futures and async put any entity that
        # wasn't found -- again once per raw string, so every student with
        # that string later increments the very same object.
        entities = {}
        entity_puts = []
        for raw_str, shard_id in zip(raw_strings, shard_ids):
            if raw_str in entities:
                continue
            entity = entity_futures[raw_str].get_result()
            entities[raw_str] = entity
            if not entity:
                new_entity = Unique(parent=shards[shard_id].key, id=raw_str)
                entity_puts.append((raw_str, new_entity.put_async()))
        del entity_futures, shards, shard_ids

        # Same key -> entity round trip as for the shards above.
        entity_gets = []
        for raw_str, put_future in entity_puts:
            entity_gets.append((raw_str, put_future.get_result().get_async()))
        for raw_str, get_future in entity_gets:
            entities[raw_str] = get_future.get_result()
        del entity_puts, entity_gets

        # Phase 5: populate student objects.
        # NOTE(review): commit=False presumably leaves the write to the
        # exporter below -- confirm against Unique.get_and_increment.
        for student, raw_str in zip(pending, raw_strings):
            unique_num = str(entities[raw_str].get_and_increment(commit=False))
            if unique_num == "1" and self.suppress_first:
                new_str = raw_str
            else:
                new_str = raw_str + unique_num
            setattr(student, self.student_label(), new_str)
            student.cache_is_dirty = True

        # Phase 6: async put all entities via our exporter object.  One append
        # per processed student, in student order, as before.
        for raw_str in raw_strings:
            self.exporter.append(entities[raw_str])