def test_unique_put(self):
    """Check Unique numbering via get_next and via raw entity access."""
    # The test assumes no Unique entities exist before it starts.
    self.assertFalse(Unique().query().fetch(limit=None))

    first_kind, first_str = "foo", "bar"
    second_kind, second_str = "baz", "qux"

    # Blocking get_next calls: each (kind, string) pair is expected to
    # count independently, starting from 1.
    blocking_cases = (
        (first_kind, first_str, first_str + "1"),
        (first_kind, first_str, first_str + "2"),
        (first_kind, second_str, second_str + "1"),
        (second_kind, first_str, first_str + "1"),
    )
    for kind, raw_str, expected in blocking_cases:
        self.assertEqual(expected, Unique.get_next(kind, raw_str))

    # Raw entity manipulation without passing a shard: the counter carries
    # on from the two get_next calls made above for (first_kind, first_str).
    counter = Unique.get_raw_entity(first_kind, first_str)
    for expected in (3, 4):
        self.assertEqual(expected, counter.get_and_increment())

    # Same again, this time supplying the shard explicitly.
    shard = UniqueShard.get_or_create(first_kind, first_str)
    counter = Unique.get_raw_entity(first_kind, first_str, shard=shard)
    for expected in (5, 6):
        self.assertEqual(expected, counter.get_and_increment())
def accept_multi(self, students):  #pylint: disable=too-many-locals, too-many-branches, too-many-statements
    """
    Assign a unique label to every student that does not have one yet.

    This is far more machinery than a typical weekly run needs, but the
    initial indexing process hits a bottleneck otherwise. Driving the
    datastore from here with async calls is more complex than letting the
    model do it, and the performance gain is judged worth that cost.

    Students whose ``self.student_label()`` attribute is already truthy are
    skipped (the value was already found in the student cache). For each
    remaining student the method:

    1. builds a raw string with ``self.generate(student)``,
    2. looks up (or creates) the ``UniqueShard`` for that string,
    3. looks up (or creates) the ``Unique`` entity under that shard,
    4. calls ``get_and_increment(commit=False)`` on the entity and stores
       ``raw_string + number`` on the student (just ``raw_string`` when the
       number is 1 and ``self.suppress_first`` is set), marking the
       student's ``cache_is_dirty`` flag,
    5. appends the entity to ``self.exporter`` (once per student), which is
       presumably responsible for persisting it -- confirm against the
       exporter implementation.

    Every datastore lookup and every creation is issued once per distinct
    shard id / raw string, so students that share a raw string also share
    a single in-memory ``Unique`` object and therefore receive consecutive
    numbers.

    students: iterable of student objects; mutated in place.
    Returns None.
    """
    # Skip students that already carry a label.
    pending = [student for student in students
               if not getattr(student, self.student_label())]

    # raw_strings and shard_ids are aligned with `pending` by position.
    raw_strings = []
    shard_ids = []

    # Start an async fetch for every distinct shard id.
    shard_lookups = {}
    for student in pending:
        raw_str = self.generate(student)
        raw_strings.append(raw_str)
        shard_id = UniqueShard.get_id(self.kind, raw_str)
        shard_ids.append(shard_id)
        if shard_id not in shard_lookups:
            shard_lookups[shard_id] = UniqueShard.get_by_id_async(shard_id)

    # Cash in the shard fetches; start an async put for each missing shard.
    shards_by_id = {}
    shard_puts = {}
    for shard_id, future in shard_lookups.items():
        shard = future.get_result()
        if shard:
            shards_by_id[shard_id] = shard
        else:
            shard_puts[shard_id] = UniqueShard(id=shard_id).put_async()

    # get_async futures yield an entity, but put_async futures yield a key.
    # Re-fetch through the key so created shards look like fetched ones.
    # All re-fetches are issued before any of them is waited on.
    shard_gets = {shard_id: future.get_result().get_async()
                  for shard_id, future in shard_puts.items()}
    for shard_id, future in shard_gets.items():
        shards_by_id[shard_id] = future.get_result()
    del shard_lookups, shard_puts, shard_gets

    # Start an async fetch for every distinct raw string, remembering the
    # parent key in case the entity has to be created.
    parent_keys = {}
    entity_lookups = {}
    for raw_str, shard_id in zip(raw_strings, shard_ids):
        if raw_str not in entity_lookups:
            parent_keys[raw_str] = shards_by_id[shard_id].key
            entity_lookups[raw_str] = Unique.get_by_id_async(
                raw_str, parent=parent_keys[raw_str])
    del shards_by_id, shard_ids

    # Cash in the entity fetches; start an async put for each missing one.
    entities_by_str = {}
    entity_puts = {}
    for raw_str, future in entity_lookups.items():
        entity = future.get_result()
        if entity:
            entities_by_str[raw_str] = entity
        else:
            entity_puts[raw_str] = Unique(
                parent=parent_keys[raw_str], id=raw_str).put_async()

    # Same key -> entity round trip as for the shards.
    entity_gets = {raw_str: future.get_result().get_async()
                   for raw_str, future in entity_puts.items()}
    for raw_str, future in entity_gets.items():
        entities_by_str[raw_str] = future.get_result()
    del entity_lookups, entity_puts, entity_gets, parent_keys

    # Populate the student objects. Students sharing a raw string increment
    # the same entity object, so their numbers do not collide.
    entities = []
    for student, raw_str in zip(pending, raw_strings):
        entity = entities_by_str[raw_str]
        unique_num = str(entity.get_and_increment(commit=False))
        if unique_num == "1" and self.suppress_first:
            new_str = raw_str
        else:
            new_str = raw_str + unique_num
        setattr(student, self.student_label(), new_str)
        student.cache_is_dirty = True
        entities.append(entity)

    # Hand the entities to the exporter only after every counter has been
    # incremented; one append per student, in student order.
    for entity in entities:
        self.exporter.append(entity)