def update(self):
    '''
    Periodic metadata refresh.

    Prunes running-count timestamps and backoff entries that are older than
    5 seconds, then rebuilds self.key_locations (cached key -> list of
    executor IPs) from the per-IP cache metadata stored in the KVS.
    '''
    # Read the clock once so that every entry in this pass is judged against
    # the same cutoff, rather than one that drifts as the loops run.
    now = time.time()

    # Periodically clean up the running counts map to drop any times older
    # than 5 seconds.
    for executor in self.running_counts:
        self.running_counts[executor] = {
            ts for ts in self.running_counts[executor] if now - ts < 5
        }

    # Clean up any backoff messages that were added more than 5 seconds ago
    # -- this should be enough to drain a queue. The expired executors are
    # collected first because the dict cannot shrink while being iterated.
    expired = [
        executor for executor, added in self.backoff.items()
        if now - added > 5
    ]
    for executor in expired:
        del self.backoff[executor]

    executors = {status.ip for status in self.thread_statuses.values()}

    # Update the sets of keys that are being cached at each IP address.
    self.key_locations.clear()
    for ip in executors:
        key = get_cache_ip_key(ip)

        # This is of type LWWPairLattice, which has a StringSet protobuf
        # packed into it; we want the keys in that StringSet protobuf.
        lattice = self.kvs_client.get(key)[key]
        if lattice is None:
            # We will only get None if this executor is still joining; if
            # so, we just ignore this for now and move on.
            continue

        st = StringSet()
        st.ParseFromString(lattice.reveal())

        # A distinct loop name keeps the KVS lookup key above from being
        # shadowed by the cached keys.
        for cached_key in st.keys:
            self.key_locations.setdefault(cached_key, []).append(ip)
def test_metadata_update(self):
    '''
    This test calls the periodic metadata update protocol and ensures that
    the correct metadata is removed from the system and that the correct
    metadata is retrieved/updated from the KVS.
    '''
    # Create two executor threads on separate machines.
    old_ip = '127.0.0.1'
    new_ip = '192.168.0.1'
    old_executor = (old_ip, 1)
    new_executor = (new_ip, 2)

    old_status = ThreadStatus()
    old_status.ip = old_ip
    old_status.tid = 1
    old_status.running = True

    new_status = ThreadStatus()
    new_status.ip = new_ip
    new_status.tid = 2
    new_status.running = True

    self.policy.thread_statuses[old_executor] = old_status
    self.policy.thread_statuses[new_executor] = new_status

    # Add two executors, one with an old backoff time and one with a new
    # time.
    self.policy.backoff[old_executor] = time.time() - 10
    self.policy.backoff[new_executor] = time.time()

    # For the new executor, add 10 old running times and 10 new ones. The
    # short sleeps keep the timestamps distinct, since they are stored in a
    # set and duplicates would collapse.
    self.policy.running_counts[new_executor] = set()
    for _ in range(10):
        time.sleep(.0001)
        self.policy.running_counts[new_executor].add(time.time() - 10)

    for _ in range(10):
        time.sleep(.0001)
        self.policy.running_counts[new_executor].add(time.time())

    # Publish some caching metadata into the KVS for each executor.
    old_set = StringSet()
    old_set.keys.extend(['key1', 'key2', 'key3'])
    new_set = StringSet()
    new_set.keys.extend(['key3', 'key4', 'key5'])
    self.kvs_client.put(get_cache_ip_key(old_ip),
                        LWWPairLattice(0, old_set.SerializeToString()))
    self.kvs_client.put(get_cache_ip_key(new_ip),
                        LWWPairLattice(0, new_set.SerializeToString()))

    self.policy.update()

    # Check that the metadata has been correctly pruned.
    self.assertEqual(len(self.policy.backoff), 1)
    self.assertIn(new_executor, self.policy.backoff)
    self.assertEqual(len(self.policy.running_counts[new_executor]), 10)

    # Check that the caching information is correct. These must be
    # assertEqual: assertTrue(x, n) would treat n as the failure message and
    # only check that the length is non-zero.
    self.assertEqual(len(self.policy.key_locations['key1']), 1)
    self.assertEqual(len(self.policy.key_locations['key2']), 1)
    self.assertEqual(len(self.policy.key_locations['key3']), 2)
    self.assertEqual(len(self.policy.key_locations['key4']), 1)
    self.assertEqual(len(self.policy.key_locations['key5']), 1)

    self.assertIn(old_ip, self.policy.key_locations['key1'])
    self.assertIn(old_ip, self.policy.key_locations['key2'])
    self.assertIn(old_ip, self.policy.key_locations['key3'])
    self.assertIn(new_ip, self.policy.key_locations['key3'])
    self.assertIn(new_ip, self.policy.key_locations['key4'])
    self.assertIn(new_ip, self.policy.key_locations['key5'])