def create_dag(dag_create_socket, pusher_cache, kvs, dags, policy,
               call_frequency, num_replicas=1):
    serialized = dag_create_socket.recv()

    dag = Dag()
    dag.ParseFromString(serialized)

    # We do not allow duplicate DAGs, so we return an error to the user if we
    # already know about this DAG.
    if dag.name in dags:
        sutils.error.error = DAG_ALREADY_EXISTS
        dag_create_socket.send(sutils.error.SerializeToString())
        return

    logging.info('Creating DAG %s.' % (dag.name))

    # We persist the DAG in the KVS, so other schedulers can read the DAG when
    # they hear about it.
    payload = LWWPairLattice(sutils.generate_timestamp(0), serialized)
    kvs.put(dag.name, payload)

    for fref in dag.functions:
        for _ in range(num_replicas):
            success = policy.pin_function(dag.name, fref)

            # The policy engine will only return False if it ran out of
            # resources on which to attempt to pin this function.
            if not success:
                logging.info(f'Creating DAG {dag.name} failed due to '
                             + 'insufficient resources.')
                sutils.error.error = NO_RESOURCES
                dag_create_socket.send(sutils.error.SerializeToString())

                # Unpin any previously pinned functions because the operation
                # failed.
                policy.discard_dag(dag, True)
                return

    # Only create this metadata after all functions have been successfully
    # created.
    for fref in dag.functions:
        if fref.name not in call_frequency:
            call_frequency[fref.name] = 0

    policy.commit_dag(dag.name)
    dags[dag.name] = (dag, utils.find_dag_source(dag))
    dag_create_socket.send(sutils.ok_resp)
def dump_lattice(self, value, typ=None, causal_dependencies={}):
    if not typ:
        if isinstance(value, set):
            return self.dump_lattice(value, SetLattice)
        elif isinstance(value, dict):
            return self.dump_lattice(value, MapLattice)
        elif isinstance(value, list):
            return self.dump_lattice(value, OrderedSetLattice)
        else:
            return self.dump_lattice(value, LWWPairLattice)

    if typ == SetLattice:
        result = set()
        for v in value:
            result.add(self.dump(v))

        result = SetLattice(result)
    elif typ == MapLattice:
        result = {}
        for key in value:
            result[key] = self.dump_lattice(value[key])

        result = MapLattice(result)
    elif typ == OrderedSetLattice:
        result = list()
        for v in value:
            result.append(self.dump(v))

        result = OrderedSetLattice(ListBasedOrderedSet(result))
    elif typ == LWWPairLattice:
        result = LWWPairLattice(generate_timestamp(0), self.dump(value))
    elif typ == SingleKeyCausalLattice:
        # We assume that we will use the default vector clock for causal
        # metadata.
        data = SetLattice({self.dump(value)})
        result = SingleKeyCausalLattice(DEFAULT_VC, data)
    elif typ == MultiKeyCausalLattice:
        # We assume that we will use the default vector clock for causal
        # metadata.
        data = SetLattice({self.dump(value)})
        result = MultiKeyCausalLattice(DEFAULT_VC,
                                       MapLattice(causal_dependencies),
                                       data)
    else:
        raise ValueError(f'Unexpected lattice type: {str(typ)}')

    return result
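# A minimal usage sketch for dump_lattice, assuming `serializer` is an
# instance of the class defined above; the values are illustrative. With no
# explicit type, plain objects map to LWWPairLattice, sets to SetLattice,
# dicts to MapLattice, and lists to OrderedSetLattice.
lww = serializer.dump_lattice(42)                    # LWWPairLattice
unordered = serializer.dump_lattice({'a', 'b'})      # SetLattice
ordered = serializer.dump_lattice(['a', 'b'])        # OrderedSetLattice
nested = serializer.dump_lattice({'k': {'x', 'y'}})  # MapLattice of lattices
causal = serializer.dump_lattice('v', SingleKeyCausalLattice)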
def create_function(func_create_socket, kvs, consistency=NORMAL):
    func = Function()
    func.ParseFromString(func_create_socket.recv())

    name = sutils.get_func_kvs_name(func.name)
    logging.info('Creating function %s.' % (name))

    if consistency == NORMAL:
        body = LWWPairLattice(sutils.generate_timestamp(0), func.body)
        kvs.put(name, body)
    else:
        skcl = SingleKeyCausalLattice(sutils.DEFAULT_VC,
                                      SetLattice({func.body}))
        kvs.put(name, skcl)

    funcs = utils.get_func_list(kvs, '', fullname=True)
    funcs.append(name)
    utils.put_func_list(kvs, funcs)

    func_create_socket.send(sutils.ok_resp)
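# A hedged client-side sketch for create_function, assuming the scheduler
# exposes func_create_socket as a ZeroMQ REP socket; the address below and
# the use of pyzmq on the client side are assumptions, not confirmed here.
import zmq

func = Function()                 # the same Protobuf message parsed above
func.name = 'my_function'
func.body = b'<serialized function body>'

ctx = zmq.Context()
sock = ctx.socket(zmq.REQ)
sock.connect('tcp://127.0.0.1:5000')   # hypothetical scheduler address
sock.send(func.SerializeToString())
resp = sock.recv()                      # sutils.ok_resp on success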
def test_metadata_update(self):
    '''
    This test calls the periodic metadata update protocol and ensures that
    the correct metadata is removed from the system and that the correct
    metadata is retrieved/updated from the KVS.
    '''
    # Create two executor threads on separate machines.
    old_ip = '127.0.0.1'
    new_ip = '192.168.0.1'
    old_executor = (old_ip, 1)
    new_executor = (new_ip, 2)

    old_status = ThreadStatus()
    old_status.ip = old_ip
    old_status.tid = 1
    old_status.running = True

    new_status = ThreadStatus()
    new_status.ip = new_ip
    new_status.tid = 2
    new_status.running = True

    self.policy.thread_statuses[old_executor] = old_status
    self.policy.thread_statuses[new_executor] = new_status

    # Add two executors, one with an old backoff time and one with a new one.
    self.policy.backoff[old_executor] = time.time() - 10
    self.policy.backoff[new_executor] = time.time()

    # For the new executor, add 10 old running times and 10 new ones.
    self.policy.running_counts[new_executor] = set()
    for _ in range(10):
        time.sleep(.0001)
        self.policy.running_counts[new_executor].add(time.time() - 10)

    for _ in range(10):
        time.sleep(.0001)
        self.policy.running_counts[new_executor].add(time.time())

    # Publish some caching metadata into the KVS for each executor.
    old_set = StringSet()
    old_set.keys.extend(['key1', 'key2', 'key3'])
    new_set = StringSet()
    new_set.keys.extend(['key3', 'key4', 'key5'])
    self.kvs_client.put(get_cache_ip_key(old_ip),
                        LWWPairLattice(0, old_set.SerializeToString()))
    self.kvs_client.put(get_cache_ip_key(new_ip),
                        LWWPairLattice(0, new_set.SerializeToString()))

    self.policy.update()

    # Check that the metadata has been correctly pruned.
    self.assertEqual(len(self.policy.backoff), 1)
    self.assertTrue(new_executor in self.policy.backoff)
    self.assertEqual(len(self.policy.running_counts[new_executor]), 10)

    # Check that the caching information is correct.
    self.assertEqual(len(self.policy.key_locations['key1']), 1)
    self.assertEqual(len(self.policy.key_locations['key2']), 1)
    self.assertEqual(len(self.policy.key_locations['key3']), 2)
    self.assertEqual(len(self.policy.key_locations['key4']), 1)
    self.assertEqual(len(self.policy.key_locations['key5']), 1)

    self.assertTrue(old_ip in self.policy.key_locations['key1'])
    self.assertTrue(old_ip in self.policy.key_locations['key2'])
    self.assertTrue(old_ip in self.policy.key_locations['key3'])
    self.assertTrue(new_ip in self.policy.key_locations['key3'])
    self.assertTrue(new_ip in self.policy.key_locations['key4'])
    self.assertTrue(new_ip in self.policy.key_locations['key5'])
def avg(fluent, uid, eid, num_execs, val):
    from anna.lattices import LWWPairLattice
    import cloudpickle as cp
    import numpy as np
    import random
    import time

    gstart = time.time()

    # Advertise this executor's id under a per-invocation key so peers can
    # find it.
    myid = fluent.getid()
    key = '%s:%d' % (uid, eid)
    fluent.put(key, LWWPairLattice(0, cp.dumps(myid)))

    # Wait until every other executor has published its id, then build the
    # set of gossip peers.
    procs = set()
    keyset = []
    for i in range(num_execs):
        if i == eid:
            continue

        key = '%s:%d' % (uid, i)
        keyset.append(key)

    locs = fluent.get(keyset)
    while None in locs.values():
        locs = fluent.get(keyset)

    for key in locs:
        procs.add(cp.loads(locs[key].reveal()[1]))

    curr_val = val
    curr_weight = 1
    curr_avg = None

    val_msgs = [curr_val]
    weight_msgs = [curr_weight]

    rounds = 0
    NUM_ROUNDS = 5
    while rounds < NUM_ROUNDS:
        curr_val = np.sum(val_msgs)
        curr_weight = np.sum(weight_msgs)

        # Push half of our (value, weight) mass to a random peer and keep
        # the other half locally.
        dst = random.sample(list(procs), 1)[0]
        fluent.send(dst, cp.dumps((curr_val * .5, curr_weight * .5)))

        val_msgs.clear()
        weight_msgs.clear()

        val_msgs.append(curr_val * .5)
        weight_msgs.append(curr_weight * .5)

        # Collect incoming gossip for a 100ms window.
        start = time.time()
        while time.time() - start < .1:
            msgs = fluent.recv()

            for msg in msgs:
                msg = cp.loads(msg[1])
                val_msgs.append(msg[0])
                weight_msgs.append(msg[1])

        new_avg = curr_val / curr_weight
        curr_avg = new_avg
        rounds += 1

    return curr_avg
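# A hypothetical driver sketch for avg, assuming `flconn` is a client with
# the same register/call interface used by the other benchmarks in this
# section; the runtime supplies the `fluent` argument, so callers pass only
# (uid, eid, num_execs, val). The names below are illustrative.
import uuid

cloud_avg = flconn.register(avg, 'avg')

uid = str(uuid.uuid4())
num_execs = 4
values = [1.0, 2.0, 3.0, 4.0]

# Launch one executor per value; each gossips half of its (value, weight)
# pair to a random peer for NUM_ROUNDS rounds and returns its local estimate.
futures = [cloud_avg(uid, eid, num_execs, values[eid])
           for eid in range(num_execs)]
results = [f.get() for f in futures]  # each result should approach 2.5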
def run(droplet_client, num_requests, sckt):
    ''' UPLOAD THE MODEL OBJECT '''
    model_key = 'mobilenet-model'
    label_key = 'mobilenet-label-map'

    with open('model/label_map.json', 'rb') as f:
        bts = f.read()
        lattice = LWWPairLattice(0, bts)
        droplet_client.kvs_client.put(label_key, lattice)

    with open('model/mobilenet_v2_1.4_224_frozen.pb', 'rb') as f:
        bts = f.read()
        lattice = LWWPairLattice(0, bts)
        droplet_client.kvs_client.put(model_key, lattice)

    ''' DEFINE AND REGISTER FUNCTIONS '''
    def preprocess(droplet, inp):
        from skimage import filters
        return filters.gaussian(inp).reshape(1, 224, 224, 3)

    class Mobilenet:
        def __init__(self, droplet, model_key, label_map_key):
            import tensorflow as tf
            import json

            tf.enable_eager_execution()

            self.model = droplet.get(model_key, deserialize=False)
            self.label_map = json.loads(
                droplet.get(label_map_key, deserialize=False))

            self.gd = tf.GraphDef.FromString(self.model)
            self.inp, self.predictions = tf.import_graph_def(
                self.gd, return_elements=[
                    'input:0', 'MobilenetV2/Predictions/Reshape_1:0'
                ])

        def run(self, droplet, img):
            # load libs
            import tensorflow as tf
            from PIL import Image
            from io import BytesIO
            import base64
            import numpy as np
            import json

            tf.enable_eager_execution()

            # load image and model
            # img = np.array(Image.open(BytesIO(base64.b64decode(img)))
            #                .resize((224, 224))).astype(np.float) / 128 - 1

            with tf.Session(graph=self.inp.graph):
                x = self.predictions.eval(feed_dict={self.inp: img})

            return x

    def average(droplet, inp):
        import numpy as np
        inp = [inp]
        return np.mean(inp, axis=0)

    cloud_prep = droplet_client.register(preprocess, 'preprocess')
    cloud_mnet = droplet_client.register((Mobilenet, (model_key, label_key)),
                                         'mnet')
    cloud_average = droplet_client.register(average, 'average')

    if cloud_prep and cloud_mnet and cloud_average:
        print('Successfully registered preprocess, mnet, and average '
              + 'functions.')
    else:
        sys.exit(1)

    ''' TEST REGISTERED FUNCTIONS '''
    arr = np.random.randn(1, 224, 224, 3)

    prep_test = cloud_prep(arr).get()
    if type(prep_test) != np.ndarray:
        print('Unexpected result from preprocess(arr): %s' % (str(prep_test)))
        sys.exit(1)

    mnet_test = cloud_mnet(prep_test).get()
    if type(mnet_test) != np.ndarray:
        print('Unexpected result from mobilenet(arr): %s' % (str(mnet_test)))
        sys.exit(1)

    average_test = cloud_average(mnet_test).get()
    if type(average_test) != np.ndarray:
        print('Unexpected result from average(arr): %s' % (str(average_test)))
        sys.exit(1)

    print('Successfully tested functions!')

    ''' CREATE DAG '''
    dag_name = 'mnet'

    functions = ['preprocess', 'mnet', 'average']
    connections = [('preprocess', 'mnet'), ('mnet', 'average')]
    success, error = droplet_client.register_dag(dag_name, functions,
                                                 connections)

    if not success:
        print('Failed to register DAG: %s' % (str(error)))
        sys.exit(1)

    ''' RUN DAG '''
    total_time = []

    # Create all the input data
    oids = []
    for _ in range(num_requests):
        arr = np.random.randn(1, 224, 224, 3)
        oid = str(uuid.uuid4())
        oids.append(oid)

        droplet_client.put_object(oid, arr)

    for i in range(num_requests):
        oid = oids[i]

        arg_map = {'preprocess': [DropletReference(oid, True)]}

        start = time.time()
        droplet_client.call_dag(dag_name, arg_map, True)
        end = time.time()

        total_time += [end - start]

    if sckt:
        sckt.send(cp.dumps(total_time))

    return total_time, [], [], 0
def _deserialize(self, tup):
    if tup.lattice_type == LWW:
        # Deserialize last-writer-wins lattices
        val = LWWValue()
        val.ParseFromString(tup.payload)

        return LWWPairLattice(val.timestamp, val.value)
    elif tup.lattice_type == SET:
        # Deserialize unordered-set lattices
        s = SetValue()
        s.ParseFromString(tup.payload)

        result = set()
        for k in s.values:
            result.add(k)

        return SetLattice(result)
    elif tup.lattice_type == ORDERED_SET:
        # Deserialize ordered-set lattices
        res = ListBasedOrderedSet()
        val = SetValue()
        val.ParseFromString(tup.payload)
        for v in val.values:
            res.insert(v)

        return OrderedSetLattice(res)
    elif tup.lattice_type == SINGLE_CAUSAL:
        # Deserialize single-key causal lattices
        val = SingleKeyCausalValue()
        val.ParseFromString(tup.payload)

        # Deserialize the vector_clock stored in the Protobuf into a
        # MapLattice, where each value is a MaxIntLattice of the VC
        # counter.
        vc = VectorClock(val.vector_clock, True)

        # Create a SetLattice with the value(s) stored by this lattice.
        values = set()
        for v in val.values:
            values.add(v)

        return SingleKeyCausalLattice(vc, SetLattice(values))
    elif tup.lattice_type == MULTI_CAUSAL:
        # Deserialize multi-key causal lattices
        val = MultiKeyCausalValue()
        val.ParseFromString(tup.payload)

        # Deserialize the vector_clock stored in the Protobuf into a
        # MapLattice, where each value is a MaxIntLattice of the VC
        # counter.
        vc = VectorClock(val.vector_clock, True)

        # Deserialize the set of dependencies of this key into a MapLattice
        # where the keys are names of other KVS keys and the values are
        # MapLattices that have the vector clocks for those keys.
        dep_map = {}
        for kv in val.dependencies:
            key = kv.key
            dep_map[key] = VectorClock(kv.vector_clock, True)

        # Create a SetLattice with the value(s) stored by this lattice.
        values = set()
        for v in val.values:
            values.add(v)

        dependencies = MapLattice(dep_map)
        value = SetLattice(values)

        return MultiKeyCausalLattice(vc, dependencies, value)
    else:
        raise ValueError('Unsupported type cannot be deserialized: ' +
                         str(tup.lattice_type))
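# A rough round-trip sketch for the LWW branch of _deserialize, assuming
# `serializer` is an instance of the class above; SimpleNamespace stands in
# for whatever tuple type the KVS client actually returns with
# `lattice_type` and `payload` fields.
from types import SimpleNamespace

val = LWWValue()
val.timestamp = 0
val.value = b'payload-bytes'

tup = SimpleNamespace(lattice_type=LWW, payload=val.SerializeToString())
lattice = serializer._deserialize(tup)
assert isinstance(lattice, LWWPairLattice)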
def summa(fluent, uid, lblock, rblock, rid, cid, numrows, numcols):
    import cloudpickle as cp
    import logging
    import time

    import numpy as np
    from anna.lattices import LWWPairLattice

    gstart = time.time()

    bsize = lblock.shape[0]
    ssize = 100
    res = np.zeros((bsize, bsize))

    myid = fluent.getid()
    key = '%s: (%d, %d)' % (uid, rid, cid)
    fluent.put(key, LWWPairLattice(0, cp.dumps(myid)))

    start = time.time()
    proc_locs = {}

    keyset = []
    idset = {}
    for i in range(numrows):
        if i == rid:
            continue

        key = '%s: (%d, %d)' % (uid, i, cid)
        keyset.append(key)
        idset[key] = (i, cid)

    for j in range(numcols):
        if j == cid:
            continue

        key = '%s: (%d, %d)' % (uid, rid, j)
        keyset.append(key)
        idset[key] = (rid, j)

    locs = fluent.get(keyset)
    while None in locs.values():
        locs = fluent.get(keyset)

    for key in locs:
        loc = idset[key]
        proc_locs[loc] = cp.loads(locs[key].reveal()[1])

    end = time.time()
    gtime = end - gstart

    start = time.time()
    for c in range(numcols):
        if c == cid:
            continue

        for k in range(int(bsize / ssize)):
            dest = proc_locs[(rid, c)]
            send_id = ('l', k + (bsize * cid))
            msg = cp.dumps((send_id, lblock[:, (k * ssize):((k+1) * ssize)]))
            fluent.send(dest, msg)

    for r in range(numrows):
        if r == rid:
            continue

        for k in range(int(bsize / ssize)):
            dest = proc_locs[(r, cid)]
            send_id = ('r', k + (bsize * rid))
            msg = cp.dumps((send_id, rblock[(k * ssize):((k+1) * ssize), :]))
            fluent.send(dest, msg)

    end = time.time()
    stime = end - start

    num_recvs = (((numrows - 1) * bsize) / ssize) * 2
    recv_count = 0
    left_recvs = {}
    right_recvs = {}

    start = time.time()
    for l in range(int(bsize / ssize)):
        left_recvs[l + (bsize * cid)] = lblock[:, (l * ssize):((l+1) * ssize)]

    for r in range(int(bsize / ssize)):
        right_recvs[r + (bsize * rid)] = rblock[(r * ssize):((r+1) * ssize), :]

    while recv_count < num_recvs:
        msgs = fluent.recv()
        recv_count += (len(msgs))

        for msg in msgs:
            _, body = msg
            body = cp.loads(body)
            send_id = body[0]

            if send_id[0] == 'l':
                col = body[1]
                key = send_id[1]

                left_recvs[key] = col
                if key in right_recvs:
                    match_vec = right_recvs[key]
                    res = np.add(np.matmul(col, match_vec), res)

                    del right_recvs[key]
                    del left_recvs[key]

            if send_id[0] == 'r':
                row = body[1]
                key = send_id[1]

                right_recvs[key] = row
                if key in left_recvs:
                    match_vec = left_recvs[key]
                    res = np.add(np.matmul(match_vec, row), res)

                    del right_recvs[key]
                    del left_recvs[key]

    for key in left_recvs:
        left = left_recvs[key]
        right = right_recvs[key]

        logging.info(left.shape)
        logging.info(right.shape)
        res = np.add(res, np.matmul(left, right))

    end = time.time()
    ctime = end - start

    return res, gtime, stime, ctime, (end - gstart)
def run(flconn, kvs, num_requests, sckt):
    ''' DEFINE AND REGISTER FUNCTIONS '''
    def summa(fluent, uid, lblock, rblock, rid, cid, numrows, numcols):
        import cloudpickle as cp
        import logging
        import time

        import numpy as np
        from anna.lattices import LWWPairLattice

        gstart = time.time()

        bsize = lblock.shape[0]
        ssize = 100
        res = np.zeros((bsize, bsize))

        myid = fluent.getid()
        key = '%s: (%d, %d)' % (uid, rid, cid)
        fluent.put(key, LWWPairLattice(0, cp.dumps(myid)))

        start = time.time()
        proc_locs = {}

        keyset = []
        idset = {}
        for i in range(numrows):
            if i == rid:
                continue

            key = '%s: (%d, %d)' % (uid, i, cid)
            keyset.append(key)
            idset[key] = (i, cid)

        for j in range(numcols):
            if j == cid:
                continue

            key = '%s: (%d, %d)' % (uid, rid, j)
            keyset.append(key)
            idset[key] = (rid, j)

        locs = fluent.get(keyset)
        while None in locs.values():
            locs = fluent.get(keyset)

        for key in locs:
            loc = idset[key]
            proc_locs[loc] = cp.loads(locs[key].reveal()[1])

        end = time.time()
        gtime = end - gstart

        start = time.time()
        for c in range(numcols):
            if c == cid:
                continue

            for k in range(int(bsize / ssize)):
                dest = proc_locs[(rid, c)]
                send_id = ('l', k + (bsize * cid))
                msg = cp.dumps((send_id,
                                lblock[:, (k * ssize):((k+1) * ssize)]))
                fluent.send(dest, msg)

        for r in range(numrows):
            if r == rid:
                continue

            for k in range(int(bsize / ssize)):
                dest = proc_locs[(r, cid)]
                send_id = ('r', k + (bsize * rid))
                msg = cp.dumps((send_id,
                                rblock[(k * ssize):((k+1) * ssize), :]))
                fluent.send(dest, msg)

        end = time.time()
        stime = end - start

        num_recvs = (((numrows - 1) * bsize) / ssize) * 2
        recv_count = 0
        left_recvs = {}
        right_recvs = {}

        start = time.time()
        for l in range(int(bsize / ssize)):
            left_recvs[l + (bsize * cid)] = \
                lblock[:, (l * ssize):((l+1) * ssize)]

        for r in range(int(bsize / ssize)):
            right_recvs[r + (bsize * rid)] = \
                rblock[(r * ssize):((r+1) * ssize), :]

        while recv_count < num_recvs:
            msgs = fluent.recv()
            recv_count += (len(msgs))

            for msg in msgs:
                _, body = msg
                body = cp.loads(body)
                send_id = body[0]

                if send_id[0] == 'l':
                    col = body[1]
                    key = send_id[1]

                    left_recvs[key] = col
                    if key in right_recvs:
                        match_vec = right_recvs[key]
                        res = np.add(np.matmul(col, match_vec), res)

                        del right_recvs[key]
                        del left_recvs[key]

                if send_id[0] == 'r':
                    row = body[1]
                    key = send_id[1]

                    right_recvs[key] = row
                    if key in left_recvs:
                        match_vec = left_recvs[key]
                        res = np.add(np.matmul(match_vec, row), res)

                        del right_recvs[key]
                        del left_recvs[key]

        for key in left_recvs:
            left = left_recvs[key]
            right = right_recvs[key]

            logging.info(left.shape)
            logging.info(right.shape)
            res = np.add(res, np.matmul(left, right))

        end = time.time()
        ctime = end - start

        return res, gtime, stime, ctime, (end - gstart)

    cloud_summa = flconn.register(summa, 'summa')

    if cloud_summa:
        print('Successfully registered summa function.')
    else:
        sys.exit(1)

    ''' TEST REGISTERED FUNCTIONS '''
    n = 10000
    inp1 = np.random.randn(n, n)
    inp2 = np.random.randn(n, n)

    nt = 5
    nr = nt
    nc = nt
    bsize = int(n / nr)

    def get_block(arr, row, col, bsize):
        row_start = row * bsize
        row_end = (row + 1) * bsize
        col_start = col * bsize
        col_end = (col + 1) * bsize

        return arr[row_start:row_end, col_start:col_end]

    latencies = []

    for _ in range(num_requests):
        time.sleep(.1)
        uid = str(uuid.uuid4())
        rids = {}

        left_id_map = {}
        right_id_map = {}

        for r in range(nr):
            for c in range(nc):
                lblock = get_block(inp1, r, c, bsize)
                rblock = get_block(inp2, r, c, bsize)

                id1 = str(uuid.uuid4())
                id2 = str(uuid.uuid4())

                kvs.put(id1, LWWPairLattice(0, serialize_val(lblock)))
                kvs.put(id2, LWWPairLattice(0, serialize_val(rblock)))

                left_id_map[(r, c)] = id1
                right_id_map[(r, c)] = id2

        start = time.time()
        for r in range(nr):
            for c in range(nc):
                r1 = FluentReference(left_id_map[(r, c)], LWW, True)
                r2 = FluentReference(right_id_map[(r, c)], LWW, True)

                rids[(r, c)] = cloud_summa(uid, r1, r2, r, c, nr, nc)

        end = time.time()
        print('Scheduling took %.6f seconds.' % (end - start))

        result = np.zeros((n, n))
        get_times = []
        send_times = []
        comp_times = []
        total_times = []

        for key in rids:
            lstart = time.time()
            res = rids[key].get()
            lend = time.time()

            get_times.append(res[1])
            send_times.append(res[2])
            comp_times.append(res[3])
            total_times.append(res[4])

            res = res[0]
            r = key[0]
            c = key[1]

            result[(r * bsize):((r + 1) * bsize),
                   (c * bsize):((c + 1) * bsize)] = res

        end = time.time()
        latencies.append(end - start)

        if False in np.isclose(result, np.matmul(inp1, inp2)):
            print('Failure!')

    return latencies, [], [], 0
def summa(fluent, uid, lblock, rblock, rid, cid, numrows, numcols):
    import cloudpickle as cp
    import numpy as np
    from anna.lattices import LWWPairLattice

    bsize = lblock.shape[0]
    res = np.zeros((bsize, bsize))

    myid = fluent.getid()
    key = '%s: (%d, %d)' % (uid, rid, cid)
    fluent.put(key, LWWPairLattice(0, cp.dumps(myid)))

    proc_locs = {}
    for i in range(numrows):
        if i == rid:
            continue

        key = '%s: (%d, %d)' % (uid, i, cid)
        loc = fluent.get(key)
        while loc is None:
            loc = fluent.get(key)

        proc_locs[(i, cid)] = cp.loads(loc.reveal()[1])

    for j in range(numcols):
        if j == cid:
            continue

        key = '%s: (%d, %d)' % (uid, rid, j)
        loc = fluent.get(key)
        while loc is None:
            loc = fluent.get(key)

        proc_locs[(rid, j)] = cp.loads(loc.reveal()[1])

    for c in range(numcols):
        if c == cid:
            continue

        for k in range(bsize):
            dest = proc_locs[rid, c]
            send_id = ('l', k + (bsize * cid))
            msg = cp.dumps((send_id, lblock[:, k]))
            fluent.send(dest, msg)

    for r in range(numrows):
        if r == rid:
            continue

        for k in range(bsize):
            dest = proc_locs[r, cid]
            send_id = ('r', k + (bsize * rid))
            msg = cp.dumps((send_id, rblock[k, :]))
            fluent.send(dest, msg)

    num_recvs = (numrows - 1) * bsize + (numcols - 1) * bsize
    recv_count = 0
    left_recvs = {}
    right_recvs = {}

    for l in range(bsize):
        left_recvs[l + (bsize * cid)] = lblock[:, l]

    for r in range(bsize):
        right_recvs[r + (bsize * rid)] = rblock[r, :]

    while recv_count < num_recvs:
        msgs = fluent.recv()
        recv_count += (len(msgs))

        for msg in msgs:
            _, body = msg
            body = cp.loads(body)
            send_id = body[0]

            if send_id[0] == 'l':
                col = body[1]
                key = send_id[1]

                left_recvs[key] = col
                if key in right_recvs:
                    match_vec = right_recvs[key]
                    res = np.add(np.outer(col, match_vec), res)

                    del right_recvs[key]
                    del left_recvs[key]

            if send_id[0] == 'r':
                row = body[1]
                key = send_id[1]

                right_recvs[key] = row
                if key in left_recvs:
                    match_vec = left_recvs[key]
                    res = np.add(np.outer(match_vec, row), res)

                    del right_recvs[key]
                    del left_recvs[key]

    for key in left_recvs:
        left = left_recvs[key]
        right = right_recvs[key]

        res = np.add(res, np.outer(left, right))

    return res
# Split the pickled model into 4000-character chunks so that each chunk fits
# in a single KVS value.
for i in range(0, pickle_len, 4000):
    j = (i + 4000) if ((i + 4000) < pickle_len) else pickle_len
    pickle_arr.append(temp_pickle_string[i:j])

print(temp_super)

# Write each chunk under '<model_str>#<index>' and record the total chunk
# count under '<model_str>#Index'.
base_string = model_str
bash_bytes_string = model_str + "#Index"

i = 0
for temp_string_iter in pickle_arr:
    key_string = base_string + "#" + str(i)
    value_bytes = LWWPairLattice(int(time.time()), temp_string_iter.encode())
    client.put(key_string, value_bytes)
    print(key_string)
    i += 1

i_string = str(i)
i_bytes = LWWPairLattice(int(time.time()), i_string.encode())
client.put(bash_bytes_string, i_bytes)

# Read the chunk count back and fetch each chunk in order.
i_two = int(client.get(bash_bytes_string)[bash_bytes_string].reveal().decode())
temp_pickle_ret = ""
for index in range(i_two):
    temp_string_iter_two = pickle_arr[index]
    key_string_two = base_string + "#" + str(index)
    ret = client.get(key_string_two)[key_string_two].reveal().decode()