def test_multiple_callset_id(self):
    """Wait on futures produced by two separate ``map`` calls at once.

    The same inputs are mapped twice on one executor, the two future lists
    are concatenated and handed to a single ``pywren.wait`` call with
    ``ALL_COMPLETED``; the collected results must equal the inputs (twice)
    plus one.
    """
    def sleep_then_increment(seconds):
        # Each task sleeps for its own input value before answering.
        time.sleep(seconds)
        return seconds + 1

    inputs = np.arange(10)
    executor = pywren.default_executor()
    first_batch = executor.map(sleep_then_increment, inputs)
    second_batch = executor.map(sleep_then_increment, inputs)

    completed, _pending = pywren.wait(
        first_batch + second_batch,
        return_when=pywren.wren.ALL_COMPLETED)

    observed = np.array([fut.result() for fut in completed])
    expected = np.concatenate((inputs, inputs)) + 1
    np.testing.assert_array_equal(observed, expected)
def test_no_shard(self):
    """Run one call against the staging runtime and check the key used.

    The configured runtime key is redirected to the
    ``pywren.runtime.staging`` prefix (same tarball name), and the job's
    ``run_status['runtime_s3_key_used']`` must equal that configured key.
    """
    def test_func(x):
        return x + 1

    config = pywren.wrenconfig.default()
    _prefix, tarball_name = os.path.split(config['runtime']['s3_key'])

    # Use the staging key to test as it doesn't have shards
    config['runtime']['s3_key'] = os.path.join("pywren.runtime.staging",
                                               tarball_name)

    wrenexec = pywren.default_executor(config=config)
    future = wrenexec.call_async(test_func, 7)
    future.result()  # block until the job has finished

    expected_key = config['runtime']['s3_key']
    used_key = future.run_status['runtime_s3_key_used']
    self.assertEqual(used_key, expected_key)
def test_multiple_shard_cholesky(self):
    """Compare ``uops.chol`` on a multi-shard matrix with ``cholesky(A)``.

    A 128x128 matrix ``A = X X^T + I`` is uploaded in 64x64 shards, factored
    through ``uops.chol`` and the result is compared with the local
    ``cholesky(A)`` using ``np.allclose``.
    """
    np.random.seed(1)
    size = 128
    shard_size = 64
    print("Generating X")
    X = np.random.randn(size, 128)
    print("Generating A")
    A = X.dot(X.T) + np.eye(X.shape[0])
    y = np.random.randn(size)
    pwex = pywren.default_executor()
    print("sharding A")
    shard_sizes = (shard_size, shard_size)

    def make_matrices():
        a_big = BigSymmetricMatrix("cholesky_test_A", shape=A.shape,
                                   shard_sizes=shard_sizes)
        y_big = BigMatrix("cholesky_test_y", shape=y.shape,
                          shard_sizes=shard_sizes[:1])
        return a_big, y_big

    # Create, free, then re-create both matrices.
    # NOTE(review): presumably this purges shards left by an earlier run
    # under the same keys -- confirm against BigMatrix.free().
    A_sharded, y_sharded = make_matrices()
    A_sharded.free()
    y_sharded.free()
    A_sharded, y_sharded = make_matrices()

    # The context manager shuts the process pool down once both uploads are
    # finished; previously the pool was never shut down.
    with fs.ProcessPoolExecutor(cpu_count) as executor:
        t = time.time()
        shard_matrix(A_sharded, A, executor=executor)
        e = time.time()
        print("A_sharded", e - t)
        t = time.time()
        shard_matrix(y_sharded, y, executor=executor)
        e = time.time()
        print("y_sharded time", e - t)

    print("Computing LL^{T}")
    L = cholesky(A)
    print(L)
    L_sharded = uops.chol(pwex, A_sharded)
    L_sharded_local = L_sharded.numpy()
    print(L_sharded_local)
    print(L)
    print("L_{infty} difference ", np.max(np.abs(L_sharded_local - L)))
    assert (np.allclose(L, L_sharded_local))
    os.system("rm -rf /dev/shm/*")
def random_delay():
    """Benchmark IterExec with tasks that sleep a random 10-29 seconds.

    Runs ``JOB_N`` iterative jobs for ``TOTAL_ITER`` iterations each, logs
    the iterwren logger at DEBUG level to a file, and pickles the
    per-future iteration history together with the wall-clock duration.
    """
    TOTAL_ITER = 5
    JOB_N = 100

    LOG_FILENAME = "benchmark_futures_data.{}.{}.log".format(TOTAL_ITER, JOB_N)
    # Start from a fresh log file; a missing file is fine.
    try:
        os.remove(LOG_FILENAME)
    except OSError:
        pass

    daiquiri.setup(outputs=[daiquiri.output.File(LOG_FILENAME)])
    iw = logging.getLogger('pywrenext.iterwren.iterwren')
    iw.setLevel('DEBUG')

    t1 = time.time()

    def offset_counter(k, x_k, offset):
        time.sleep(np.random.randint(10, 30))
        if k == 0:
            return offset
        return x_k + 1

    wrenexec = pywren.default_executor()

    with pywrenext.iterwren.IterExec(wrenexec) as IE:
        iter_futures = IE.map(offset_counter, TOTAL_ITER, range(JOB_N),
                              save_iters=True)
        pywrenext.iterwren.wait_exec(IE)

    iter_futures_hist = [f.iter_hist for f in iter_futures]
    t2 = time.time()

    payload = {
        'iter_futures_hist': iter_futures_hist,
        'TOTAL_ITER': TOTAL_ITER,
        'time': t2 - t1,
    }
    out_path = "benchmark_futures_data.{}.random.pickle".format(TOTAL_ITER)
    # Use a context manager so the output file is flushed and closed; the
    # original passed an anonymous open() handle that was never closed.
    with open(out_path, 'wb') as out_file:
        pickle.dump(payload, out_file, -1)
def test_any_complete(self):
    """Drain a map with repeated ``ANY_COMPLETED`` waits.

    Every wait must report at least one finished future; once nothing is
    pending, the results of the original futures must equal the inputs
    plus one.
    """
    def sleep_then_increment(seconds):
        time.sleep(seconds)
        return seconds + 1

    inputs = np.arange(10)
    futures = pywren.default_executor().map(sleep_then_increment, inputs)

    pending = futures
    while len(pending) > 0:
        finished, pending = pywren.wait(
            pending,
            return_when=pywren.wren.ANY_COMPLETED,
            WAIT_DUR_SEC=1)
        self.assertTrue(len(finished) > 0)

    observed = np.array([fut.result() for fut in futures])
    np.testing.assert_array_equal(observed, inputs + 1)
def test_single_shard_cholesky(self):
    """Compare ``uops.chol`` on a one-shard 4x4 matrix with ``cholesky(A)``.

    Both ``A`` and ``y`` are stored with a shard size equal to their full
    shape, so each consists of a single shard.
    """
    base = np.random.randn(4, 4)
    A = base.dot(base.T) + np.eye(base.shape[0])
    y = np.random.randn(16)

    pwex = pywren.default_executor()

    A_sharded = BigMatrix("cholesky_test_A", shape=A.shape,
                          shard_sizes=A.shape)
    y_sharded = BigMatrix("cholesky_test_y", shape=y.shape,
                          shard_sizes=y.shape)
    for big, local in ((A_sharded, A), (y_sharded, y)):
        shard_matrix(big, local)

    remote_factor = uops.chol(pwex, A_sharded).numpy()
    local_factor = cholesky(A)

    assert (np.allclose(local_factor, remote_factor))
    os.system("rm -rf /dev/shm/*")
def test_shard(self):
    """Run one call against the ``pywren.runtime`` prefix and check the key.

    The key recorded in ``run_status['runtime_s3_key_used']`` must differ
    from the executor's ``runtime_key``.
    """
    def test_func(x):
        return x + 1

    config = pywren.wrenconfig.default()
    _prefix, tarball_name = os.path.split(config['runtime']['s3_key'])

    # Use a runtime that has shards
    config['runtime']['s3_key'] = os.path.join("pywren.runtime", tarball_name)

    wrenexec = pywren.default_executor(config=config)
    unsharded_key = wrenexec.runtime_key

    future = wrenexec.call_async(test_func, 7)
    future.result()  # block until the job has finished

    # NOTE: There is some probability we will hit the base key ?
    used_key = future.run_status['runtime_s3_key_used']
    self.assertNotEqual(used_key, unsharded_key)
def read(bucket_name, number, keylist_raw, read_times, region):
    """Read S3 objects through pywren and report per-task throughput.

    Args:
        bucket_name: bucket passed to ``get_object``.
        number: how many tasks to launch; ``0`` means one task per entry of
            ``keylist_raw``, otherwise the keys are cycled to this length.
        keylist_raw: list of object keys.
        read_times: how many times each task downloads its key.
        region: region passed to ``boto3.client``.

    Returns:
        dict with ``'results'`` (one ``(t1, t2, mb_rate, bytes_read, md5hex)``
        tuple per task), ``'run_statuses'`` and ``'invoke_statuses'``.
    """
    blocksize = 1024 * 1024

    def run_command(key):
        s3 = boto3.client('s3', region)
        digest = hashlib.md5()
        total_bytes = 0

        started = time.time()
        for _ in range(read_times):
            body = s3.get_object(Bucket=bucket_name, Key=key)['Body']
            # Stream the object in 1 MiB pieces until read() returns nothing.
            for piece in iter(lambda: body.read(blocksize), b''):
                total_bytes += len(piece)
                digest.update(piece)
        finished = time.time()

        checksum = digest.hexdigest()
        mb_rate = total_bytes / (finished - started) / 1e6
        return started, finished, mb_rate, total_bytes, checksum

    wrenexec = pywren.default_executor()

    keylist = (keylist_raw if number == 0 else
               [keylist_raw[i % len(keylist_raw)] for i in range(number)])

    futures = wrenexec.map(run_command, keylist)

    return {
        'results': [f.result() for f in futures],
        'run_statuses': [f.run_status for f in futures],
        'invoke_statuses': [f.invoke_status for f in futures],
    }
def read(bucket_name, number, outfile, key_file, s3_key, read_times, region):
    """Read S3 objects through pywren and pickle per-task timings.

    Either ``s3_key`` (one key read by every task) or ``key_file`` (text
    file with one key per line, cycled to ``number`` tasks) must be given;
    otherwise a message is printed and the process exits with status 1.

    Each task returns ``(t1, t2, mb_rate)``; the list of these tuples is
    pickled to ``s3_benchmark.read.output.pickle``.
    """
    if key_file is None and s3_key is None:
        print("must specify either a single key to repeatedly read ( --s3_key) or a text file with keynames (--key_file)")
        sys.exit(1)

    blocksize = 1024*1024

    def run_command(key):
        client = boto3.client('s3', region)

        m = hashlib.md5()
        bytes_read = 0

        t1 = time.time()
        for _ in range(read_times):
            obj = client.get_object(Bucket=bucket_name, Key=key)
            fileobj = obj['Body']

            buf = fileobj.read(blocksize)
            while len(buf) > 0:
                bytes_read += len(buf)
                m.update(buf)
                buf = fileobj.read(blocksize)
        t2 = time.time()

        mb_rate = bytes_read/(t2-t1)/1e6
        return t1, t2, mb_rate

    wrenexec = pywren.default_executor()
    if s3_key is not None:
        keylist = [s3_key] * number
    else:
        # Close the key file as soon as it has been read.
        with open(key_file, 'r') as fid:
            keylist_raw = [k.strip() for k in fid.readlines()]
        keylist = [keylist_raw[i % len(keylist_raw)] for i in range(number)]

    fut = wrenexec.map(run_command, keylist)

    res = [f.result() for f in fut]

    # NOTE(review): the ``outfile`` argument is ignored and the output name
    # is hard-coded; kept as-is because callers may rely on it -- confirm.
    # Pickle data is binary, so open in 'wb' and close the handle.
    with open('s3_benchmark.read.output.pickle', 'wb') as out_file:
        pickle.dump(res, out_file)
def encrypt_all_keys_in_prefix(bucket, prefix, encrypt_out, strip_string,
                               use_pywren):
    """Call ``utils.encrypt_s3_copy_key`` for every key under ``prefix``.

    With ``use_pywren`` false the keys are processed one by one in this
    process. Otherwise they are split with ``utils.chunks(keys, 500)`` and
    each chunk is handled by one pywren task using the
    ``imagenet2datav2`` runtime configured below.
    """
    keys = utils.list_all_keys(prefix)

    if not use_pywren:
        for key in keys:
            utils.encrypt_s3_copy_key(key, bucket, encrypt_out, strip_string)
        return

    key_batches = utils.chunks(keys, 500)

    def pywren_job(key_chunk):
        for chunk_key in key_chunk:
            utils.encrypt_s3_copy_key(chunk_key, bucket, encrypt_out,
                                      strip_string)
        return 0

    config = wc.default()
    runtime_cfg = config['runtime']
    runtime_cfg['s3_bucket'] = 'imagenet2datav2'
    runtime_cfg['s3_key'] = 'pywren.runtime/pywren_runtime-3.6-imagenet2.tar.gz'
    pwex = pywren.default_executor(config=config)

    print(f"Submitting jobs for {len(keys)} keys")
    futures = pwex.map(pywren_job, key_batches,
                       exclude_modules=["site-packages/"])
    pywren.wait(futures)
    # Fetch every result after the wait, as the original did.
    for future in futures:
        future.result()
def test_single_shard_gemv(self):
    """Compare ``binops.gemv`` on single-shard operands with ``X.dot(Y)``.

    ``X`` (16x16) and ``Y`` (length 16) are each stored as one shard.
    """
    X = np.random.randn(16, 16)
    Y = np.random.randn(16)
    X_sharded = BigMatrix("gemv_test_0", shape=X.shape, shard_sizes=X.shape)
    Y_sharded = BigMatrix("gemv_test_2", shape=Y.shape, shard_sizes=Y.shape)
    shard_matrix(X_sharded, X)
    # Y must be uploaded as well; the original only sharded X, so gemv had
    # no data for Y written by this test (the multi-shard gemv test shards
    # both operands).
    shard_matrix(Y_sharded, Y)
    pwex = pywren.default_executor()
    XY_sharded = binops.gemv(pwex, X_sharded, Y_sharded, X_sharded.bucket, 1)
    XY_sharded_local = XY_sharded.numpy()
    XY = X.dot(Y)
    print(XY)
    print(XY_sharded_local)
    # Free every matrix this test created, including Y.
    X_sharded.free()
    Y_sharded.free()
    XY_sharded.free()
    assert (np.all(np.isclose(XY, XY_sharded_local)))
def start(self, parallel=False):
    """Mark the program RUNNING and enqueue its starter nodes.

    The starters are split with ``chunk(..., 100)``; for every starter the
    node status is set to ``NS.READY`` and a JSON message is sent to the
    first queue in ``self.queue_urls``.

    Args:
        parallel: when true, each chunk is enqueued by a pywren task and
            the call waits for those tasks; otherwise the chunks are
            enqueued sequentially in this process.

    Returns:
        Always ``0``.
    """
    put(self.control_plane.client, self.hash, PS.RUNNING.value)
    print("len starters", len(self.program.starters))
    starter_batches = chunk(self.program.starters, 100)

    def enqueue_batch(batch):
        queue = boto3.resource('sqs').Queue(self.queue_urls[0])
        for starter in batch:
            self.set_node_status(*starter, NS.READY)
            # JSON object keys must be strings, so stringify the dict keys.
            string_keyed = {str(name): value
                            for name, value in starter[1].items()}
            message = json.dumps([starter[0], string_keyed])
            queue.send_message(MessageBody=message)

    if not parallel:
        for batch in starter_batches:
            enqueue_batch(batch)
        return 0

    pwex = pywren.default_executor()
    pywren.wait(pwex.map(enqueue_batch, starter_batches))
    return 0
def test_multiple_shard_matrix_gemv(self):
    """Compare ``binops.gemv`` on a multi-shard matrix with ``X.dot(Y)``.

    ``X`` (16x16) is stored in shards of half its size per axis; ``Y``
    (16x1) is stored as a single shard.
    """
    X = np.random.randn(16, 16)
    Y = np.random.randn(16, 1)

    x_shard_shape = tuple(int(half) for half in np.array(X.shape) / 2)
    y_shard_shape = (Y.shape[0], 1)

    X_sharded = BigMatrix("gemv_test_1", shape=X.shape,
                          shard_sizes=x_shard_shape)
    Y_sharded = BigMatrix("gemv_test_2", shape=Y.shape,
                          shard_sizes=y_shard_shape)
    shard_matrix(X_sharded, X)
    shard_matrix(Y_sharded, Y)

    pwex = pywren.default_executor()
    product_sharded = binops.gemv(pwex, X_sharded, Y_sharded,
                                  X_sharded.bucket, 1)
    product_remote = product_sharded.numpy()
    product_local = X.dot(Y)

    for big in (X_sharded, Y_sharded, product_sharded):
        big.free()

    assert (np.all(np.isclose(product_local, product_remote)))
def write(bucket_name, mb_per_file, number, key_prefix, outfile, key_file,
          region):
    """Upload random objects to S3 through pywren and pickle the timings.

    ``number`` keys of the form ``key_prefix + <uppercase uuid4 hex>`` are
    generated; each task uploads ``mb_per_file`` MiB of generated data and
    returns ``(t1, t2, mb_rate)``. The key names are optionally written to
    ``key_file`` (one per line) and the result list is pickled to
    ``outfile``.
    """
    # Single-argument print() produces the same text on Python 2 and 3.
    print("bucket_name = {}".format(bucket_name))
    print("mb_per_file = {}".format(mb_per_file))
    print("number= {}".format(number))
    print("key_prefix= {}".format(key_prefix))

    def run_command(key_name):
        bytes_n = mb_per_file * 1024**2
        d = exampleutils.RandomDataGenerator(bytes_n)

        client = boto3.client('s3', region)
        t1 = time.time()
        client.put_object(Bucket=bucket_name,
                          Key=key_name,
                          Body=d)
        t2 = time.time()

        mb_rate = bytes_n/(t2-t1)/1e6
        return t1, t2, mb_rate

    wrenexec = pywren.default_executor()

    # create list of random keys
    # ``UUID.hex`` exists on Python 2 and 3; ``get_hex()`` is Python 2 only.
    keynames = [key_prefix + str(uuid.uuid4().hex.upper())
                for _ in range(number)]

    if key_file is not None:
        # Close the key file so the names are flushed before the upload.
        with open(key_file, 'w') as fid:
            for k in keynames:
                fid.write("{}\n".format(k))

    fut = wrenexec.map(run_command, keynames)

    res = [f.result() for f in fut]
    # Pickle data is binary, so open in 'wb' and close the handle.
    with open(outfile, 'wb') as out_file:
        pickle.dump(res, out_file)
def write(bucket_name, mb_per_file, number, key_prefix, region):
    """Upload random objects to S3 through pywren and return the timings.

    Args:
        bucket_name: destination bucket.
        mb_per_file: size of each object in MiB.
        number: number of objects (and tasks).
        key_prefix: prefix prepended to each generated key name.
        region: region passed to ``boto3.client``.

    Returns:
        dict with ``'results'`` (one ``(t1, t2, mb_rate)`` tuple per task),
        ``'run_statuses'``, ``'invoke_statuses'``, ``'bucket_name'`` and
        ``'keynames'``.
    """
    def run_command(key_name):
        bytes_n = mb_per_file * 1024**2
        d = exampleutils.RandomDataGenerator(bytes_n)

        client = boto3.client('s3', region)
        t1 = time.time()
        client.put_object(Bucket=bucket_name,
                          Key=key_name,
                          Body=d)
        t2 = time.time()

        mb_rate = bytes_n/(t2-t1)/1e6
        return t1, t2, mb_rate

    wrenexec = pywren.default_executor(shard_runtime=True)

    # create list of random keys
    # ``UUID.hex`` exists on Python 2 and 3; ``get_hex()`` is Python 2 only
    # and raises AttributeError on Python 3.
    keynames = [key_prefix + str(uuid.uuid4().hex.upper())
                for _ in range(number)]
    futures = wrenexec.map(run_command, keynames)

    results = [f.result() for f in futures]
    run_statuses = [f.run_status for f in futures]
    invoke_statuses = [f.invoke_status for f in futures]

    res = {'results': results,
           'run_statuses': run_statuses,
           'bucket_name': bucket_name,
           'keynames': keynames,
           'invoke_statuses': invoke_statuses}
    return res
def test_map_item_limit(self):
    """Check that ``map`` enforces ``scheduler.map_item_limit``.

    With the limit set to 100, mapping 10 items succeeds and mapping 101
    items must raise ``ValueError``.
    """
    TOO_BIG_COUNT = 100

    def plus_one(x):
        return x + 1

    conf = pywren.wrenconfig.default()
    if 'scheduler' not in conf:
        conf['scheduler'] = {}
    conf['scheduler']['map_item_limit'] = TOO_BIG_COUNT
    wrenexec = pywren.default_executor(config=conf)

    small_input = np.arange(10)
    pywren.get_all_results(wrenexec.map(plus_one, small_input))

    # now too big
    with pytest.raises(ValueError) as excinfo:
        oversized_input = np.arange(TOO_BIG_COUNT + 1)
        wrenexec.map(plus_one, oversized_input)
def setUp(self):
    """Store a default pywren executor on ``self.wrenexec``."""
    executor = pywren.default_executor()
    self.wrenexec = executor
def benchmark(bucket_name, key_prefix, workers, outfile, region, begin_delay):
    """Run a two-task client/server socket exchange through pywren.

    Two tasks are launched: one with ``isServer=True`` and one with
    ``isServer=False``. The server publishes its address in SimpleDB under
    a fresh item name and handles one TCP request; the client polls
    SimpleDB for that address, connects, sends a fixed message and reads up
    to 16 bytes back. Both tasks first wait until ``begin_delay`` seconds
    after the host start time.

    The task results, together with ``host_start_time``, ``begin_delay``
    and ``workers``, are pickled to ``outfile``. ``key_prefix`` is unused
    and ``workers`` is only recorded.
    """
    # Single-argument print() produces the same text on Python 2 and 3.
    print("bucket_name = {}".format(bucket_name))

    host_start_time = time.time()
    wait_until = host_start_time + begin_delay

    # ``UUID.hex`` exists on Python 2 and 3; ``get_hex()`` is Python 2 only.
    item_name = "net_benchmark_test_" + str(uuid.uuid4().hex.upper())

    def run_command(isServer):
        # get timing offset (the value is not used; the call is kept)
        exampleutils.get_time_offset(NTP_SERVER, 4)

        # first pause (for sync)
        sleep_duration = wait_until - time.time()
        if sleep_duration > 0:
            time.sleep(sleep_duration)

        # start the job
        sdb_client = boto3.client('sdb', region)

        if isServer:
            hw_addr, ip_addrs = exampleutils.get_ifconfig()
            write_dict = {'hw_addr': hw_addr,
                          'ip': ip_addrs[1]}
            write_attr = exampleutils.dict_to_sdb_attr(write_dict, True)

            sdb_client.put_attributes(DomainName=domain_name,
                                      ItemName=item_name,
                                      Attributes=write_attr)

            server = SocketServer.TCPServer((ip_addrs[1], PORT), MyTCPHandler)
            try:
                # handle_request() serves a single request and returns.
                server.handle_request()
            finally:
                # Release the listening socket; it was never closed before.
                server.server_close()
            response = "handled one?"
        else:
            server_ip = None
            while server_ip is None:
                resp = sdb_client.get_attributes(
                    DomainName=domain_name,
                    ItemName=item_name,
                    AttributeNames=['hw_addr', 'ip'],
                    ConsistentRead=True)

                if 'Attributes' in resp:
                    a = exampleutils.sdb_attr_to_dict(resp['Attributes'])
                    server_ip = a['ip']
                # Sleep after every poll, including the successful one: the
                # server publishes its address before it starts listening.
                time.sleep(4)

            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                s.connect((server_ip, PORT))
                s.send("hello world this is a bunch of text")
                response = s.recv(16)
            finally:
                # Close the client socket; it was never closed before.
                s.close()

        return {'response': response}

    print("starting transactions")

    wrenexec = pywren.default_executor()

    fut = wrenexec.map(run_command, [False, True])
    print("launch took {:3.2f} sec".format(time.time()-host_start_time))

    res = [f.result(throw_except=False) for f in fut]
    print(res)

    # Pickle data is binary, so open in 'wb' and close the handle.
    with open(outfile, 'wb') as out_file:
        pickle.dump({
            'host_start_time': host_start_time,
            'begin_delay': begin_delay,
            'workers': workers,
            'res': res},
            out_file)
def write_data():
    """Run the write-throughput benchmark as pywren tasks.

    Command-line arguments (``sys.argv[1:7]``): number of tasks, job
    number, write element size, process time, total time, redis node.

    Each task sleeps for ``process_time`` and then runs one writer thread
    for ``total_time`` seconds. The Redis write itself is disabled in the
    loop, so the measured interval covers only the "S" log call. Results
    and pywren status records are pickled to
    ``redis-write-.pickle.breakdown`` and also returned.
    """
    def run_command(key):
        """Run one benchmark task.

        ``key`` is a dict built by the enclosing function with the entries
        'taskId', 'job_number', 'total_input', 'write_element_size',
        'process_time', 'total_time' and 'redis'.

        Returns ``(begin_of_function, end_of_function, write_time, ret)``
        where ``ret`` is the list returned by the thread pool, one entry
        per writer thread.
        """
        pywren.wrenlogging.default_config('INFO')
        begin_of_function = time.time()
        logger = logging.getLogger(__name__)
        logger.info("taskId = " + str(key['taskId']))
        taskId = key['taskId']
        jobid_int = int(key['job_number'])
        write_element_size = int(key['write_element_size'])
        process_time = int(key['process_time'])
        total_time = int(key['total_time'])

        # Redis connections are disabled; one notional node remains so the
        # key-to-node index below is still computed.
        nrs = 1

        number_of_clients = 1
        write_pool = ThreadPool(number_of_clients)
        time.sleep(process_time)
        logger.info("Process finish here: " + str(time.time()))

        def write_work_client(writer_key):
            """Loop for ``total_time`` seconds; return (time, ops/sec) samples."""
            start_time = time.time()
            taskID = writer_key['taskId']
            jobID = writer_key['jobid']
            datasize = writer_key['write_element_size']
            total_time = writer_key['total_time']
            body = b'a' * datasize
            count = 0
            throughput_step = 1
            throughput_count = 1
            throughput_total = 0
            throughput_nops = 0
            ret = []

            while time.time() < start_time + total_time:
                count = count + 1
                keyname = str(jobID) + "-" + str(taskID) + "-" + str(count)
                m = hashlib.md5()
                m.update(keyname.encode('utf-8'))
                # Target node index and key name for the disabled Redis
                # write; both are computed but not used at the moment.
                ridx = int(m.hexdigest()[:8], 16) % nrs
                randomized_keyname = str(jobID) + "-" + str(
                    taskID) + '-' + m.hexdigest()[:8] + '-' + str(count)
                start = time.time()
                logger.info("[REDIS] [" + str(jobID) + "] " +
                            str(time.time()) + " " + str(taskID) + " " +
                            str(len(body)) + " write " + "S")
                # rs[ridx].set(randomized_keyname, body)
                end = time.time()
                logger.info("[REDIS] [" + str(jobID) + "] " +
                            str(time.time()) + " " + str(taskID) + " " +
                            str(len(body)) + " write " + "E ")
                throughput_total += end - start
                throughput_nops += 1
                # Emit one throughput sample each time another
                # ``throughput_step`` seconds have elapsed since the start.
                if end - start_time >= throughput_count:
                    throughput = throughput_nops / throughput_total
                    ret.append((end, throughput))
                    throughput_nops = 0
                    throughput_count += throughput_step
                    throughput_total = 0

            logger.info("Write finish here: " + str(time.time()))
            return ret

        writer_keylist = [{
            'client_id': i,
            'taskId': taskId,
            'jobid': jobid_int,
            'write_element_size': write_element_size,
            'total_time': total_time
        } for i in range(number_of_clients)]

        start_time = time.time()
        try:
            ret = write_pool.map_async(write_work_client,
                                       writer_keylist).get()
            twait_end = time.time()
            write_time = twait_end - start_time
        finally:
            # Always release the pool threads, even if a writer raised.
            write_pool.close()
            write_pool.join()
        end_of_function = time.time()
        return begin_of_function, end_of_function, write_time, ret

    numTasks = int(sys.argv[1])
    job_number = int(sys.argv[2])
    write_element_size = int(sys.argv[3])
    # Passed unchanged to time.sleep() in the task, i.e. used as seconds
    # (an earlier comment said microseconds).
    process_time = int(sys.argv[4])
    total_time = int(sys.argv[5])
    redisnode = sys.argv[6]

    keylist = [{
        'taskId': i,
        'job_number': job_number,
        'total_input': numTasks,
        'write_element_size': write_element_size,
        'process_time': process_time,
        'total_time': total_time,
        'redis': redisnode
    } for i in range(numTasks)]

    wrenexec = pywren.default_executor()
    futures = wrenexec.map(run_command, keylist)
    pywren.wait(futures)
    results = [f.result() for f in futures]

    print("Write " + str(job_number))
    run_statuses = [f.run_status for f in futures]
    invoke_statuses = [f.invoke_status for f in futures]
    res = {
        'results': results,
        'run_statuses': run_statuses,
        'invoke_statuses': invoke_statuses
    }
    filename = "redis-write-" + ".pickle.breakdown"
    # Close the output file; the original left the handle open.
    with open(filename, 'wb') as out_file:
        pickle.dump(res, out_file)
    return res
def setUp(self):
    """Store a pywren executor created with ``job_max_runtime=40``."""
    executor = pywren.default_executor(job_max_runtime=40)
    self.wrenexec = executor
def test_cholesky_multi_repeats(self):
    """Re-enqueue already-seen nodes and check Cholesky is still correct.

    Insert repeated instructions into PC queue avoid double increments.

    While the program runs, up to ``total_repeats`` randomly chosen nodes
    (index below the current progress) are sent again to the first SQS
    queue. Afterwards every node whose edge sum differs from its indegree
    is printed, and the sharded result is compared with
    ``np.linalg.cholesky(A)``.
    """
    print("RUNNING MULTI")
    size = 256
    shard_size = 30
    repeats = 15
    total_repeats = 150
    # The original seeded with 1 and then 2 before drawing anything; only
    # the second seed had an effect.
    np.random.seed(2)
    print("Generating X")
    X = np.random.randn(size, 128)
    print("Generating A")
    A = X.dot(X.T) + size*np.eye(X.shape[0])
    shard_sizes = (shard_size, shard_size)
    A_sharded = BigMatrix("cholesky_test_A_{0}".format(int(time.time())),
                          shape=A.shape, shard_sizes=shard_sizes,
                          write_header=True)
    A_sharded.free()
    shard_matrix(A_sharded, A)
    instructions, _trailing, L_sharded = compiler._chol(A_sharded)
    all_nodes = instructions.unroll_program()
    L_sharded.free()
    pwex = pywren.default_executor()
    executor = pywren.lambda_executor
    config = npw.config.default()
    pywren_config = pwex.config
    program = lp.LambdaPackProgram(
        instructions, executor=executor, pywren_config=pywren_config,
        config=config, eager=True)
    print("PROGRAM HASH", program.hash)
    cores = 1
    program.start()
    jobs = []
    for _ in range(cores):
        p = mp.Process(target=job_runner.lambdapack_run, args=(program,),
                       kwargs={'timeout': 3600, 'pipeline_width': 5})
        jobs.append(p)
        p.start()

    np.random.seed(0)
    try:
        # Build the SQS handles once, not on every poll.
        sqs = boto3.resource('sqs', region_name=program.control_plane.region)
        client = boto3.client('sqs')
        while program.program_status() == lp.PS.RUNNING:
            time.sleep(0.5)
            waiting = 0
            running = 0
            for i, queue_url in enumerate(program.queue_urls):
                print("Priority {0}".format(i))
                attrs = client.get_queue_attributes(
                    QueueUrl=queue_url,
                    AttributeNames=[
                        'ApproximateNumberOfMessages',
                        'ApproximateNumberOfMessagesNotVisible'
                    ])['Attributes']
                print(attrs)
                waiting += int(attrs["ApproximateNumberOfMessages"])
                running += int(attrs["ApproximateNumberOfMessagesNotVisible"])
            print("SQS QUEUE STATUS Waiting {0}, Running {1}".format(
                waiting, running))
            for _ in range(repeats):
                p = program.get_progress()
                if p is None:
                    continue
                p = int(p)
                pc = int(np.random.choice(min(p, len(all_nodes)), 1))
                node = all_nodes[pc]
                queue = sqs.Queue(program.queue_urls[0])
                total_repeats -= 1
                if total_repeats > 0:
                    print("Malicilously enqueueing node ", pc, node,
                          total_repeats)
                    queue.send_message(MessageBody=json.dumps(node))
                time.sleep(1)
        # The worker processes are deliberately not joined here.
    except Exception as exc:
        # Monitoring stays best-effort as before, but the failure is now
        # reported and KeyboardInterrupt/SystemExit are no longer swallowed
        # (the original used a bare ``except: pass``).
        print("Monitoring loop aborted:", repr(exc))

    print("Program status")
    print(program.program_status())
    for node in all_nodes:
        edge_sum = lp.get(program.control_plane.client,
                          program._node_edge_sum_key(*node))
        edge_sum = 0 if edge_sum is None else int(edge_sum)
        parents = program.program.get_parents(*node)
        indegree = len(parents)
        node_status = program.get_node_status(*node)
        redis_str = "Node: {0}, Edge Sum: {1}, Indegree: {2}, Node Status {3}".format(
            node, edge_sum, indegree, node_status)
        if edge_sum != indegree:
            # Diagnostic only: the mismatch is printed, not asserted.
            print(redis_str)
            for p in parents:
                p_status = program.get_node_status(*p)
                edge_key = program._edge_key(p[0], p[1], node[0], node[1])
                edge_value = lp.get(program.control_plane.client, edge_key)
                child_str = "Parent Node: {0}, Parent Status: {1}, Edge Key: {2}".format(
                    p, p_status, edge_value)
                print(child_str)

    program.free()
    L_npw = L_sharded.numpy()
    L = np.linalg.cholesky(A)
    assert(np.allclose(L_npw, L))
def test_cholesky_multi_failures(self):
    """Kill worker processes at random and check Cholesky is still correct.

    Sixteen local worker processes run the program. After each 10 second
    poll, with probability 0.35, ``failures`` randomly chosen workers are
    terminated and replaced. Once the program leaves the RUNNING state the
    workers are joined and the sharded result is compared with
    ``np.linalg.cholesky(A)``.
    """
    print("RUNNING MULTI")
    # The original seeded twice with the same value before drawing anything.
    np.random.seed(1)
    size = 256
    shard_size = 64
    failures = 4
    print("Generating X")
    X = np.random.randn(size, 128)
    print("Generating A")
    A = X.dot(X.T) + size*np.eye(X.shape[0])
    shard_sizes = (shard_size, shard_size)
    A_sharded = BigMatrix("cholesky_test_A", shape=A.shape,
                          shard_sizes=shard_sizes, write_header=True)
    A_sharded.free()
    shard_matrix(A_sharded, A)
    instructions, _trailing, L_sharded = compiler._chol(A_sharded)
    pwex = pywren.default_executor()
    executor = pywren.lambda_executor
    pywren_config = pwex.config
    config = npw.config.default()
    program = lp.LambdaPackProgram(
        instructions, executor=executor, pywren_config=pywren_config,
        config=config, eager=False)
    cores = 16
    program.start()

    def spawn_worker():
        # One local worker process running the program's job loop.
        proc = mp.Process(target=job_runner.lambdapack_run, args=(program,),
                          kwargs={'timeout': 3600, 'pipeline_width': 4})
        proc.start()
        return proc

    jobs = [spawn_worker() for _ in range(cores)]

    np.random.seed(0)
    # One SQS client for the whole monitoring loop; the original built a
    # client per queue per poll plus an SQS resource that was never used.
    client = boto3.client('sqs')
    while program.program_status() == lp.PS.RUNNING:
        waiting = 0
        running = 0
        for i, queue_url in enumerate(program.queue_urls):
            print("Priority {0}".format(i))
            attrs = client.get_queue_attributes(
                QueueUrl=queue_url,
                AttributeNames=[
                    'ApproximateNumberOfMessages',
                    'ApproximateNumberOfMessagesNotVisible'
                ])['Attributes']
            print(attrs)
            waiting += int(attrs["ApproximateNumberOfMessages"])
            running += int(attrs["ApproximateNumberOfMessagesNotVisible"])
        print("SQS QUEUE STATUS Waiting {0}, Running {1}".format(
            waiting, running))
        time.sleep(10)
        if np.random.random() > 0.65:
            for _ in range(failures):
                core = int(np.random.choice(cores, 1)[0])
                print("Maliciously Killing a job!")
                jobs[core].terminate()
                jobs[core] = spawn_worker()

    for p in jobs:
        p.join()

    print("Program status")
    print(program.program_status())
    program.free()
    L_npw = L_sharded.numpy()
    L = np.linalg.cholesky(A)
    print(L_npw)
    print(L)
    print("MAX ", np.max(np.abs(L - L_npw)))
    assert(np.allclose(L_npw, L))
def run_experiment(problem_size, shard_size, pipeline, priority, lru, eager,
                   truncate, max_cores, start_cores, trial, launch_granularity,
                   timeout, log_granularity, autoscale_policy,
                   failure_percentage, max_failure_events, failure_time):
    """Run one Cholesky failure-injection experiment and pickle its metrics.

    A random ``problem_size`` x 1 matrix is sharded, ``X X^T`` is built with
    ``binops.gemm`` and factored by a ``LambdaPackProgram`` executed on
    pywren workers. While the program runs, queue depth, worker counts and
    flop/read/write counters are sampled every ``log_granularity`` seconds,
    workers are relaunched according to ``autoscale_policy`` and failure
    flags are set in Redis for randomly chosen workers.

    Args:
        problem_size, shard_size: matrix size and shard size.
        pipeline: pipeline width handed to each worker.
        priority: truthy selects 5 priority levels, otherwise 1.
        lru: truthy selects a worker cache size of 5, otherwise 0.
        eager: forwarded to ``LambdaPackProgram``.
        truncate: if not None, keep only the first ``truncate`` instructions.
        max_cores, start_cores: worker-count cap and initial worker count.
        trial: recorded in the output only.
        launch_granularity: minimum seconds between launches ("dynamic").
        timeout: worker timeout; "constant_timeout" relaunches after 75% of it.
        log_granularity: seconds slept per monitoring iteration.
        autoscale_policy: "dynamic" or "constant_timeout".
        failure_percentage: fraction of known failure keys set per event.
        max_failure_events: maximum number of failure events.
        failure_time: minimum seconds between failure events.

    Raises:
        Exception: if ``autoscale_policy`` is not one of the two known names.

    Side effects: writes ``failure_experiments/<md5 of args>.log`` and
    ``failure_experiments/<md5 of args>.pickle``; adds a file handler and a
    stream handler to the root logger on every call.
    """
    # set up logging
    logger = logging.getLogger()
    for key in logging.Logger.manager.loggerDict:
        logging.getLogger(key).setLevel(logging.CRITICAL)
    logger.setLevel(logging.DEBUG)
    arg_bytes = pickle.dumps(
        (problem_size, shard_size, pipeline, priority, lru, eager, truncate,
         max_cores, start_cores, trial, launch_granularity, timeout,
         log_granularity, autoscale_policy, failure_percentage,
         max_failure_events, failure_time))
    arg_hash = hashlib.md5(arg_bytes).hexdigest()
    # The log file lives in failure_experiments/, so the directory has to
    # exist before FileHandler opens it. The original created it only at
    # the very end, which fails on a fresh checkout.
    os.makedirs("failure_experiments", exist_ok=True)
    log_file = "failure_experiments/{0}.log".format(arg_hash)
    fh = logging.FileHandler(log_file)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    ch.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(ch)
    logger.info("Logging to {0}".format(log_file))

    X = np.random.randn(problem_size, 1)
    pwex = pywren.default_executor()
    shard_sizes = [shard_size, 1]
    X_sharded = BigMatrix("cholesky_test_{0}_{1}".format(
        problem_size, shard_size), shape=X.shape, shard_sizes=shard_sizes,
        write_header=True)
    shard_matrix(X_sharded, X)
    print("Generating PSD matrix...")
    XXT_sharded = binops.gemm(pwex, X_sharded, X_sharded.T, overwrite=False)
    XXT_sharded.lambdav = problem_size * 10
    instructions, _L_sharded, _trailing = lp._chol(XXT_sharded)

    # Use the ``pipeline`` argument; the original read the global
    # ``args.pipeline``, which only exists when run from the CLI entry point.
    pipeline_width = pipeline
    num_priorities = 5 if priority else 1
    cache_size = 5 if lru else 0

    REDIS_CLIENT = redis.StrictRedis(REDIS_ADDR, port=REDIS_PORT,
                                     password=REDIS_PASS, db=0,
                                     socket_timeout=5)

    if truncate is not None:
        instructions = instructions[:truncate]
    config = pwex.config

    program = lp.LambdaPackProgram(instructions,
                                   executor=pywren.lambda_executor,
                                   pywren_config=config,
                                   num_priorities=num_priorities,
                                   eager=eager)

    redis_env = {
        "REDIS_ADDR": os.environ.get("REDIS_ADDR", ""),
        "REDIS_PASS": os.environ.get("REDIS_PASS", "")
    }

    done_counts = []
    ready_counts = []
    post_op_counts = []
    not_ready_counts = []
    running_counts = []
    sqs_invis_counts = []
    sqs_vis_counts = []
    up_workers_counts = []
    busy_workers_counts = []
    times = []
    flops = []
    reads = []
    writes = []
    failure_times = []
    exp = {}
    exp["redis_done_counts"] = done_counts
    exp["redis_ready_counts"] = ready_counts
    exp["redis_post_op_counts"] = post_op_counts
    exp["redis_not_ready_counts"] = not_ready_counts
    exp["redis_running_counts"] = running_counts
    exp["sqs_invis_counts"] = sqs_invis_counts
    exp["sqs_vis_counts"] = sqs_vis_counts
    exp["busy_workers"] = busy_workers_counts
    exp["up_workers"] = up_workers_counts
    exp["times"] = times
    exp["lru"] = lru
    exp["priority"] = priority
    exp["eager"] = eager
    exp["truncate"] = truncate
    exp["max_cores"] = max_cores
    exp["problem_size"] = problem_size
    exp["shard_size"] = shard_size
    exp["pipeline"] = pipeline
    exp["flops"] = flops
    exp["reads"] = reads
    exp["writes"] = writes
    exp["trial"] = trial
    exp["launch_granularity"] = launch_granularity
    exp["log_granularity"] = log_granularity
    exp["autoscale_policy"] = autoscale_policy
    exp["failure_times"] = failure_times

    def launch_workers(keys):
        # Start one pywren worker per failure key.
        return pwex.map(
            lambda x: job_runner.lambdapack_run_with_failures(
                keys[x], program, pipeline_width=pipeline_width,
                cache_size=cache_size, timeout=timeout),
            range(len(keys)), extra_env=redis_env)

    def int_or_zero(raw):
        # Counters read back as None when they were never written.
        return 0 if raw is None else int(raw)

    logger.info("Longest Path: {0}".format(program.longest_path))
    program.start()
    t = time.time()
    logger.info("Starting with {0} cores".format(start_cores))
    failure_keys = [
        "{0}_failure_{1}_{2}".format(program.hash, i, 0)
        for i in range(start_cores)
    ]
    all_futures = launch_workers(failure_keys)
    start_time = time.time()
    last_run_time = start_time
    last_failure = time.time()
    num_failure_events = 0

    # One SQS client for the whole monitoring loop instead of one per queue
    # per iteration.
    sqs_client = boto3.client('sqs')

    while program.program_status() == lp.PS.RUNNING:
        curr_time = int(time.time() - start_time)
        max_pc = program.get_max_pc()
        times.append(int(time.time()))
        time.sleep(log_granularity)
        waiting = 0
        running = 0
        for queue_url in program.queue_urls:
            attrs = sqs_client.get_queue_attributes(
                QueueUrl=queue_url,
                AttributeNames=[
                    'ApproximateNumberOfMessages',
                    'ApproximateNumberOfMessagesNotVisible'
                ])['Attributes']
            waiting += int(attrs["ApproximateNumberOfMessages"])
            running += int(attrs["ApproximateNumberOfMessagesNotVisible"])
        sqs_invis_counts.append(running)
        sqs_vis_counts.append(waiting)

        busy_workers = int_or_zero(
            REDIS_CLIENT.get("{0}_busy".format(program.hash)))
        up_workers = int_or_zero(program.get_up())
        up_workers_counts.append(up_workers)
        busy_workers_counts.append(busy_workers)

        logger.debug("Waiting: {0}, Currently Processing: {1}".format(
            waiting, running))
        logger.debug("{2}: Up Workers: {0}, Busy Workers: {1}".format(
            up_workers, busy_workers, curr_time))
        if (curr_time % INFO_FREQ) == 0:
            logger.info("Max PC is {0}".format(max_pc))
            logger.info("Waiting: {0}, Currently Processing: {1}".format(
                waiting, running))
            logger.info("{2}: Up Workers: {0}, Busy Workers: {1}".format(
                up_workers, busy_workers, curr_time))

        # Raw counters are divided by 1e9 before being recorded.
        flops.append(int_or_zero(program.get_flops()) / 1e9
                     if program is not None else 0)
        reads.append(int_or_zero(program.get_read()) / 1e9)
        writes.append(int_or_zero(program.get_write()) / 1e9)

        time_since_launch = time.time() - last_run_time
        if autoscale_policy == "dynamic":
            if (time_since_launch > launch_granularity
                    and up_workers < np.ceil(waiting * 0.5 / pipeline_width)
                    and up_workers < max_cores):
                cores_to_launch = int(
                    min(np.ceil(waiting / pipeline_width) - up_workers,
                        max_cores - up_workers))
                logger.info(
                    "launching {0} new tasks....".format(cores_to_launch))
                _failure_keys = [
                    "{0}_failure_{1}_{2}".format(program.hash, i, curr_time)
                    for i in range(cores_to_launch)
                ]
                new_futures = launch_workers(_failure_keys)
                last_run_time = time.time()
                # NOTE(review): unlike the "constant_timeout" branch, these
                # keys are not added to ``failure_keys``, so workers started
                # here are never selected for failure injection -- confirm
                # whether that is intended.
                all_futures.extend(new_futures)
        elif autoscale_policy == "constant_timeout":
            if time_since_launch > (0.75 * timeout):
                cores_to_launch = max_cores
                logger.info(
                    "launching {0} new tasks....".format(cores_to_launch))
                _failure_keys = [
                    "{0}_failure_{1}_{2}".format(program.hash, i, curr_time)
                    for i in range(cores_to_launch)
                ]
                new_futures = launch_workers(_failure_keys)
                last_run_time = time.time()
                failure_keys += _failure_keys
                all_futures.extend(new_futures)
        else:
            raise Exception("unknown autoscale policy")

        if ((time.time() - last_failure) > failure_time
                and num_failure_events < max_failure_events):
            logger.info("Killing some jobs")
            idxs = np.random.choice(
                len(failure_keys),
                int(failure_percentage * len(failure_keys)),
                replace=False)
            num_failure_events += 1
            last_failure = time.time()
            failure_times.append(last_failure)
            for i in idxs:
                logger.info("Killing: job {0}".format(i))
                # Setting the key flags that worker for failure.
                REDIS_CLIENT.set(failure_keys[i], 1)

    exp["all_futures"] = all_futures
    # Report every instruction block whose start counter is not exactly 1.
    for pc in range(program.num_inst_blocks):
        run_count = int_or_zero(
            REDIS_CLIENT.get("{0}_{1}_start".format(program.hash, pc)))
        if run_count != 1:
            logger.info("PC: {0}, Run Count: {1}".format(pc, run_count))

    e = time.time()
    logger.info(program.program_status())
    logger.info("PROGRAM STATUS " + str(program.program_status()))
    logger.info("PROGRAM HASH " + str(program.hash))
    logger.info("Took {0} seconds".format(e - t))
    exp["total_runtime"] = e - t
    exp["num_failure_events"] = num_failure_events

    # collect in parallel; the context manager shuts the pool down, which
    # the original never did.
    with fs.ThreadPoolExecutor(72) as executor:
        futures = [executor.submit(program.get_profiling_info, i)
                   for i in range(0, program.num_inst_blocks, 1)]
        fs.wait(futures)
        profiled_blocks = [f.result() for f in futures]

    serializer = serialize.SerializeIndependent()
    byte_string = serializer([profiled_blocks])[0][0]
    exp["profiled_block_pickle_bytes"] = byte_string

    _read, _write, total_flops, bins, _insts, _runtimes = lp.perf_profile(
        profiled_blocks, num_bins=100)
    flop_rate = sum(total_flops) / max(bins)
    exp["flop_rate"] = flop_rate
    print("Average Flop rate of {0}".format(flop_rate))

    # save other stuff (the output directory was created above)
    exp_bytes = pickle.dumps(exp)
    dump_path = "failure_experiments/{0}.pickle".format(arg_hash)
    print("Dumping experiment pickle to {0}".format(dump_path))
    with open(dump_path, "wb+") as f:
        f.write(exp_bytes)
def start_batch(minibatches):
    """Fan ``gradient_batch`` out over ``minibatches`` with pywren.

    A default pywren executor is created for this call and
    ``gradient_batch`` is mapped over the elements of ``minibatches``.

    Returns:
        The value returned by the executor's ``map`` call (the futures of
        the launched invocations).  Nothing is waited on here.
    """
    return pywren.default_executor().map(gradient_batch, minibatches)
fh.setLevel(logging.DEBUG) fh.setFormatter(pywren.wren.formatter) pywren.wren.logger.addHandler(fh) t1 = time.time() LOOPCOUNT = 6 N = 1600 MAT_N = 4096 iters = np.arange(N) def f(x): return compute_flops(LOOPCOUNT, MAT_N) pwex = pywren.default_executor() futures = pwex.map(f, iters) print "invocation done, dur=", time.time() - t1 print futures[0].callset_id result_count = 0 while result_count < N: fs_dones, fs_notdones = pywren.wait(futures) result_count = len(fs_dones) est_flop = 2 * result_count * LOOPCOUNT * MAT_N**3 est_gflops = est_flop / 1e9 / (time.time() - t1) print "jobs done: {:5d} runtime: {:5.1f}s {:8.1f} GFLOPS ".format( result_count,
def sort_data():
    """Run the sort (reduce) phase of the Redis-backed shuffle on pywren.

    Positional command-line arguments, read from ``sys.argv``:
        1. number of pywren tasks to launch
        2. number of rounds ("works") performed by each task
        3. number of input partitions
        4. ``;``-separated list of Redis hostnames
        5. rate; only used to build the name of the output pickle

    For every round a task queues one GET per input partition on the Redis
    pipelines, concatenates the returned records, sorts them by key and
    uploads the sorted bytes to S3 from a background thread while the next
    round is being read.

    Returns:
        dict with the keys ``results`` (one tuple per task, as returned by the
        remote function), ``run_statuses`` and ``invoke_statuses``.  The same
        dict is pickled to a file in the current working directory.
    """

    def run_command(key):
        # NOTE(review): ``concat_time`` is a module global and ends up holding
        # the record count of the last round, not a duration.  With zero
        # rounds it is never assigned here.
        global concat_time
        pywren.wrenlogging.default_config('INFO')
        begin_of_function = time.time()
        logger = logging.getLogger(__name__)
        logger.info("taskId = " + str(key['taskId']))
        logger.info("number of works = " + str(key['works']))
        logger.info("number of input partitions = " + str(key['parts']))

        bucketName = key['bucket']
        taskId = key['taskId']
        rounds = int(key['works'])
        numPartitions = int(key['parts'])

        # Records are 100 bytes: a 10-byte sort key plus a 90-byte payload.
        recordType = np.dtype([('key', 'S10'), ('value', 'S90')])
        client = boto3.client('s3', 'us-east-2')

        # One pipeline per Redis host: GETs are queued, then flushed in bulk.
        rs = [
            StrictRedis(host=hostname, port=6379, db=0).pipeline()
            for hostname in key['redis'].split(";")
        ]
        nrs = len(rs)

        [t1, t2, t3] = [time.time()] * 3
        [read_time, work_time, write_time] = [0] * 3

        write_pool = ThreadPool(1)
        number_of_clients = 1
        read_pool = ThreadPool(number_of_clients)
        write_pool_handler_container = []

        logger.info("number of rounds here here here" + str(rounds))
        for roundIdx in range(rounds):
            logger.info("HHHHHHH 1")
            inputs = []

            def read_work(reader_key):
                """Fetch this reader's slice of partitions for one round."""
                client_id = int(reader_key['client_id'])
                reduceId = rounds * taskId + reader_key['roundIdx']
                key_per_client = int(reader_key['key-per-client'])
                logger.info("HHHHHHH 4")
                first_map = key_per_client * client_id
                last_map = min(key_per_client * (client_id + 1),
                               numPartitions)
                for mapId in range(first_map, last_map):
                    keyname = "shuffle/part-" + str(mapId) + "-" + str(
                        reduceId)
                    m = hashlib.md5()
                    m.update(keyname.encode('utf-8'))
                    randomized_keyname = "shuffle/" + m.hexdigest(
                    )[:8] + "-part-" + str(mapId) + "-" + str(reduceId)
                    logger.info("The name of the key to read is: " +
                                randomized_keyname)
                    logger.info("HHHHHHH 5")
                    try:
                        # The key's hash prefix selects the Redis host.
                        ridx = int(m.hexdigest()[:8], 16) % nrs
                        rs[ridx].get(randomized_keyname)
                    except Exception:
                        logger.info("reading error key " +
                                    randomized_keyname)
                        raise
                logger.info("HHHHHHH 6")
                # Flush every pipeline and decode the replies.
                for r in rs:
                    logger.info("HHHHHHH 7")
                    objs = r.execute()
                    logger.info("HHHHHHH 8")
                    # frombuffer replaces the deprecated binary-mode
                    # fromstring; copy() keeps the array writable for the
                    # in-place sort below.
                    data = [
                        np.frombuffer(obj, dtype=recordType).copy()
                        for obj in objs
                    ]
                    logger.info("HHHHHHH 9")
                    for d in data:
                        d.sort(order='key')
                    logger.info("HHHHHHH 10")
                    inputs.extend(data)

            # Integer ceil-division: partitions handled by each reader.
            key_per_client = (numPartitions + number_of_clients -
                              1) // number_of_clients
            logger.info("HHHHHHH 2")
            reader_keylist = [{
                'roundIdx': roundIdx,
                'client_id': client_id,
                'key-per-client': key_per_client
            } for client_id in range(number_of_clients)]
            logger.info("HHHHHHH 3")
            read_pool.map(read_work, reader_keylist)
            logger.info("HHHHHHH 11")
            t1 = time.time()
            logger.info('read time ' + str(t1 - t3))
            read_time = t1 - t3

            # Wait for the previous round's upload before ``records`` is
            # rebound; the upload closure reads ``records`` when it runs.
            # NOTE(review): wait() does not re-raise errors from the upload
            # thread, so a failed put_object goes unnoticed.
            if len(write_pool_handler_container) > 0:
                write_pool_handler = write_pool_handler_container.pop()
                twait_start = time.time()
                write_pool_handler.wait()
                twait_end = time.time()
                if twait_end - twait_start > 0.5:
                    logger.info('write time = ' + str(twait_end - t3) +
                                " slower than read " + str(t1 - t3))
                else:
                    logger.info('write time < ' + str(twait_end - t3) +
                                " faster than read " + str(t1 - t3))

            t2 = time.time()
            records = np.concatenate(inputs)
            gc.collect()
            concat_time = len(records)
            records.sort(order='key', kind='mergesort')
            t3 = time.time()
            logger.info('sort time: ' + str(t3 - t2))
            work_time = t3 - t2

            def write_work(reduceId):
                """Upload the sorted records of this round to S3."""
                keyname = "output/part-" + str(reduceId)
                m = hashlib.md5()
                m.update(keyname.encode('utf-8'))
                randomized_keyname = "output/" + m.hexdigest(
                )[:8] + "-part-" + str(reduceId)
                body = records.tobytes()
                client.put_object(Bucket=bucketName,
                                  Key=randomized_keyname,
                                  Body=body)

            write_pool_handler = write_pool.map_async(
                write_work, [taskId * rounds + roundIdx])
            write_pool_handler_container.append(write_pool_handler)

        # Drain the upload of the final round.
        if len(write_pool_handler_container) > 0:
            write_pool_handler = write_pool_handler_container.pop()
            write_pool_handler.wait()
            twait_end = time.time()
            logger.info('last write time = ' + str(twait_end - t3))
            write_time = twait_end - t3

        read_pool.close()
        write_pool.close()
        read_pool.join()
        write_pool.join()
        end_of_function = time.time()
        return (begin_of_function, end_of_function, read_time, work_time,
                write_time, concat_time)

    numTasks = int(sys.argv[1])
    worksPerTask = int(sys.argv[2])
    numPartitions = int(sys.argv[3])
    redisnode = sys.argv[4]
    rate = int(sys.argv[5])

    keylist = [{
        'taskId': i,
        'works': worksPerTask,
        'redis': redisnode,
        'parts': numPartitions,
        'bucket': "yupengtang-pywren-49"
    } for i in range(numTasks)]

    wrenexec = pywren.default_executor()
    futures = wrenexec.map(run_command, keylist)
    pywren.wait(futures)
    results = [f.result() for f in futures]
    print("sort done")

    run_statuses = [f.run_status for f in futures]
    invoke_statuses = [f.invoke_status for f in futures]
    res = {
        'results': results,
        'run_statuses': run_statuses,
        'invoke_statuses': invoke_statuses
    }
    filename = "redis-sort-sort-con" + str(rate) + ".pickle.breakdown." + str(
        len(redisnode.split(";")))
    # Context manager so the pickle is flushed and the handle closed.
    with open(filename, 'wb') as out_file:
        pickle.dump(res, out_file)
    return res
def run_experiment(problem_size, shard_size, pipeline, num_priorities, lru,
                   eager, truncate, max_cores, start_cores, trial,
                   launch_granularity, timeout, log_granularity,
                   autoscale_policy, standalone, warmup, verify, matrix_exists,
                   read_limit, write_limit):
    """Run one sharded Cholesky experiment and record its metrics.

    The function builds (or re-opens) the sharded input matrix, compiles a
    LambdaPack Cholesky program, launches workers through pywren, polls
    SQS/Redis for progress while autoscaling, and finally uploads the
    collected metrics to S3 under
    ``lambdapack/<program hash>/runtime.pickle``.

    Args:
        problem_size: number of rows of the random ``X`` (``problem_size x 1``)
            whose ``X X^T`` product is factorised.
        shard_size: row shard size of ``X``.
        pipeline: pipeline width passed to every launched worker.
        num_priorities, eager, read_limit, write_limit: forwarded to
            ``lp.LambdaPackProgram``.
        lru: when true the workers get ``cache_size=5``, otherwise 0.
        truncate: forwarded to ``compiler._chol``.
        max_cores: upper bound on workers (also the warm-up fan-out).
        start_cores: number of workers launched initially.
        trial: recorded in the experiment dict and the argument hash only.
        launch_granularity: minimum seconds between "dynamic" launches.
        timeout: worker timeout; "constant_timeout" relaunches ``max_cores``
            workers every ``0.85 * timeout`` seconds.
        log_granularity: seconds slept between two polls.
        autoscale_policy: ``"dynamic"`` or ``"constant_timeout"``.
        standalone: use the pywren standalone executor instead of the
            default one.
        warmup: spin up ``max_cores`` sleeping workers before starting.
        verify: also compute a local Cholesky and print the max difference.
        matrix_exists: re-open existing sharded matrices instead of
            generating them.

    Raises:
        Exception: for an unknown ``autoscale_policy``; any exception raised
            while monitoring is re-raised after the program is stopped.
    """

    def _redis_int(raw):
        """Decode a counter value, treating a missing key (None) as 0."""
        return 0 if raw is None else int(raw)

    # set up logging
    logger = logging.getLogger()
    region = wc.default()["account"]["aws_region"]
    print("REGION", region)
    for logger_name in logging.Logger.manager.loggerDict:
        logging.getLogger(logger_name).setLevel(logging.CRITICAL)
    logger.setLevel(logging.DEBUG)
    arg_bytes = pickle.dumps(
        (problem_size, shard_size, pipeline, num_priorities, lru, eager,
         truncate, max_cores, start_cores, trial, launch_granularity, timeout,
         log_granularity, autoscale_policy, read_limit, write_limit))
    arg_hash = hashlib.md5(arg_bytes).hexdigest()
    log_file = "{0}.log".format(arg_hash)
    fh = logging.FileHandler(log_file)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    ch.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(ch)
    logger.info("Logging to {0}".format(log_file))

    if standalone:
        extra_env = {
            "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
            # Fixed: the secret key used to be filled from AWS_ACCESS_KEY_ID.
            "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"],
            "OMP_NUM_THREADS": "1",
            "AWS_DEFAULT_REGION": region
        }
        runtime_config = wc.default()
        runtime_config['runtime']['s3_bucket'] = 'numpywrenpublic'
        runtime_config['runtime']['s3_key'] = (
            "pywren.runtime/pywren_runtime-3.6-numpywren-standalone.tar.gz")
        pwex = pywren.standalone_executor(config=runtime_config)
    else:
        extra_env = {"AWS_DEFAULT_REGION": region}
        runtime_config = wc.default()
        runtime_config['runtime']['s3_bucket'] = 'numpywrenpublic-us-east-1'
        runtime_config['runtime']['s3_key'] = (
            "pywren.runtime/pywren_runtime-3.6-numpywren-08-25-2018.tar.gz")
        pwex = pywren.default_executor(config=runtime_config)

    if (not matrix_exists):
        X = np.random.randn(problem_size, 1)
        shard_sizes = [shard_size, 1]
        X_sharded = BigMatrix("cholesky_test_{0}_{1}".format(
            problem_size, shard_size),
                              shape=X.shape,
                              shard_sizes=shard_sizes,
                              write_header=True,
                              autosqueeze=False,
                              bucket="numpywrentop500test",
                              hash_keys=False)
        shard_matrix(X_sharded, X)
        print("Generating PSD matrix...")
        t = time.time()
        print(X_sharded.shape)
        XXT_sharded = binops.gemm(pwex,
                                  X_sharded,
                                  X_sharded.T,
                                  overwrite=False)
        e = time.time()
        print("GEMM took {0}".format(e - t))
    else:
        X_sharded = BigMatrix("cholesky_test_{0}_{1}".format(
            problem_size, shard_size),
                              autosqueeze=False,
                              hash_keys=False,
                              bucket="numpywrentop500test")
        key_name = binops.generate_key_name_binop(X_sharded, X_sharded.T,
                                                  "gemm")
        XXT_sharded = BigMatrix(key_name,
                                hash_keys=False,
                                bucket="numpywrentop500test")
    XXT_sharded.lambdav = problem_size * 10

    if (verify):
        A = XXT_sharded.numpy()
        print("Computing local cholesky")
        L = np.linalg.cholesky(A)

    t = time.time()
    instructions, trailing, L_sharded = compiler._chol(XXT_sharded,
                                                       truncate=truncate)
    # Fixed: use the ``pipeline`` argument rather than a global ``args``.
    pipeline_width = pipeline
    cache_size = 5 if lru else 0
    pywren_config = pwex.config
    npw_config = npw.config.default()
    program = lp.LambdaPackProgram(instructions,
                                   executor=pywren.lambda_executor,
                                   pywren_config=pywren_config,
                                   num_priorities=num_priorities,
                                   eager=eager,
                                   config=npw_config,
                                   write_limit=write_limit,
                                   read_limit=read_limit)

    warmup_start = time.time()
    if (warmup):
        warmup_sleep = 170

        def warmup_fn(x):
            program.incr_up(1)
            time.sleep(warmup_sleep)
            program.decr_up(1)

        print("Warming up...")
        futures = pwex.map(warmup_fn, range(max_cores))
        last_spinup = time.time()
        while (True):
            # Re-issue the warm-up map before the sleeping workers expire.
            if ((time.time() - last_spinup) > 0.75 * warmup_sleep):
                print("Calling pwex.map..")
                futures = pwex.map(warmup_fn, range(max_cores))
                last_spinup = time.time()
            time.sleep(2)
            up_workers = _redis_int(program.get_up())
            print("{0} workers alive".format(up_workers))
            if (up_workers >= max_cores):
                time.sleep(warmup_sleep)
                break
    warmup_end = time.time()
    print("Warmup took {0} seconds".format(warmup_end - warmup_start))
    e = time.time()
    print("Program compile took {0} seconds".format(e - t))
    print("program.hash", program.hash)
    REDIS_CLIENT = program.control_plane.client

    done_counts = []
    ready_counts = []
    post_op_counts = []
    not_ready_counts = []
    running_counts = []
    sqs_invis_counts = []
    sqs_vis_counts = []
    up_workers_counts = []
    busy_workers_counts = []
    read_objects = []
    write_objects = []
    all_read_timeouts = []
    all_write_timeouts = []
    all_redis_timeouts = []
    times = [time.time()]
    flops = [0]
    reads = [0]
    writes = [0]
    print("LRU", lru)
    print("eager", eager)

    # The lists are stored by reference, so appends made while monitoring
    # are visible in ``exp`` when it is serialised.
    exp = {
        "redis_done_counts": done_counts,
        "redis_ready_counts": ready_counts,
        "redis_post_op_counts": post_op_counts,
        "redis_not_ready_counts": not_ready_counts,
        "redis_running_counts": running_counts,
        "sqs_invis_counts": sqs_invis_counts,
        "sqs_vis_counts": sqs_vis_counts,
        "busy_workers": busy_workers_counts,
        "up_workers": up_workers_counts,
        "times": times,
        "lru": lru,
        "priority": num_priorities,
        "eager": eager,
        "truncate": truncate,
        "max_cores": max_cores,
        "problem_size": problem_size,
        "shard_size": shard_size,
        "pipeline": pipeline,
        "flops": flops,
        "reads": reads,
        "writes": writes,
        "read_objects": read_objects,
        "write_objects": write_objects,
        "read_timeouts": all_read_timeouts,
        "write_timeouts": all_write_timeouts,
        "redis_timeouts": all_redis_timeouts,
        "trial": trial,
        "launch_granularity": launch_granularity,
        "log_granularity": log_granularity,
        "autoscale_policy": autoscale_policy,
        "standalone": standalone,
        "program": program,
        "time_steps": 1,
        "failed": False,
    }

    program.start()
    t = time.time()
    logger.info("Starting with {0} cores".format(start_cores))
    invoker = fs.ThreadPoolExecutor(1)

    def _launch(n_cores):
        """Submit a background pywren map that starts ``n_cores`` workers.

        ``n_cores`` is bound per call, so rebinding the caller's variable
        before the background thread runs cannot change the fan-out.
        Returns the ``concurrent.futures.Future`` of the map call.
        """
        return invoker.submit(lambda: pwex.map(
            lambda x: job_runner.lambdapack_run(program,
                                                pipeline_width=pipeline_width,
                                                cache_size=cache_size,
                                                timeout=timeout),
            range(n_cores),
            extra_env=extra_env))

    # Each entry is the Future of one background ``pwex.map`` call.
    all_futures = [_launch(start_cores)]
    start_time = time.time()
    last_run_time = start_time
    print(program.program_status())
    print("QUEUE URLS", len(program.queue_urls))
    try:
        # One SQS client for the whole run instead of one per queue per poll.
        sqs_client = boto3.client('sqs')
        while (program.program_status() == lp.PS.RUNNING):
            time.sleep(log_granularity)
            curr_time = int(time.time() - start_time)
            p = program.get_progress()
            if (p is None):
                print("no progress...")
                continue
            max_pc = int(p)
            times.append(int(time.time()))

            waiting = 0
            running = 0
            for queue_url in program.queue_urls:
                attrs = sqs_client.get_queue_attributes(
                    QueueUrl=queue_url,
                    AttributeNames=[
                        'ApproximateNumberOfMessages',
                        'ApproximateNumberOfMessagesNotVisible'
                    ])['Attributes']
                waiting += int(attrs["ApproximateNumberOfMessages"])
                running += int(attrs["ApproximateNumberOfMessagesNotVisible"])
            sqs_invis_counts.append(running)
            sqs_vis_counts.append(waiting)

            busy_workers = _redis_int(
                REDIS_CLIENT.get("{0}_busy".format(program.hash)))
            up_workers = _redis_int(program.get_up())
            up_workers_counts.append(up_workers)
            busy_workers_counts.append(busy_workers)
            logger.debug("{2}: Up Workers: {0}, Busy Workers: {1}".format(
                up_workers, busy_workers, curr_time))
            if ((curr_time % INFO_FREQ) == 0):
                logger.info("Waiting: {0}, Currently Processing: {1}".format(
                    waiting, running))
                logger.info("{2}: Up Workers: {0}, Busy Workers: {1}".format(
                    up_workers, busy_workers, curr_time))

            raw_flops = program.get_flops()
            current_gflops = 0 if raw_flops is None else int(raw_flops) / 1e9
            flops.append(current_gflops)
            raw_read = program.get_read()
            current_gbytes_read = (0 if raw_read is None else
                                   int(raw_read) / 1e9)
            reads.append(current_gbytes_read)
            raw_write = program.get_write()
            current_gbytes_write = (0 if raw_write is None else
                                    int(raw_write) / 1e9)
            writes.append(current_gbytes_write)

            elapsed = times[-1] - times[0]
            gflops_rate = flops[-1] / elapsed
            greads_rate = reads[-1] / elapsed
            gwrites_rate = writes[-1] / elapsed
            # Object counts assume b x b blocks of 8-byte values.
            b = XXT_sharded.shard_sizes[0]
            current_objects_read = (current_gbytes_read * 1e9) / (b * b * 8)
            current_objects_write = (current_gbytes_write * 1e9) / (b * b * 8)
            read_objects.append(current_objects_read)
            write_objects.append(current_objects_write)
            read_rate = read_objects[-1] / elapsed
            write_rate = write_objects[-1] / elapsed
            avg_workers = np.mean(up_workers_counts)

            smooth_len = 10
            if (len(flops) > smooth_len + 5):
                window = times[-1] - times[-smooth_len]
                gflops_rate_5_min_window = (flops[-1] -
                                            flops[-smooth_len]) / window
                gread_rate_5_min_window = (reads[-1] -
                                           reads[-smooth_len]) / window
                gwrite_rate_5_min_window = (writes[-1] -
                                            writes[-smooth_len]) / window
                read_rate_5_min_window = (
                    read_objects[-1] - read_objects[-smooth_len]) / window
                write_rate_5_min_window = (
                    write_objects[-1] - write_objects[-smooth_len]) / window
                workers_5_min_window = np.mean(
                    up_workers_counts[-smooth_len:])
            else:
                gflops_rate_5_min_window = "N/A"
                gread_rate_5_min_window = "N/A"
                gwrite_rate_5_min_window = "N/A"
                workers_5_min_window = "N/A"
                read_rate_5_min_window = "N/A"
                write_rate_5_min_window = "N/A"

            # A missing counter key counts as zero timeouts.
            read_timeouts = _redis_int(REDIS_CLIENT.get("s3.timeouts.read"))
            write_timeouts = _redis_int(REDIS_CLIENT.get("s3.timeouts.write"))
            redis_timeouts = _redis_int(REDIS_CLIENT.get("redis.timeouts"))
            all_read_timeouts.append(read_timeouts)
            all_write_timeouts.append(write_timeouts)
            all_redis_timeouts.append(redis_timeouts)
            # Until something has been read/written the fraction is
            # undefined; the unguarded division raised ZeroDivisionError.
            read_timeouts_fraction = (read_timeouts / current_objects_read
                                      if current_objects_read else "N/A")
            write_timeouts_fraction = (write_timeouts / current_objects_write
                                       if current_objects_write else "N/A")

            print("=======================================")
            print("Max PC is {0}".format(max_pc))
            print("Waiting: {0}, Currently Processing: {1}".format(
                waiting, running))
            print("{2}: Up Workers: {0}, Busy Workers: {1}".format(
                up_workers, busy_workers, curr_time))
            print(
                "{0}: Total GFLOPS {1}, Total GBytes Read {2}, Total GBytes Write {3}"
                .format(curr_time, current_gflops, current_gbytes_read,
                        current_gbytes_write))
            print(
                "{0}: Average GFLOPS rate {1}, Average GBytes Read rate {2}, Average GBytes Write rate {3}, Average Worker Count {4}"
                .format(curr_time, gflops_rate, greads_rate, gwrites_rate,
                        avg_workers))
            print("{0}: Average read txns/s {1}, Average write txns/s {2}".
                  format(curr_time, read_rate, write_rate))
            print(
                "{0}: smoothed GFLOPS rate {1}, smoothed GBytes Read rate {2}, smoothed GBytes Write rate {3}, smoothed Worker Count {4}"
                .format(curr_time, gflops_rate_5_min_window,
                        gread_rate_5_min_window, gwrite_rate_5_min_window,
                        workers_5_min_window))
            print("{0}: smoothed read txns/s {1}, smoothed write txns/s {2}".
                  format(curr_time, read_rate_5_min_window,
                         write_rate_5_min_window))
            print(
                "{0}: Read timeouts: {1}, Write timeouts: {2}, Redis timeouts: {3} "
                .format(curr_time, read_timeouts, write_timeouts,
                        redis_timeouts))
            print(
                "{0}: Read timeouts fraction: {1}, Write timeouts fraction: {2}"
                .format(curr_time, read_timeouts_fraction,
                        write_timeouts_fraction))
            print("=======================================")

            time_since_launch = time.time() - last_run_time
            if (autoscale_policy == "dynamic"):
                if (time_since_launch > launch_granularity
                        and up_workers < np.ceil(
                            waiting * 0.5 / pipeline_width)
                        and up_workers < max_cores):
                    cores_to_launch = int(
                        min(
                            np.ceil(waiting / pipeline_width) - up_workers,
                            max_cores - up_workers))
                    logger.info(
                        "launching {0} new tasks....".format(cores_to_launch))
                    # Fixed: a single Future is appended; ``extend`` on it
                    # raised TypeError (a Future is not iterable).
                    all_futures.append(_launch(cores_to_launch))
                    last_run_time = time.time()
            elif (autoscale_policy == "constant_timeout"):
                if (time_since_launch > (0.85 * timeout)):
                    cores_to_launch = max_cores
                    logger.info(
                        "launching {0} new tasks....".format(cores_to_launch))
                    all_futures.append(_launch(cores_to_launch))
                    last_run_time = time.time()
            else:
                raise Exception("unknown autoscale policy")
            exp["time_steps"] += 1

        if (verify):
            L_sharded_local = L_sharded.numpy()
            print("max diff", np.max(np.abs(L_sharded_local - L)))
    except KeyboardInterrupt:
        # A manual interrupt still falls through to the summary/upload.
        exp["failed"] = True
        program.stop()
    except Exception:
        traceback.print_exc()
        exp["failed"] = True
        program.stop()
        raise

    print(program.program_status())
    exp["all_futures"] = all_futures
    exp_bytes = dill.dumps(exp)
    s3_client = boto3.client('s3')
    s3_client.put_object(Key="lambdapack/{0}/runtime.pickle".format(
        program.hash),
                         Body=exp_bytes,
                         Bucket=program.bucket)
    print("=======================")
    print("=======================")
    print("Execution Summary:")
    print("Executed Program ID: {0}".format(program.hash))
    print("Program Success: {0}".format((not exp["failed"])))
    print("Problem Size: {0}".format(exp["problem_size"]))
    print("Shard Size: {0}".format(exp["shard_size"]))
    total_time = times[-1] - times[0]
    print("Total Execution time: {0}".format(total_time))
    # No sample was recorded if the loop never ran; avoid dividing by zero.
    average_flop_rate = exp["flops"][-1] / total_time if total_time else 0.0
    print("Average Flop Rate (GFlop/s): {0}".format(average_flop_rate))
    with open("/tmp/last_run", "w+") as f:
        f.write(program.hash)
for ii in range(num_replicas): destinations.append(key + format_string.format(ii + 1, num_replicas)) if use_pywren_for_replicas: def s3_cp(dest): data, _ = utils.get_s3_object_bytes_with_backoff(key, bucket=bucket) utils.put_s3_object_bytes_with_backoff(data, dest, bucket=bucket) return dest pywren_config = wc.default() pywren_config['runtime']['s3_bucket'] = 'imagenet2pywren' pywren_config['runtime'][ 's3_key'] = 'pywren.runtime/pywren_runtime-3.6-imagenet2pywren.tar.gz' pwex = pywren.default_executor(config=pywren_config) pbar = tqdm(total=len(destinations)) futures = pwex.map(s3_cp, destinations, exclude_modules=['site-packages']) last_status = 0 done, not_done = pywren.wait(futures) while len(not_done) > 0: ALWAYS = 3 done, not_done = pywren.wait(futures, ALWAYS) pbar.update(len(done) - last_status) last_status = len(done) time.sleep(1) all_results = [] for res in done: all_results.append(res.result())
def chunks(file):
    """Open the zarr array stored at S3 path ``file`` and return its chunks."""
    filesystem = s3fs.S3FileSystem()
    return zarr.open(s3fs.mapping.S3Map(file, s3=filesystem),
                     mode="r").chunks


def num_rows(x):
    """Return ``len`` of the AnnData read from the 10k-subset zarr store.

    The argument ``x`` is not used.
    """
    filesystem = s3fs.S3FileSystem()
    mapping = s3fs.mapping.S3Map('sc-tom-test-data/10x-10k-subset.zarr',
                                 s3=filesystem)
    return len(ad.read_zarr(mapping))


# Run once locally, then once remotely through pywren.
print(chunks('sc-tom-test-data/10x-10k-subset.zarr/X'))

wrenexec = pywren.default_executor()
future = wrenexec.call_async(chunks, 'sc-tom-test-data/10x-10k-subset.zarr/X')
print(future.result())

# Recorded output of the remote call:
# Traceback (most recent call last):
#   File "scratch/serverless-zarr-pywren.py", line 28, in <module>
#     print(future.result())
#   File "/Users/tom/workspace/single-cell-experiments/venv/lib/python3.6/site-packages/pywren/future.py", line 202, in result
#     reraise(*self._traceback)
#   File "/Users/tom/workspace/single-cell-experiments/venv/lib/python3.6/site-packages/six.py", line 692, in reraise
#     raise value.with_traceback(tb)
#   File "/var/task/jobrunner.py", line 29, in <module>
#   File "/tmp/pymodules/pywren/serialize/cloudpickle/cloudpickle.py", line 718, in subimport
#   File "/tmp/pymodules/zarr/__init__.py", line 6, in <module>
#   File "/tmp/pymodules/zarr/core.py", line 13, in <module>
#   File "/tmp/pymodules/zarr/util.py", line 13, in <module>
def toymap(key, share):
    """Time a Jiffy queue put/get round trip inside one pywren task.

    ``key`` is a dict providing ``partition_num``, ``rounds``, ``taskId``,
    ``appName`` and ``em`` (the Jiffy host).  A single remote invocation of
    the nested function is launched with it.  The list of per-task
    ``[start, end]`` timestamps is appended to ``share``; nothing is
    returned.
    """

    def run_command(key):
        pywren.wrenlogging.default_config('INFO')
        logging.basicConfig(level=logging.DEBUG)
        logger = logging.getLogger(__name__)
        logger.info("before everything")
        logger.info(key)
        t_s = time.time()

        partition_num = key['partition_num']
        rounds = key['rounds']
        taskId = key['taskId']
        appName = key['appName']
        em = JiffyClient(host=key['em'])

        logger.info("berfore queue")
        data_ques, msg_que = open_or_create_jiffy_queues(
            em, appName, partition_num, 1, 'sender')
        logger.info("queue opend")

        for i in range(rounds):
            # Several results below are unused, but the calls are kept so
            # the measured wall-clock interval is unchanged.
            msg = create_msg(rounds, taskId, i, partition_num)

            # Synthetic table standing in for the real input.
            right_table = 100000
            indices = tm.makeStringIndex(right_table).values
            join_keys = np.tile(indices[:right_table], 1)
            right = DataFrame({
                "key": join_keys,
                "value": np.random.randn(right_table)
            })
            logger.info("Finish generating data")

            x = 1 if i == rounds - 1 else 0

            # The CSV encoding is immediately replaced by a fixed-width
            # byte payload, which is what actually goes through the queue.
            encoded = right.to_csv(sep="|", header=False,
                                   index=False).encode('utf-8')
            a = np.random.randint(1, 10, 500000)
            encoded = np.asarray(a).astype('S100').tobytes()

            data_path = "/" + appName + "/" + '01'
            test_que = em.open_or_create_queue(data_path, "local://tmp", 10,
                                               1)
            logger.info("get encoded size" + str(sys.getsizeof(encoded)))

            ta = time.time()
            test_que.put(encoded)
            tb = time.time()
            logger.info("wirte takes" + str(tb - ta))

            logger.info("before get")
            obj = test_que.get()
            logger.info("get obj of size" + str(sys.getsizeof(obj)))
            tc = time.time()
            logger.info("get takes " + str(tc - tb))

        t_f = time.time()
        return [t_s, t_f]

    wrenexec = pywren.default_executor()
    keylist = [key]
    print(keylist)
    futures = wrenexec.map(run_command, keylist)
    pywren.wait(futures)
    share.append([f.result() for f in futures])