def run_write(args, data, process_id, timer):
    """Write ``data`` to Alluxio once per iteration, printing each duration.

    Args:
        args: Parsed options; reads ``host``, ``port``, ``iteration``,
            ``dst``, ``node`` and ``write_type``.
        data: Payload handed to ``write`` on every iteration.
        process_id: Identifier of this worker, used in the destination path
            and in the progress output.
        timer: Passed through unchanged to ``write``.
    """
    alluxio_client = alluxio.Client(args.host, args.port)
    progress_template = 'process {}, iteration {} ... '
    for round_index in range(args.iteration):
        # Announce the iteration first so the duration lands on the same line.
        progress = progress_template.format(process_id, round_index)
        print(progress, end='')

        target = alluxio_path(args.dst, round_index, args.node, process_id)
        mode = alluxio.wire.WriteType(args.write_type)
        seconds = write(alluxio_client, data, target, mode, timer)

        print('{} seconds'.format(seconds))
        # Flush explicitly so output from child processes shows up, see
        # https://stackoverflow.com/questions/2774585/child-processes-created-with-python-multiprocessing-module-wont-print
        sys.stdout.flush()
def setup_client(handler):
    """Start a local HTTP server on a free port and return a client for it.

    The server runs ``serve_forever`` in a daemon thread so it cannot keep
    the interpreter alive on its own.

    Args:
        handler: Request handler class passed to ``HTTPServer``.

    Returns:
        A ``(client, cleanup)`` tuple. ``client`` is an ``alluxio.Client``
        pointed at the local server with a 60 second timeout; ``cleanup`` is
        a zero-argument callable that stops the server and releases its
        listening socket.
    """
    host = 'localhost'
    port = get_free_port()
    # Parenthesised form behaves the same as a statement on Python 2 and as
    # a function call on Python 3.
    print(port)
    server = HTTPServer((host, port), handler)
    server_thread = Thread(target=server.serve_forever)
    server_thread.daemon = True
    server_thread.start()
    client = alluxio.Client(host, port, timeout=60)

    def cleanup():
        # The previous ``lambda: server.shutdown`` only evaluated to the
        # bound method and never invoked it, so the server kept running.
        server.shutdown()
        server.server_close()

    return client, cleanup
def run_read(args, expected, process_id, timer):
    """Read from Alluxio once per iteration, printing each duration.

    Args:
        args: Parsed options; reads ``host``, ``port``, ``iteration``,
            ``src`` and ``node``.
        expected: Passed through unchanged to ``read``.
        process_id: Identifier of this worker, used in the source path and
            in the progress output.
        timer: Passed through unchanged to ``read``.
    """
    alluxio_client = alluxio.Client(args.host, args.port)
    progress_template = 'process {}, iteration {} ... '
    for round_index in range(args.iteration):
        # Announce the iteration first so the duration lands on the same line.
        progress = progress_template.format(process_id, round_index)
        print(progress, end='')

        # With a truthy node the path is derived per iteration and process;
        # otherwise ``args.src`` is used as-is.
        if args.node:
            source = alluxio_path(args.src, round_index, args.node, process_id)
        else:
            source = args.src
        seconds = read(alluxio_client, source, expected, timer)

        print('{} seconds'.format(seconds))
        # Flush explicitly so output from child processes shows up, see
        # https://stackoverflow.com/questions/2774585/child-processes-created-with-python-multiprocessing-module-wont-print
        sys.stdout.flush()
def main(args):
    """Time repeated reads of a file from Alluxio and check their content.

    Loads the expected content from the local file ``args.expected``, then
    for each iteration reads the corresponding Alluxio path, asserts the
    content matches, and prints the elapsed time. The summed time is handed
    to ``print_stats`` at the end.

    Args:
        args: Parsed options; reads ``expected``, ``host``, ``port``,
            ``iteration`` and ``src``.
    """
    with open(args.expected, 'r') as local_file:
        expected = local_file.read()

    client = alluxio.Client(args.host, args.port)
    total_time = 0
    for iteration in range(args.iteration):
        src = alluxio_path(args.src, iteration, 0, 0)
        print('Iteration %d ... ' % iteration, end='')

        # The timed region covers opening, reading and closing the file.
        began = time.time()
        with client.open(src, 'r') as remote_file:
            data = remote_file.read()
        elapsed_time = time.time() - began

        assert expected == data
        print('{} seconds'.format(elapsed_time))
        total_time += elapsed_time

    print_stats(args, total_time)
def main(args):
    """Time repeated writes of a local file's content into Alluxio.

    Loads the payload from the local file ``args.src``, then for each
    iteration writes it to the corresponding Alluxio path and prints the
    elapsed time. The summed time is handed to ``print_stats`` at the end.

    Args:
        args: Parsed options; reads ``src``, ``host``, ``port``,
            ``iteration``, ``dst`` and ``write_type``.
    """
    with open(args.src, 'r') as local_file:
        data = local_file.read()

    client = alluxio.Client(args.host, args.port)
    total_time = 0
    for iteration in range(args.iteration):
        dst = alluxio_path(args.dst, iteration, 0, 0)
        mode = alluxio.wire.WriteType(args.write_type)
        print('Iteration %d ... ' % iteration, end='')

        # The timed region covers opening, writing and closing the file.
        began = time.time()
        with client.open(dst, 'w', recursive=True, write_type=mode) as remote_file:
            remote_file.write(data)
        elapsed_time = time.time() - began

        print('{} seconds'.format(elapsed_time))
        total_time += elapsed_time

    print_stats(args, total_time)
def format_data_for_hive(filename, out_file):
    """Copy an Alluxio file to another, turning ``', '`` separators into ``','``.

    Each input line is split on the two-character separator ``', '`` and
    re-joined with a bare comma; everything else on the line, including its
    line ending, is left untouched. Output is written in batches so the
    whole file is never held in memory at once.

    Args:
        filename: Alluxio path of the file to read.
        out_file: Alluxio path of the file to write.
    """
    client = alluxio.Client('localhost', 39999)
    # Flush once more than this many converted lines are pending.
    flush_threshold = 20000
    pending = []
    with client.open(filename, 'r') as f:
        with client.open(out_file, 'w') as fout:
            for line in f:
                if len(pending) > flush_threshold:
                    fout.write(''.join(pending))
                    pending = []
                # Collect in a list and join on flush; repeated string
                # concatenation would be quadratic in the batch size.
                pending.append(','.join(line.split(', ')))
            # Write whatever is left (an empty string if nothing is pending).
            fout.write(''.join(pending))
    print('Done')
def format_data_for_mahout(filename, out_file):
    """Copy an Alluxio file to another as comma-separated records.

    Each input line is split on whitespace, its third field is re-formatted
    as a float with two decimal places, and the fields are re-joined with
    commas and terminated by a newline. Output is written in batches so the
    whole file is never held in memory at once.

    Args:
        filename: Alluxio path of the file to read.
        out_file: Alluxio path of the file to write.

    Raises:
        IndexError: If a line has fewer than three whitespace-separated
            fields.
        ValueError: If the third field of a line is not a valid float.
    """
    client = alluxio.Client('localhost', 39999)
    # Flush once more than this many converted lines are pending.
    flush_threshold = 20000
    pending = []
    with client.open(filename, 'r') as f:
        with client.open(out_file, 'w') as fout:
            for line in f:
                parts = line.split()
                # special because original #s have precision of only 2
                # places after decimal
                parts[2] = "{0:.2f}".format(float(parts[2]))
                if len(pending) > flush_threshold:
                    fout.write(''.join(pending))
                    pending = []
                # Collect in a list and join on flush; repeated string
                # concatenation would be quadratic in the batch size.
                pending.append(','.join(parts) + '\n')
            # Write whatever is left (an empty string if nothing is pending).
            fout.write(''.join(pending))
    print('Done')
def info(s):
    """Print the message ``s`` after passing it through ``green``."""
    # NOTE(review): green() is defined outside this view; presumably it
    # wraps the text in a green terminal colour -- confirm.
    print(green(s))


def pretty_json(obj):
    """Return ``obj`` serialized as a JSON string indented by two spaces."""
    return json.dumps(obj, indent=2)


# Alluxio paths exercised by this example script.
py_test_root_dir = '/py-test-dir'
py_test_nested_dir = '/py-test-dir/nested'
py_test = py_test_nested_dir + '/py-test'
py_test_renamed = py_test_root_dir + '/py-test-renamed'

# Client for the Alluxio endpoint at localhost:39999.
client = alluxio.Client('localhost', 39999)

# Step 1: create the nested test directory.
info("creating directory %s" % py_test_nested_dir)
# NOTE(review): recursive=True presumably also creates missing parent
# directories -- confirm against the option documentation.
opt = option.CreateDirectory(recursive=True)
client.create_directory(py_test_nested_dir, opt)
info("done")

# Step 2: write a greeting line followed by this script itself.
info("writing to %s" % py_test)
with client.open(py_test, 'w') as f:
    f.write('Alluxio works with Python!\n')
    with open(sys.argv[0]) as this_file:
        # The open file object itself is passed, not its contents.
        # NOTE(review): presumably the writer accepts file-like objects --
        # confirm against the client documentation.
        f.write(this_file)
info("done")

# Step 3: fetch the status of the file that was just written.
info("getting status of %s" % py_test)
stat = client.get_status(py_test)