def mock_and_validate_create_new_session(defaults):
    global client_test
    create_session_request_mock_uri = base_uri + "/sessions/"
    app_name = 'Test App'
    conf_dict = {'spark.app.name': app_name}
    json_data = {
        u'kind': u'pyspark',
        u'log': [],
        u'proxyUser': None,
        u'state': u'starting',
        u'owner': None,
        u'id': session_id
    }
    responses.add(responses.POST, create_session_request_mock_uri,
                  json=json_data, status=201,
                  content_type='application/json')
    client_test = HttpClient(base_uri, conf_dict=conf_dict,
                             load_defaults=defaults)
    assert client_test._config.get(
        client_test._CONFIG_SECTION, 'spark.app.name') == app_name
    if defaults:
        assert client_test._config.has_option(
            client_test._CONFIG_SECTION, 'spark.config')
        assert client_test._config.get(
            client_test._CONFIG_SECTION, 'spark.config') == 'override'
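The helper above depends on module-level names (base_uri, session_id, client_test) and is meant to be driven from tests running under an activated responses mock. A minimal sketch of that surrounding setup; the values, import style, and test names are assumptions, not taken from the original file:

import pytest
import responses
from configparser import NoOptionError  # ConfigParser on Python 2

from livy.client import HttpClient

# Assumed module-level fixtures; the real values are not shown above.
base_uri = 'http://localhost:8998'
session_id = 0
client_test = None


@responses.activate
def test_create_new_session_without_defaults():
    mock_and_validate_create_new_session(False)


@responses.activate
def test_create_new_session_with_defaults():
    # Assumes a defaults config file setting 'spark.config' to 'override'
    # is picked up by HttpClient(load_defaults=True).
    mock_and_validate_create_new_session(True)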
def test_connect_to_existing_session():
    reconnect_mock_request_uri = base_uri + "/sessions/" + \
        str(session_id) + "/connect"
    reconnect_session_uri = base_uri + "/sessions/" + str(session_id)
    json_data = {
        u'kind': u'pyspark',
        u'log': [],
        u'proxyUser': None,
        u'state': u'starting',
        u'owner': None,
        u'id': session_id
    }
    with responses.RequestsMock() as rsps:
        rsps.add(responses.POST, reconnect_mock_request_uri,
                 json=json_data, status=201,
                 content_type='application/json')
        client_reconnect = HttpClient(reconnect_session_uri,
                                      load_defaults=False)
        assert isinstance(client_reconnect, HttpClient)
        # No conf was passed on reconnect, so 'spark.app.name' must be absent.
        with pytest.raises(NoOptionError):
            client_reconnect._config.get(client_reconnect._CONFIG_SECTION,
                                         'spark.app.name')
import json
import time
from importlib import import_module
from os.path import abspath, basename, splitext

import requests
from livy.client import HttpClient


class LivyHandler:
    def __init__(self, host='http://localhost:8998'):
        self._headers = {'Content-Type': 'application/json'}
        if host is not None:
            self.create_pyspark_session(host)

    def _wait_for_state(self, url, desired_state):
        # Poll the session/statement URL until it reports the desired state.
        while True:
            r = requests.get(url, headers=self._headers)
            if r.json()['state'] == desired_state:
                break
            time.sleep(1)
        return r

    def create_pyspark_session(self, host):
        self._host = host
        data = {'kind': 'pyspark'}

        # Create a Spark session
        print('Creating Spark session...')
        r = requests.post(host + '/sessions', data=json.dumps(data),
                          headers=self._headers)

        # Wait until the new Spark session is ready to use
        self._session_url = host + r.headers['location']
        r = self._wait_for_state(self._session_url, 'idle')

        # Create client for Livy batch jobs
        self._lc = HttpClient(self._session_url)

    def exec_str(self, code):
        print('Submitting code...')
        statements_url = self._session_url + '/statements'
        data = {'code': code}
        r = requests.post(statements_url, data=json.dumps(data),
                          headers=self._headers)

        # Wait until the code completes
        print('Running code...')
        status_url = self._host + r.headers['location']
        r = self._wait_for_state(status_url, 'available')

        output = r.json()['output']
        print('output=', output)
        if output['status'] == 'error':
            ans = {'text/plain': output['traceback']}
        else:
            ans = {'text/plain': [output['data']['text/plain']]}
        return ans

    def exec_file(self, py_uri):
        # Ship the file to the session, then run its main() on the driver.
        py_uri_abs = abspath(py_uri)
        self._lc.upload_pyfile(py_uri_abs)
        m = splitext(basename(py_uri_abs))[0]
        m_imp = import_module(m)

        def upload_pyfile_job(jc):
            return m_imp.main(jc.sc)

        return self._lc.submit(upload_pyfile_job).result()

    def close(self):
        print('Closing Spark session...')
        requests.delete(self._session_url, headers=self._headers)
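A minimal usage sketch for LivyHandler, assuming a Livy server is reachable at http://localhost:8998; the submitted code string is illustrative only:

# Illustrative driver for LivyHandler; assumes a running Livy server.
handler = LivyHandler(host='http://localhost:8998')

# exec_str returns a dict shaped like {'text/plain': [...]}.
result = handler.exec_str('print(6 * 7)')
print(result['text/plain'])

handler.close()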
"""
Usage: pi_app [livy url] [slices]

To run this Python script you need to install livy-python-api-*version*.tar.gz
with easy_install first:

    python /pathTo/pi_app.py http://<livy-server>:8998 2
"""
import sys
from operator import add
from random import random

from livy.client import HttpClient

if len(sys.argv) != 3:
    print("Usage: pi_app <livy url> <slices>", file=sys.stderr)
    exit(-1)

slices = int(sys.argv[2])
samples = 100000 * slices

client = HttpClient(sys.argv[1])


def f(_):
    x = random() * 2 - 1
    y = random() * 2 - 1
    return 1 if x ** 2 + y ** 2 <= 1 else 0


def pi_job(context):
    count = context.sc.parallelize(range(1, samples + 1), slices) \
        .map(f).reduce(add)
    return 4.0 * count / samples


pi = client.submit(pi_job).result()
print("Pi is roughly %f" % pi)
client.stop(True)
from flask import Flask, send_file, request, jsonify
from flask_restful import Resource, Api
from flask_restful import reqparse
from livy.client import HttpClient
from urlparse import urlparse

import service

kdensity_response = None

app = Flask(__name__, static_folder="static")
api = Api(app)

# Connect to the Livy server and prepare the service layer.
client = HttpClient(urlparse("http://172.16.2.72:8998"), load_defaults=False)
service.set_client(client)
service.upload_jar_files()


class KernelDensityInput(Resource):
    def post(self):
        global kdensity_response
        print "Inside post of KernelDensity"
        parser = reqparse.RequestParser()
        parser.add_argument('column', type=str, help='Column name')
        parser.add_argument('bandwidth', type=str, help='Value for bandwidth')
        parser.add_argument('points', type=str, help='Value for points list')
        args = parser.parse_args()
        _column = args['column']
        _bandwidth = args['bandwidth']
        _points = args['points']
        print "column::", _column
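A hypothetical way to register the resource and start the app; the route path and port below are assumptions, not taken from the original:

# Hypothetical wiring for the resource above; the route and port are
# assumed, not from the original application.
api.add_resource(KernelDensityInput, '/kerneldensity')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)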