def check_status(self, notebook, snippet):
  """Return the current Livy state of the batch behind this snippet."""
  batch_id = snippet['result']['handle']['id']
  state = get_spark_api(self.user).get_batch_status(batch_id)
  return {'status': state}
def create_session(self, lang='scala', properties=None):
  """Create a Livy session and wait (up to ~2 minutes) for it to become idle.

  Falls back to the user's saved default Spark configuration when no
  properties are passed. Raises QueryError if the session never reaches
  the 'idle' state.
  """
  if not properties and USE_DEFAULT_CONFIGURATION.get():
    user_config = DefaultConfiguration.objects.get_configuration_for_user(app='spark', user=self.user)
    if user_config is not None:
      properties = user_config.properties_list

  livy_props = self.get_livy_props(lang, properties)
  api = get_spark_api(self.user)
  session = api.create_session(**livy_props)

  # Poll once immediately, then up to 120 more times, one second apart.
  state = api.get_session(session['id'])
  for _attempt in range(120):
    if state['state'] != 'starting':
      break
    state = api.get_session(session['id'])
    time.sleep(1)

  if state['state'] != 'idle':
    detail = '\n'.join(state['log']) if state['log'] else 'timeout'
    raise QueryError(
        _('The Spark session is %s and could not be created in the cluster: %s') % (state['state'], detail))

  return {
    'type': lang,
    'id': session['id'],
    'properties': self.to_properties(livy_props),
  }
def execute(self, notebook, snippet):
  """Submit the snippet to Livy as a batch job and return its handle."""
  snippet_type = snippet['type']
  snippet_props = snippet['properties']

  if snippet_type == 'jar':
    batch = {
      'file': snippet_props.get('app_jar'),
      'className': snippet_props.get('class'),
      'args': snippet_props.get('arguments'),
    }
  elif snippet_type == 'py':
    batch = {
      'file': snippet_props.get('py_file'),
      'args': snippet_props.get('argument', []),
    }
  else:
    # Generic batch: jar entry point plus auxiliary python/data files.
    # driverMemory / driverCores / executorMemory / executorCores /
    # archives could also be forwarded here.
    batch = {
      'file': snippet_props.get('app_jar'),
      'className': snippet_props.get('class'),
      'args': snippet_props.get('arguments'),
      'pyFiles': snippet_props.get('py_file'),
      'files': snippet_props.get('files'),
    }

  response = get_spark_api(self.user).submit_batch(batch)
  return {
    'id': response['id'],
    'has_result_set': True,
    'properties': [],
  }
def close_statement(self, notebook, snippet):
  """Close the Livy batch behind this snippet; report -1 when there is none."""
  batch_id = snippet['result']['handle']['id']
  if batch_id is None:
    return {'status': -1}  # skipped

  get_spark_api(self.user).close_batch(batch_id)
  return {'session': batch_id, 'status': 0}
def close_session(self, session):
  """Close the given Livy session.

  Returns {'session': id, 'status': 0} on success, {'status': -1} when
  the session has no id. Raises SessionExpired when Livy answers 404
  (or 500, kept as a workaround).

  NOTE(review): when close() raises a RestException with a code other
  than 404/500, this falls through and implicitly returns None —
  presumably intentional best-effort cleanup, but confirm with callers.
  """
  api = get_spark_api(self.user)

  if session['id'] is not None:
    try:
      api.close(session['id'])
      return {'session': session['id'], 'status': 0}
    except RestException as e:
      if e.code == 404 or e.code == 500:  # TODO remove the 500
        raise SessionExpired(e)
  else:
    return {'status': -1}
def check_status(self, notebook, snippet):
  """Poll Livy for the state of the statement behind this snippet.

  Returns {'status': <livy state>}. Raises SessionExpired when the
  error message indicates the Livy session is gone; re-raises any
  other error.
  """
  api = get_spark_api(self.user)
  session = _get_snippet_session(notebook, snippet)
  cell = snippet['result']['handle']['id']

  try:
    response = api.fetch_data(session['id'], cell)
    return {
      'status': response['state'],
    }
  except Exception as e:
    message = force_unicode(str(e)).lower()
    # Raw string: a plain "\d" is an invalid escape (SyntaxWarning on Python 3.12+).
    if re.search(r"session ('\d+' )?not found", message):
      raise SessionExpired(e)
    else:
      raise e
def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None):
  """Connectivity probe: only the 'hello' operation is supported.

  Pings the Livy API and returns an empty, successful result set.
  """
  if operation != 'hello':
    raise NotImplementedError()

  get_spark_api(self.user).get_status()

  return {
    'status': 0,
    'rows': [],
  }
def execute(self, notebook, snippet):
  """Submit the snippet's statement to its Livy session.

  Returns an async handle dict. Raises SessionExpired when the session
  is not found, the connection is refused, or the session is busy;
  re-raises any other error.
  """
  api = get_spark_api(self.user)
  session = _get_snippet_session(notebook, snippet)

  try:
    response = api.submit_statement(session['id'], snippet['statement'])
    return {
      'id': response['id'],
      'has_result_set': True,
      'sync': False
    }
  except Exception as e:
    message = force_unicode(str(e)).lower()
    # Raw string: a plain "\d" is an invalid escape (SyntaxWarning on Python 3.12+).
    if re.search(r"session ('\d+' )?not found", message) \
        or 'connection refused' in message \
        or 'session is in state busy' in message:
      raise SessionExpired(e)
    else:
      raise e
def get_api(self):
  """Build a Spark (Livy) API client for this user and interpreter."""
  api = get_spark_api(self.user, self.interpreter)
  return api
def get_log(self, notebook, snippet, startFrom=0, size=None):
  """Fetch a slice of the Livy log for this snippet's session."""
  session = _get_snippet_session(notebook, snippet)
  return get_spark_api(self.user).get_log(session['id'], startFrom=startFrom, size=size)
def cancel(self, notebook, snippet):
  """Ask Livy to cancel this snippet's session; always reports success."""
  api = get_spark_api(self.user)
  session = _get_snippet_session(notebook, snippet)
  # The cancel response carries nothing we need, so it is not captured
  # (the original bound it to an unused local).
  api.cancel(session['id'])
  return {'status': 0}
def fetch_result(self, notebook, snippet, rows, start_over):
  """Fetch the output of an executed Livy statement.

  Dispatches on the MIME bundle Livy returns: a Livy table, a PNG
  image, JSON rows with a schema, or plain text. Returns a dict with
  'data', 'images', 'meta' and 'type' keys on success; raises
  QueryError with Livy's traceback/ename on error; raises
  SessionExpired when the session is gone.

  NOTE(review): implicitly returns None when content['status'] is
  neither 'ok' nor 'error' — confirm callers tolerate that.
  """
  api = get_spark_api(self.user)
  session = _get_snippet_session(notebook, snippet)
  cell = snippet['result']['handle']['id']

  try:
    response = api.fetch_data(session['id'], cell)
  except Exception as e:
    message = force_unicode(str(e)).lower()
    # Raw string: a plain "\d" is an invalid escape (SyntaxWarning on Python 3.12+).
    if re.search(r"session ('\d+' )?not found", message):
      raise SessionExpired(e)
    else:
      raise e

  content = response['output']

  if content['status'] == 'ok':
    data = content['data']
    images = []

    try:
      table = data['application/vnd.livy.table.v1+json']
    except KeyError:
      try:
        images = [data['image/png']]
      except KeyError:
        images = []

      if 'application/json' in data:
        result = data['application/json']
        data = result['data']
        meta = [{'name': field['name'], 'type': field['type'], 'comment': ''} for field in result['schema']['fields']]
        result_type = 'table'  # renamed from 'type' to avoid shadowing the builtin
      else:
        data = [[data['text/plain']]]
        meta = [{'name': 'Header', 'type': 'STRING_TYPE', 'comment': ''}]
        result_type = 'text'
    else:
      # The Livy table payload was present: use its rows and headers.
      data = table['data']
      headers = table['headers']
      meta = [{'name': h['name'], 'type': h['type'], 'comment': ''} for h in headers]
      result_type = 'table'

    # Non start_over not supported
    if not start_over:
      data = []

    return {'data': data, 'images': images, 'meta': meta, 'type': result_type}
  elif content['status'] == 'error':
    tb = content.get('traceback', None)
    if tb is None or not tb:
      # No traceback: fall back to the exception name/value pair.
      msg = content.get('ename', 'unknown error')
      evalue = content.get('evalue')
      if evalue is not None:
        msg = '%s: %s' % (msg, evalue)
    else:
      msg = ''.join(tb)
    raise QueryError(msg)
def create_session(self, lang='scala', properties=None):
  """Create a Livy session from UI-provided properties and wait for idle.

  Falls back to the user's saved default configuration (when enabled)
  or the interpreter defaults. Raises QueryError if the session does
  not reach 'idle' within ~2 minutes.
  """
  if not properties:
    config = None
    if USE_DEFAULT_CONFIGURATION.get():
      config = DefaultConfiguration.objects.get_configuration_for_user(app='spark', user=self.user)
    if config is not None:
      properties = config.properties_list
    else:
      properties = self.get_properties()

  props = {p['name']: p['value'] for p in properties} if properties is not None else {}

  # HUE-4761: Hue's session request is causing Livy to fail with "JsonMappingException: Can not
  # deserialize instance of scala.collection.immutable.List out of VALUE_STRING token" due to List
  # type values not being formed properly, they are quoted csv strings (without brackets) instead
  # of proper List types; this is for keys archives, jars, files and pyFiles. The Mako frontend
  # probably should be modified to pass the values as Livy expects but for now we coerce these
  # types to be Lists. Issue only occurs when non-default values are used because the default path
  # properly sets the empty list '[]' for these four values.
  # Note also that Livy has a 90 second timeout for the session request to complete, this needs to
  # be increased for requests that take longer, for example when loading large archives.
  for key in ('archives', 'jars', 'files', 'pyFiles'):
    value = props[key]
    if not isinstance(value, list):
      props[key] = value.split(",")
      LOG.debug("Check List type: %s was not a list", key)

  # Convert the conf list to a dict for Livy.
  LOG.debug("Property Spark Conf kvp list from UI is: %s", props['conf'])
  props['conf'] = {conf.get('key'): conf.get('value') for conf in props['conf']}
  LOG.debug("Property Spark Conf dictionary is: %s", props['conf'])

  props['kind'] = lang

  api = get_spark_api(self.user)
  response = api.create_session(**props)

  # Poll once immediately, then up to 120 more times, one second apart.
  status = api.get_session(response['id'])
  count = 0
  while status['state'] == 'starting' and count < 120:
    status = api.get_session(response['id'])
    count += 1
    time.sleep(1)

  if status['state'] != 'idle':
    info = '\n'.join(status['log']) if status['log'] else 'timeout'
    raise QueryError(_('The Spark session could not be created in the cluster: %s') % info)

  return {'type': lang, 'id': response['id'], 'properties': properties}
def get_log(self, notebook, snippet, startFrom=0, size=None):
  """Fetch a slice of the Livy batch log for this snippet."""
  batch_id = snippet['result']['handle']['id']
  return get_spark_api(self.user).get_batch_log(batch_id, startFrom=startFrom, size=size)