def setUp(self):
  """Prepare a SparkApi pointed at a fixed Livy endpoint for each test."""
  livy_interpreter = {
      'name': 'livy',
      'options': {'api_url': 'http://gethue.com:8998'},
  }
  self.user = '******'
  self.interpreter = livy_interpreter
  self.api = SparkApi(self.user, self.interpreter)
def setUp(self):
  """Log in a plain (non-superuser) test client and build a SparkApi for that user."""
  self.client = make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)
  self.user = User.objects.get(username="******")
  self.interpreter = {
      'name': 'livy',
      'options': {'api_url': 'http://gethue.com:8998'},
  }
  self.api = SparkApi(self.user, self.interpreter)
def notebook(request):
  """Render the notebook editor page.

  Passes the requested editor id (if any), the user's interpreters and the
  default Spark session properties to the template. Probing Livy's session
  kind is best-effort: if the Spark app is missing or unconfigured, the
  error is logged and the page still renders.
  """
  notebook_id = request.GET.get('notebook')

  is_yarn_mode = False
  try:
    from spark.conf import LIVY_SERVER_SESSION_KIND
    is_yarn_mode = LIVY_SERVER_SESSION_KIND.get()
  except Exception:  # was a bare except, which would also trap SystemExit/KeyboardInterrupt
    LOG.exception('Spark is not enabled')

  return render('notebook.mako', request, {
      'editor_id': notebook_id or None,
      'notebooks_json': '{}',
      'options_json': json.dumps({
          'languages': get_interpreters(request.user),
          'session_properties': SparkApi.get_properties(),
          'is_optimizer_enabled': has_optimizer(),
          'is_navigator_enabled': has_navigator(),
          'editor_type': 'notebook'
      }),
      'is_yarn_mode': is_yarn_mode,
  })
def notebook(request):
  """Render the notebook page, loading an existing notebook when an id is given.

  With no ``notebook`` GET parameter a fresh empty Notebook is rendered.
  Probing Livy's session kind is best-effort and logged on failure.
  """
  notebook_id = request.GET.get('notebook')

  if notebook_id:
    notebook = Notebook(document=Document2.objects.get(id=notebook_id))
  else:
    notebook = Notebook()

  is_yarn_mode = False
  try:
    from spark.conf import LIVY_SERVER_SESSION_KIND
    is_yarn_mode = LIVY_SERVER_SESSION_KIND.get()
  except Exception:  # was a bare except, which would also trap SystemExit/KeyboardInterrupt
    LOG.exception('Spark is not enabled')

  return render('notebook.mako', request, {
      'notebooks_json': json.dumps([notebook.get_data()]),
      'options_json': json.dumps({
          'languages': get_interpreters(request.user),
          'session_properties': SparkApi.get_properties(),
          'is_optimizer_enabled': has_optimizer(),
      }),
      'is_yarn_mode': is_yarn_mode,
  })
def get_api(request, snippet):
  """Return the connector API instance matching the snippet's configured interface."""
  from notebook.connectors.hiveserver2 import HS2Api
  from notebook.connectors.jdbc import JdbcApi
  from notebook.connectors.rdbms import RdbmsApi
  from notebook.connectors.pig_batch import PigApi
  from notebook.connectors.spark_shell import SparkApi
  from notebook.connectors.spark_batch import SparkBatchApi
  from notebook.connectors.text import TextApi

  matches = [i for i in get_interpreters(request.user) if i['type'] == snippet['type']]
  if not matches:
    raise PopupException(_('Snippet type %(type)s is not configured in hue.ini') % snippet)

  interpreter = matches[0]
  interface = interpreter['interface']

  if interface == 'hiveserver2':
    return HS2Api(user=request.user)
  elif interface == 'livy':
    return SparkApi(request.user)
  elif interface == 'livy-batch':
    return SparkBatchApi(request.user)
  elif interface in ('text', 'markdown'):
    return TextApi(request.user)
  elif interface == 'rdbms':
    return RdbmsApi(request.user, interpreter=snippet['type'])
  elif interface == 'jdbc':
    return JdbcApi(request.user, interpreter=interpreter)
  elif interface == 'pig':
    return PigApi(user=request.user, request=request)
  else:
    raise PopupException(_('Notebook connector interface not recognized: %s') % interface)
def notebook(request, is_embeddable=False):
  """Render the notebook editor app page.

  Serves a 403 unless notebooks are enabled and the user has the notebook
  app access permission. Probing Livy's session kind is best-effort: a
  missing/unconfigured Spark app is logged and ignored.
  """
  if not SHOW_NOTEBOOKS.get() or not request.user.has_hue_permission(action="access", app='notebook'):
    return serve_403_error(request)

  notebook_id = request.GET.get('notebook', request.GET.get('editor'))

  is_yarn_mode = False
  try:
    from spark.conf import LIVY_SERVER_SESSION_KIND
    is_yarn_mode = LIVY_SERVER_SESSION_KIND.get()
  except Exception:  # was a bare except, which would also trap SystemExit/KeyboardInterrupt
    LOG.exception('Spark is not enabled')

  return render('notebook.mako', request, {
      'editor_id': notebook_id or None,
      'notebooks_json': '{}',
      'is_embeddable': request.GET.get('is_embeddable', False),
      'options_json': json.dumps({
          'languages': get_ordered_interpreters(request.user),
          'session_properties': SparkApi.get_properties(),
          'is_optimizer_enabled': has_optimizer(),
          'is_wa_enabled': has_workload_analytics(),
          'is_navigator_enabled': has_catalog(request.user),
          'editor_type': 'notebook'
      }),
      'is_yarn_mode': is_yarn_mode,
  })
def notebook(request, is_embeddable=False):
  """Render the notebook editor, using the embeddable template variant on demand."""
  notebook_id = request.GET.get("notebook")

  is_yarn_mode = False
  try:
    from spark.conf import LIVY_SERVER_SESSION_KIND
    is_yarn_mode = LIVY_SERVER_SESSION_KIND.get()
  except Exception:  # was a bare except, which would also trap SystemExit/KeyboardInterrupt
    LOG.exception("Spark is not enabled")

  template = "notebook.mako"
  if is_embeddable:
    template = "notebook_embeddable.mako"

  return render(template, request, {
      "editor_id": notebook_id or None,
      "notebooks_json": "{}",
      "options_json": json.dumps({
          "languages": get_ordered_interpreters(request.user),
          "session_properties": SparkApi.get_properties(),
          "is_optimizer_enabled": has_optimizer(),
          "is_navigator_enabled": has_navigator(request.user),
          "editor_type": "notebook",
      }),
      "is_yarn_mode": is_yarn_mode,
  })
def get_api(user, snippet, fs, jt):
  """Build the connector API for a snippet type, given the user and cluster handles."""
  from notebook.connectors.hiveserver2 import HS2Api
  from notebook.connectors.jdbc import JdbcApi
  from notebook.connectors.mysql import MySqlApi
  from notebook.connectors.pig_batch import PigApi
  from notebook.connectors.spark_shell import SparkApi
  from notebook.connectors.spark_batch import SparkBatchApi
  from notebook.connectors.text import TextApi

  candidates = [i for i in get_interpreters() if i['type'] == snippet['type']]
  if not candidates:
    raise PopupException(_('Snippet type %(type)s is not configured in hue.ini') % snippet)

  interpreter = candidates[0]
  interface = interpreter['interface']

  if interface == 'hiveserver2':
    return HS2Api(user)
  elif interface == 'livy':
    return SparkApi(user)
  elif interface == 'livy-batch':
    return SparkBatchApi(user)
  elif interface == 'text':
    return TextApi(user)
  elif interface == 'mysql':
    return MySqlApi(user)
  elif interface == 'jdbc':
    return JdbcApi(user, interpreter=interpreter)
  elif interface == 'pig':
    return PigApi(user, fs=fs, jt=jt)
  else:
    raise PopupException(_('Notebook connector interface not recognized: %s') % interface)
class TestSparkApi(object):
  """Tests for the Livy SparkApi connector: session creation and job-log parsing."""

  def setUp(self):
    self.user = '******'
    self.api = SparkApi(self.user)

  def test_create_session_plain(self):
    lang = 'pyspark'
    properties = None

    with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
      get_spark_api.return_value = Mock(
        create_session=Mock(return_value={'id': '1'}),
        get_session=Mock(return_value={'state': 'idle', 'log': ''}),
      )

      session = self.api.create_session(lang=lang, properties=properties)

      assert_equal(session['type'], 'pyspark')
      assert_equal(session['id'], '1')

      files_properties = [prop for prop in session['properties'] if prop['name'] == 'files']
      assert_true(files_properties, session['properties'])
      assert_equal(files_properties[0]['value'], [], session['properties'])

  def test_get_jobs(self):
    # Standalone-mode logs should surface the local Spark UI job URL.
    local_jobs = [
      {'url': u'http://172.21.1.246:4040/jobs/job/?id=0', 'name': u'0'}
    ]
    jobs = self.api._get_standalone_jobs(LIVY_STANDALONE_LOG)
    assert_equal(jobs, local_jobs, jobs)

    # YARN-mode logs should surface the RM proxy tracking URL.
    yarn_jobs = [
      {'url': u'http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/', 'name': u'application_1444070328046_0002'}
    ]
    jobs = self.api._get_yarn_jobs(LIVY_YARN_LOG)
    assert_equal(jobs, yarn_jobs, jobs)
def get_api(request, snippet):
  """Pick the connector API for a snippet, honoring batch execution and multi-cluster."""
  from notebook.connectors.dataeng import DataEngApi
  from notebook.connectors.hiveserver2 import HS2Api
  from notebook.connectors.jdbc import JdbcApi
  from notebook.connectors.rdbms import RdbmsApi
  from notebook.connectors.oozie_batch import OozieApi
  from notebook.connectors.solr import SolrApi
  from notebook.connectors.spark_shell import SparkApi
  from notebook.connectors.spark_batch import SparkBatchApi
  from notebook.connectors.text import TextApi

  if snippet.get('wasBatchExecuted'):
    return OozieApi(user=request.user, request=request)

  matches = [
      interpreter for interpreter in get_ordered_interpreters(request.user)
      if interpreter['type'] == snippet['type']
  ]
  if not matches:
    raise PopupException(_('Snippet type %(type)s is not configured in hue.ini') % snippet)

  interpreter = matches[0]
  interface = interpreter['interface']

  # Multi cluster
  cluster = Cluster(request.user)
  if cluster and cluster.get_type() == 'dataeng':
    interface = 'dataeng'

  if interface == 'hiveserver2':
    return HS2Api(user=request.user, request=request)
  elif interface == 'oozie':
    return OozieApi(user=request.user, request=request)
  elif interface == 'livy':
    return SparkApi(request.user)
  elif interface == 'livy-batch':
    return SparkBatchApi(request.user)
  elif interface in ('text', 'markdown'):
    return TextApi(request.user)
  elif interface == 'rdbms':
    return RdbmsApi(request.user, interpreter=snippet['type'])
  elif interface == 'dataeng':
    return DataEngApi(user=request.user, request=request, cluster_name=cluster.get_interface())
  elif interface == 'jdbc':
    return JdbcApi(request.user, interpreter=interpreter)
  elif interface == 'solr':
    return SolrApi(request.user, interpreter=interpreter)
  elif interface == 'pig':
    return OozieApi(user=request.user, request=request)  # Backward compatibility until Hue 4
  else:
    raise PopupException(_('Notebook connector interface not recognized: %s') % interface)
def notebook(request):
  """Render the notebook editor page with the user's interpreters and options."""
  notebook_id = request.GET.get('notebook')

  is_yarn_mode = False
  try:
    from spark.conf import LIVY_SERVER_SESSION_KIND
    is_yarn_mode = LIVY_SERVER_SESSION_KIND.get()
  except Exception:  # was a bare except, which would also trap SystemExit/KeyboardInterrupt
    LOG.exception('Spark is not enabled')

  return render('notebook.mako', request, {
      'editor_id': notebook_id or None,
      'notebooks_json': '{}',
      'options_json': json.dumps({
          'languages': get_interpreters(request.user),
          'session_properties': SparkApi.get_properties(),
          'is_optimizer_enabled': has_optimizer(),
      }),
      'is_yarn_mode': is_yarn_mode,
  })
def notebook(request):
  """Render a saved notebook when an id is supplied, otherwise a fresh one."""
  notebook_id = request.GET.get('notebook')

  if notebook_id:
    notebook = Notebook(document=Document2.objects.get(id=notebook_id))
  else:
    notebook = Notebook()

  is_yarn_mode = False
  try:
    from spark.conf import LIVY_SERVER_SESSION_KIND
    is_yarn_mode = LIVY_SERVER_SESSION_KIND.get()
  except Exception:  # was a bare except, which would also trap SystemExit/KeyboardInterrupt
    LOG.exception('Spark is not enabled')

  return render('notebook.mako', request, {
      'notebooks_json': json.dumps([notebook.get_data()]),
      'options_json': json.dumps({
          'languages': get_interpreters(request.user),
          'session_properties': SparkApi.get_properties(),
          'is_optimizer_enabled': has_optimizer(),
      }),
      'is_yarn_mode': is_yarn_mode,
  })
def get_api(request, snippet):
  """Resolve the connector API for a snippet, with lazy per-interface imports.

  Handles batch re-execution via Oozie, maps 'report' snippets to Impala,
  falls back to built-in connectors for hbase/kafka/solr, and applies the
  multi-cluster override coming from the POST 'cluster' field or the
  snippet's compute.
  """
  from notebook.connectors.oozie_batch import OozieApi

  if snippet.get('wasBatchExecuted'):
    return OozieApi(user=request.user, request=request)

  if snippet['type'] == 'report':
    snippet['type'] = 'impala'

  matches = [
      interpreter for interpreter in get_ordered_interpreters(request.user)
      if interpreter['type'] == snippet['type']
  ]

  if not matches:
    # These connector types work without an entry in hue.ini.
    if snippet['type'] in ('hbase', 'kafka', 'solr'):
      matches = [{
          'name': snippet['type'],
          'type': snippet['type'],
          'interface': snippet['type'],
          'options': {},
          'is_sql': False
      }]
    else:
      raise PopupException(_('Snippet type %(type)s is not configured in hue.ini') % snippet)

  interpreter = matches[0]
  interface = interpreter['interface']

  # Multi cluster
  cluster = json.loads(request.POST.get('cluster', '""'))  # Via Catalog API
  if cluster == 'undefined':
    cluster = None
  if not cluster and snippet.get('compute'):  # Via notebook.ko.js
    cluster = snippet.get('compute').get('id')
  if cluster and 'crn:altus:dataware:' in cluster:
    interface = 'altus-adb'
  if cluster:
    LOG.info('Selected cluster %s' % cluster)

  if interface == 'hiveserver2':
    from notebook.connectors.hiveserver2 import HS2Api
    return HS2Api(user=request.user, request=request, cluster=cluster)
  elif interface == 'oozie':
    return OozieApi(user=request.user, request=request)
  elif interface == 'livy':
    from notebook.connectors.spark_shell import SparkApi
    return SparkApi(request.user)
  elif interface == 'livy-batch':
    from notebook.connectors.spark_batch import SparkBatchApi
    return SparkBatchApi(request.user)
  elif interface in ('text', 'markdown'):
    from notebook.connectors.text import TextApi
    return TextApi(request.user)
  elif interface == 'rdbms':
    from notebook.connectors.rdbms import RdbmsApi
    return RdbmsApi(request.user, interpreter=snippet['type'])
  elif interface == 'altus-adb':
    from notebook.connectors.altus_adb import AltusAdbApi
    return AltusAdbApi(user=request.user, cluster_name=cluster, request=request)
  elif interface == 'dataeng':
    from notebook.connectors.dataeng import DataEngApi
    return DataEngApi(user=request.user, request=request, cluster_name=cluster.get('name'))
  elif interface in ('jdbc', 'teradata'):
    from notebook.connectors.jdbc import JdbcApi
    return JdbcApi(request.user, interpreter=interpreter)
  elif interface == 'sqlalchemy':
    from notebook.connectors.sqlalchemyapi import SqlAlchemyApi
    return SqlAlchemyApi(request.user, interpreter=interpreter)
  elif interface == 'solr':
    from notebook.connectors.solr import SolrApi
    return SolrApi(request.user, interpreter=interpreter)
  elif interface == 'hbase':
    from notebook.connectors.hbase import HBaseApi
    return HBaseApi(request.user)
  elif interface == 'kafka':
    from notebook.connectors.kafka import KafkaApi
    return KafkaApi(request.user)
  elif interface == 'pig':
    return OozieApi(user=request.user, request=request)  # Backward compatibility until Hue 4
  else:
    raise PopupException(_('Notebook connector interface not recognized: %s') % interface)
def get_api(request, snippet):
  """Resolve the connector API for a snippet in a connector/multi-cluster world.

  Handles batch re-execution via Oozie, maps 'report' to Impala, supplies
  built-in interpreters for hbase/kafka/solr/custom, derives the cluster
  from CONNECTORS config or the multi-cluster POST field, and dispatches
  on the resulting interface.
  """
  from notebook.connectors.oozie_batch import OozieApi

  if snippet.get('wasBatchExecuted'):
    return OozieApi(user=request.user, request=request)

  if snippet['type'] == 'report':
    snippet['type'] = 'impala'

  interpreter = [
      interpreter for interpreter in get_ordered_interpreters(request.user)
      if snippet['type'] in (interpreter['type'], interpreter['interface'])
  ]
  if not interpreter:
    # Built-in connectors that need no hue.ini entry.
    if snippet['type'] == 'hbase':
      interpreter = [{'name': 'hbase', 'type': 'hbase', 'interface': 'hbase', 'options': {}, 'is_sql': False}]
    elif snippet['type'] == 'kafka':
      interpreter = [{'name': 'kafka', 'type': 'kafka', 'interface': 'kafka', 'options': {}, 'is_sql': False}]
    elif snippet['type'] == 'solr':
      interpreter = [{'name': 'solr', 'type': 'solr', 'interface': 'solr', 'options': {}, 'is_sql': False}]
    elif snippet['type'] == 'custom':
      interpreter = [{
          'name': snippet['name'], 'type': snippet['type'], 'interface': snippet['interface'],
          'options': snippet.get('options', {}), 'is_sql': False
      }]
    else:
      raise PopupException(_('Snippet type %(type)s is not configured.') % snippet)

  interpreter = interpreter[0]
  interface = interpreter['interface']

  if CONNECTORS.IS_ENABLED.get():
    cluster = {
        'connector': snippet['type'],
        'id': interpreter['type'],
    }
    snippet['type'] = snippet['type'].split('-', 2)[0]
    cluster.update(interpreter['options'])
  # Multi cluster
  elif has_multi_cluster():
    # Via Catalog autocomplete API or Notebook create sessions
    cluster = json.loads(request.POST.get('cluster', '""'))
    if cluster == '""' or cluster == 'undefined':
      cluster = None
    if not cluster and snippet.get('compute'):  # Via notebook.ko.js
      cluster = snippet['compute']
  else:
    cluster = None

  cluster_name = cluster.get('id') if cluster else None

  if cluster and 'altus:dataware:k8s' in cluster_name:
    interface = 'hiveserver2'
  elif cluster and 'crn:altus:dataware:' in cluster_name:
    interface = 'altus-adb'
  elif cluster and 'crn:altus:dataeng:' in cluster_name:
    interface = 'dataeng'

  LOG.info('Selected cluster %s %s interface %s' % (cluster_name, cluster, interface))
  snippet['interface'] = interface

  if interface.startswith('hiveserver2') or interface == 'hms':
    from notebook.connectors.hiveserver2 import HS2Api
    return HS2Api(user=request.user, request=request, cluster=cluster, interface=interface)
  elif interface == 'oozie':
    return OozieApi(user=request.user, request=request)
  elif interface == 'livy':
    from notebook.connectors.spark_shell import SparkApi
    return SparkApi(request.user)
  elif interface == 'livy-batch':
    from notebook.connectors.spark_batch import SparkBatchApi
    return SparkBatchApi(request.user)
  elif interface == 'text' or interface == 'markdown':
    from notebook.connectors.text import TextApi
    return TextApi(request.user)
  elif interface == 'rdbms':
    from notebook.connectors.rdbms import RdbmsApi
    return RdbmsApi(request.user, interpreter=snippet['type'], query_server=snippet.get('query_server'))
  elif interface == 'altus-adb':
    from notebook.connectors.altus_adb import AltusAdbApi
    return AltusAdbApi(user=request.user, cluster_name=cluster_name, request=request)
  elif interface == 'dataeng':
    from notebook.connectors.dataeng import DataEngApi
    return DataEngApi(user=request.user, request=request, cluster_name=cluster_name)
  elif interface == 'jdbc':
    # Sniff the JDBC URL to pick a specialized driver wrapper.
    if interpreter['options'] and interpreter['options'].get('url', '').find('teradata') >= 0:
      from notebook.connectors.jdbc_teradata import JdbcApiTeradata
      return JdbcApiTeradata(request.user, interpreter=interpreter)
    elif interpreter['options'] and interpreter['options'].get('url', '').find('awsathena') >= 0:
      from notebook.connectors.jdbc_athena import JdbcApiAthena
      return JdbcApiAthena(request.user, interpreter=interpreter)
    elif interpreter['options'] and interpreter['options'].get('url', '').find('presto') >= 0:
      from notebook.connectors.jdbc_presto import JdbcApiPresto
      return JdbcApiPresto(request.user, interpreter=interpreter)
    elif interpreter['options'] and interpreter['options'].get('url', '').find('clickhouse') >= 0:
      from notebook.connectors.jdbc_clickhouse import JdbcApiClickhouse
      return JdbcApiClickhouse(request.user, interpreter=interpreter)
    else:
      from notebook.connectors.jdbc import JdbcApi
      return JdbcApi(request.user, interpreter=interpreter)
  elif interface == 'teradata':
    # Fixed: JdbcApiTeradata lives in jdbc_teradata (see the 'jdbc' branch above),
    # not in notebook.connectors.jdbc.
    from notebook.connectors.jdbc_teradata import JdbcApiTeradata
    return JdbcApiTeradata(request.user, interpreter=interpreter)
  elif interface == 'athena':
    # Fixed: JdbcApiAthena lives in jdbc_athena, not in notebook.connectors.jdbc.
    from notebook.connectors.jdbc_athena import JdbcApiAthena
    return JdbcApiAthena(request.user, interpreter=interpreter)
  elif interface == 'presto':
    from notebook.connectors.jdbc_presto import JdbcApiPresto
    return JdbcApiPresto(request.user, interpreter=interpreter)
  elif interface == 'sqlalchemy':
    from notebook.connectors.sqlalchemyapi import SqlAlchemyApi
    return SqlAlchemyApi(request.user, interpreter=interpreter)
  elif interface == 'solr':
    from notebook.connectors.solr import SolrApi
    return SolrApi(request.user, interpreter=interpreter)
  elif interface == 'hbase':
    from notebook.connectors.hbase import HBaseApi
    return HBaseApi(request.user)
  elif interface == 'kafka':
    from notebook.connectors.kafka import KafkaApi
    return KafkaApi(request.user)
  elif interface == 'pig':
    return OozieApi(user=request.user, request=request)  # Backward compatibility until Hue 4
  else:
    raise PopupException(_('Notebook connector interface not recognized: %s') % interface)
class TestSparkApi(object):
  """Tests for SparkApi: client construction, Livy property mapping, sessions, jobs."""

  def setUp(self):
    self.user = '******'
    self.interpreter = {
        'name': 'livy',
        'options': {'api_url': 'http://gethue.com:8998'},
    }
    self.api = SparkApi(self.user, self.interpreter)

  def test_get_api(self):
    lang = 'pyspark'
    properties = None
    # with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
    spark_api = self.api.get_api()
    assert_equal(spark_api.__class__.__name__, 'LivyClient')

  def test_get_livy_props_method(self):
    test_properties = [{
        "name": "files",
        "value": 'file_a,file_b,file_c',
    }]
    props = self.api.get_livy_props('scala', test_properties)
    assert_equal(props['files'], ['file_a', 'file_b', 'file_c'])

  def test_create_session_with_config(self):
    lang = 'pyspark'
    properties = None

    with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
      with patch('notebook.connectors.spark_shell.DefaultConfiguration') as DefaultConfiguration:
        with patch('notebook.connectors.spark_shell.USE_DEFAULT_CONFIGURATION') as USE_DEFAULT_CONFIGURATION:
          DefaultConfiguration.objects.get_configuration_for_user.return_value = Mock(
            properties_list=[{
                'multiple': False,
                'name': 'driverCores',
                'defaultValue': 1,
                'value': 2,
                'nice_name': 'Driver Cores',
                'help_text': 'Number of cores used by the driver, only in cluster mode (Default: 1)',
                'type': 'number',
                'is_yarn': True,
            }]
          )
          get_spark_api.return_value = Mock(
            create_session=Mock(return_value={'id': '1'}),
            get_session=Mock(return_value={'state': 'idle', 'log': ''}),
          )

          # Case with user configuration. Expected 2 driverCores
          USE_DEFAULT_CONFIGURATION.get.return_value = True
          session = self.api.create_session(lang=lang, properties=properties)
          assert_equal(session['type'], 'pyspark')
          assert_equal(session['id'], '1')
          for p in session['properties']:
            if p['name'] == 'driverCores':
              cores = p['value']
          assert_equal(cores, 2)

          # Case without user configuration. Expected 1 driverCores
          USE_DEFAULT_CONFIGURATION.get.return_value = True
          DefaultConfiguration.objects.get_configuration_for_user.return_value = None
          session2 = self.api.create_session(lang=lang, properties=properties)
          assert_equal(session2['type'], 'pyspark')
          assert_equal(session2['id'], '1')
          for p in session2['properties']:
            if p['name'] == 'driverCores':
              cores = p['value']
          assert_equal(cores, 1)

          # Case with no user configuration. Expected 1 driverCores
          USE_DEFAULT_CONFIGURATION.get.return_value = False
          session3 = self.api.create_session(lang=lang, properties=properties)
          assert_equal(session3['type'], 'pyspark')
          assert_equal(session3['id'], '1')
          for p in session3['properties']:
            if p['name'] == 'driverCores':
              cores = p['value']
          assert_equal(cores, 1)

  def test_create_session_plain(self):
    lang = 'pyspark'
    properties = None

    with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
      get_spark_api.return_value = Mock(
        create_session=Mock(return_value={'id': '1'}),
        get_session=Mock(return_value={'state': 'idle', 'log': ''}),
      )

      session = self.api.create_session(lang=lang, properties=properties)

      assert_equal(session['type'], 'pyspark')
      assert_equal(session['id'], '1')

      files_properties = [prop for prop in session['properties'] if prop['name'] == 'files']
      assert_true(files_properties, session['properties'])
      assert_equal(files_properties[0]['value'], [], session['properties'])

  def test_get_jobs(self):
    local_jobs = [
      {'url': u'http://172.21.1.246:4040/jobs/job/?id=0', 'name': u'0'}
    ]
    jobs = self.api._get_standalone_jobs(LIVY_STANDALONE_LOG)
    assert_equal(jobs, local_jobs, jobs)

    yarn_jobs = [
      {'url': u'http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/', 'name': u'application_1444070328046_0002'}
    ]
    jobs = self.api._get_yarn_jobs(LIVY_YARN_LOG)
    assert_equal(jobs, yarn_jobs, jobs)
def setUp(self):
  """Create a SparkApi bound to the test user for each test case."""
  self.user = "******"
  self.api = SparkApi(self.user)
# Fixture holder for the Spark shell connector tests: two verbatim Livy launch
# logs (one Spark-standalone, one YARN) used to exercise job-URL extraction.
# NOTE(review): these are captured runtime log strings — do not reformat or
# rewrap them, since the parsers under test match their exact content.
class TestSparkShellConnector(object): LIVY_STANDALONE_LOG = """ Starting livy-repl on http://172.21.1.246:58449 Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties 15/10/05 14:02:33 INFO SparkContext: Running Spark version 1.5.0 15/10/05 14:02:33 INFO SecurityManager: Changing view acls to: huetest 15/10/05 14:02:33 INFO SecurityManager: Changing modify acls to: huetest 15/10/05 14:02:33 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(huetest); users with modify permissions: Set(huetest) 15/10/05 14:02:33 INFO Slf4jLogger: Slf4jLogger started 15/10/05 14:02:33 INFO Remoting: Starting remoting 15/10/05 14:02:33 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://[email protected]:58451] 15/10/05 14:02:33 INFO Utils: Successfully started service 'sparkDriver' on port 58451. 15/10/05 14:02:33 INFO SparkEnv: Registering MapOutputTracker 15/10/05 14:02:33 INFO SparkEnv: Registering BlockManagerMaster 15/10/05 14:02:33 INFO DiskBlockManager: Created local directory at /private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/blockmgr-f63fdd28-6d86-4ae6-a91c-902fb0310fb4 15/10/05 14:02:33 INFO MemoryStore: MemoryStore started with capacity 530.0 MB 15/10/05 14:02:33 INFO HttpFileServer: HTTP File server directory is /private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-a0e35333-e2be-4b83-8a7e-3cb468270dc2/httpd-0235b01f-ee8b-40fd-96a9-de946b1a3426 15/10/05 14:02:33 INFO HttpServer: Starting HTTP Server 15/10/05 14:02:33 INFO Utils: Successfully started service 'HTTP file server' on port 58452. 15/10/05 14:02:33 INFO SparkEnv: Registering OutputCommitCoordinator 15/10/05 14:02:33 INFO Utils: Successfully started service 'SparkUI' on port 4040. 
15/10/05 14:02:33 INFO SparkUI: Started SparkUI at http://172.21.1.246:4040 15/10/05 14:02:34 INFO SparkContext: Added JAR file:/Users/huetest/Dev/hue/apps/spark/java/livy-assembly/target/scala-2.10/livy-assembly-3.9.0-SNAPSHOT.jar at http://172.21.1.246:58452/jars/livy-assembly-3.9.0-SNAPSHOT.jar with timestamp 1444078954103 15/10/05 14:02:34 WARN MetricsSystem: Using default name DAGScheduler for source because spark.app.id is not set. 15/10/05 14:02:34 INFO Executor: Starting executor ID driver on host localhost 15/10/05 14:02:34 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 58453. 15/10/05 14:02:34 INFO NettyBlockTransferService: Server created on 58453 15/10/05 14:02:34 INFO BlockManagerMaster: Trying to register BlockManager 15/10/05 14:02:34 INFO BlockManagerMasterEndpoint: Registering block manager localhost:58453 with 530.0 MB RAM, BlockManagerId(driver, localhost, 58453) 15/10/05 14:02:34 INFO BlockManagerMaster: Registered BlockManager 15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(130448) called with curMem=0, maxMem=555755765 15/10/05 14:02:36 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 127.4 KB, free 529.9 MB) 15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(14276) called with curMem=130448, maxMem=555755765 15/10/05 14:02:36 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 13.9 KB, free 529.9 MB) 15/10/05 14:02:36 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:58453 (size: 13.9 KB, free: 530.0 MB) 15/10/05 14:02:36 INFO SparkContext: Created broadcast 0 from textFile at NativeMethodAccessorImpl.java:-2 15/10/05 14:02:36 INFO FileInputFormat: Total input paths to process : 1 15/10/05 14:02:36 INFO SparkContext: Starting job: collect at <stdin>:1 15/10/05 14:02:36 INFO DAGScheduler: Registering RDD 3 (reduceByKey at <stdin>:1) 15/10/05 14:02:36 INFO DAGScheduler: Registering RDD 7 
(combineByKey at <stdin>:3) 15/10/05 14:02:36 INFO DAGScheduler: Got job 0 (collect at <stdin>:1) with 2 output partitions 15/10/05 14:02:36 INFO DAGScheduler: Final stage: ResultStage 2(collect at <stdin>:1) 15/10/05 14:02:36 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 1) 15/10/05 14:02:36 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 1) 15/10/05 14:02:36 INFO DAGScheduler: Submitting ShuffleMapStage 0 (PairwiseRDD[3] at reduceByKey at <stdin>:1), which has no missing parents 15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(8960) called with curMem=144724, maxMem=555755765 15/10/05 14:02:36 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 8.8 KB, free 529.9 MB) 15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(5483) called with curMem=153684, maxMem=555755765 15/10/05 14:02:36 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 5.4 KB, free 529.9 MB) 15/10/05 14:02:36 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:58453 (size: 5.4 KB, free: 530.0 MB) 15/10/05 14:02:36 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:861 15/10/05 14:02:36 INFO DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 0 (PairwiseRDD[3] at reduceByKey at <stdin>:1) 15/10/05 14:02:36 INFO TaskSchedulerImpl: Adding task set 0.0 with 2 tasks 15/10/05 14:02:36 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, PROCESS_LOCAL, 2266 bytes) 15/10/05 14:02:36 INFO TaskSetManager: Starting task 1.0 in stage 0.0 (TID 1, localhost, PROCESS_LOCAL, 2266 bytes) 15/10/05 14:02:36 INFO Executor: Running task 0.0 in stage 0.0 (TID 0) 15/10/05 14:02:36 INFO Executor: Running task 1.0 in stage 0.0 (TID 1) 15/10/05 14:02:36 INFO Executor: Fetching http://172.21.1.246:58452/jars/livy-assembly-3.9.0-SNAPSHOT.jar with timestamp 1444078954103 15/10/05 14:02:36 INFO Utils: Fetching 
http://172.21.1.246:58452/jars/livy-assembly-3.9.0-SNAPSHOT.jar to /private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-a0e35333-e2be-4b83-8a7e-3cb468270dc2/userFiles-d0940846-b38a-4e4d-af07-8419b364d7ff/fetchFileTemp476551478197543813.tmp 15/10/05 14:02:36 INFO Executor: Adding file:/private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-a0e35333-e2be-4b83-8a7e-3cb468270dc2/userFiles-d0940846-b38a-4e4d-af07-8419b364d7ff/livy-assembly-3.9.0-SNAPSHOT.jar to class loader 15/10/05 14:02:36 INFO HadoopRDD: Input split: file:/Users/huetest/Downloads/babs_open_data_year_1/201402_babs_open_data/201402_trip_data.csv:0+8609511 15/10/05 14:02:36 INFO HadoopRDD: Input split: file:/Users/huetest/Downloads/babs_open_data_year_1/201402_babs_open_data/201402_trip_data.csv:8609511+8609511 15/10/05 14:02:36 INFO deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id 15/10/05 14:02:36 INFO deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id 15/10/05 14:02:36 INFO deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap 15/10/05 14:02:36 INFO deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition 15/10/05 14:02:36 INFO deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id 15/10/05 14:02:37 INFO PythonRDD: Times: total = 727, boot = 229, init = 44, finish = 454 15/10/05 14:02:37 INFO PythonRDD: Times: total = 730, boot = 226, init = 46, finish = 458 15/10/05 14:02:37 INFO Executor: Finished task 1.0 in stage 0.0 (TID 1). 2318 bytes result sent to driver 15/10/05 14:02:37 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 
2318 bytes result sent to driver 15/10/05 14:02:37 INFO TaskSetManager: Finished task 1.0 in stage 0.0 (TID 1) in 950 ms on localhost (1/2) 15/10/05 14:02:37 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 962 ms on localhost (2/2) 15/10/05 14:02:37 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool 15/10/05 14:02:37 INFO DAGScheduler: ShuffleMapStage 0 (reduceByKey at <stdin>:1) finished in 0.973 s 15/10/05 14:02:37 INFO DAGScheduler: looking for newly runnable stages """ LIVY_YARN_LOG = """ 15/10/05 13:51:21 INFO client.RMProxy: Connecting to ResourceManager at huetest-1.test.com/175.18.213.12:8032 15/10/05 13:51:21 INFO yarn.Client: Requesting a new application from cluster with 3 NodeManagers 15/10/05 13:51:21 INFO yarn.Client: Verifying our application has not requested more than the maximum memory capability of the cluster (2048 MB per container) 15/10/05 13:51:21 INFO yarn.Client: Will allocate AM container, with 1408 MB memory including 384 MB overhead 15/10/05 13:51:21 INFO yarn.Client: Setting up container launch context for our AM 15/10/05 13:51:21 INFO yarn.Client: Setting up the launch environment for our AM container 15/10/05 13:51:21 INFO yarn.Client: Preparing resources for our AM container 15/10/05 13:51:21 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable 15/10/05 13:51:21 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/spark-1.5.0-bin-hadoop2.6/lib/spark-assembly-1.5.0-hadoop2.6.0.jar -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/spark-assembly-1.5.0-hadoop2.6.0.jar 15/10/05 13:52:00 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/hue/apps/spark/java/livy-assembly/target/scala-2.10/livy-assembly-3.9.0-SNAPSHOT.jar -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/livy-assembly-3.9.0-SNAPSHOT.jar 15/10/05 13:52:09 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/spark-1.5.0-bin-hadoop2.6/python/lib/pyspark.zip -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/pyspark.zip 15/10/05 13:52:09 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/spark-1.5.0-bin-hadoop2.6/python/lib/py4j-0.8.2.1-src.zip -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/py4j-0.8.2.1-src.zip 15/10/05 13:52:10 INFO yarn.Client: Uploading resource file:/private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-3bde33db-374c-4abe-a4af-704bd5dc09d2/__spark_conf__4420686202746650998.zip -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/__spark_conf__4420686202746650998.zip 15/10/05 13:52:10 INFO spark.SecurityManager: Changing view acls to: huetest 15/10/05 13:52:10 INFO spark.SecurityManager: Changing modify acls to: huetest 15/10/05 13:52:10 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(huetest); users with modify permissions: Set(huetest) 15/10/05 13:52:10 INFO yarn.Client: Submitting application 2 to ResourceManager 15/10/05 13:52:10 INFO impl.YarnClientImpl: Submitted application application_1444070328046_0002 15/10/05 
13:52:11 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:11 INFO yarn.Client: client token: N/A diagnostics: N/A ApplicationMaster host: N/A ApplicationMaster RPC port: -1 queue: root.huetest start time: 1444078329419 final status: UNDEFINED tracking URL: http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/ user: huetest 15/10/05 13:52:12 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:13 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:14 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:16 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:17 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:18 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:19 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:20 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:21 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:22 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:23 INFO yarn.Client: Application report for application_1444070328046_0002 (state: RUNNING) 15/10/05 13:52:23 INFO yarn.Client: client token: N/A diagnostics: N/A ApplicationMaster host: 175.18.213.12 ApplicationMaster RPC port: 0 queue: root.huetest start time: 1444078329419 final status: UNDEFINED tracking URL: http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/ user: huetest 15/10/05 13:52:24 INFO yarn.Client: Application report for application_1444070328046_0002 (state: RUNNING) """ 
def setUp(self):
  self.user = '******'
  self.api = SparkApi(self.user)

def test_get_jobs(self):
  # Standalone mode: job links are scraped from the Spark UI URL in the log.
  expected_local = [
    {'url': u'http://172.21.1.246:4040/jobs/job/?id=0', 'name': u'0'}
  ]
  standalone_jobs = self.api._get_standalone_jobs(self.LIVY_STANDALONE_LOG)
  assert_equal(standalone_jobs, expected_local, standalone_jobs)

  # YARN mode: job links are the RM proxy URLs of the submitted application.
  expected_yarn = [
    {'url': u'http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/',
     'name': u'application_1444070328046_0002'}
  ]
  yarn_jobs = self.api._get_yarn_jobs(self.LIVY_YARN_LOG)
  assert_equal(yarn_jobs, expected_yarn, yarn_jobs)
class TestSparkApi(object):
  """Unit tests for the Livy-backed SparkApi connector."""

  def setUp(self):
    self.user = '******'
    self.interpreter = {
      'name': 'livy',
      'options': {
        'api_url': 'http://gethue.com:8998'
      },
    }
    self.api = SparkApi(self.user, self.interpreter)

  def test_get_api(self):
    lang = 'pyspark'
    properties = None

    # with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
    spark_api = self.api.get_api()
    # The connector must hand back a Livy client for the configured api_url.
    assert_equal(spark_api.__class__.__name__, 'LivyClient')

  def test_get_livy_props_method(self):
    # A comma-separated 'files' value must be split into a list for Livy.
    input_properties = [{
      "name": "files",
      "value": 'file_a,file_b,file_c',
    }]
    livy_props = self.api.get_livy_props('scala', input_properties)
    assert_equal(livy_props['files'], ['file_a', 'file_b', 'file_c'])

  def test_create_session_with_config(self):
    lang = 'pyspark'
    properties = None
    session_key = self.api._get_session_key()

    with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
      with patch('notebook.connectors.spark_shell.DefaultConfiguration') as DefaultConfiguration:
        with patch('notebook.connectors.spark_shell.USE_DEFAULT_CONFIGURATION') as USE_DEFAULT_CONFIGURATION:
          # User-level default configuration overrides driverCores to 2.
          DefaultConfiguration.objects.get_configuration_for_user.return_value = Mock(
            properties_list=[{
              'multiple': False,
              'name': 'driverCores',
              'defaultValue': 1,
              'value': 2,
              'nice_name': 'Driver Cores',
              'help_text': 'Number of cores used by the driver, only in cluster mode (Default: 1)',
              'type': 'number',
              'is_yarn': True
            }]
          )
          get_spark_api.return_value = Mock(
            create_session=Mock(return_value={'id': '1'}),
            get_session=Mock(return_value={'state': 'idle', 'log': ''})
          )

          # Case with user configuration. Expected 2 driverCores
          USE_DEFAULT_CONFIGURATION.get.return_value = True
          session = self.api.create_session(lang=lang, properties=properties)
          assert_equal(session['type'], 'pyspark')
          assert_equal(session['id'], '1')
          for prop in session['properties']:
            if prop['name'] == 'driverCores':
              cores = prop['value']
          assert_equal(cores, 2)

          if SESSIONS.get(session_key):
            del SESSIONS[session_key]

          # Case without user configuration. Expected 1 driverCores
          USE_DEFAULT_CONFIGURATION.get.return_value = True
          DefaultConfiguration.objects.get_configuration_for_user.return_value = None
          session2 = self.api.create_session(lang=lang, properties=properties)
          assert_equal(session2['type'], 'pyspark')
          assert_equal(session2['id'], '1')
          for prop in session2['properties']:
            if prop['name'] == 'driverCores':
              cores = prop['value']
          assert_equal(cores, 1)

          if SESSIONS.get(session_key):
            del SESSIONS[session_key]

          # Case with no user configuration. Expected 1 driverCores
          USE_DEFAULT_CONFIGURATION.get.return_value = False
          session3 = self.api.create_session(lang=lang, properties=properties)
          assert_equal(session3['type'], 'pyspark')
          assert_equal(session3['id'], '1')
          for prop in session3['properties']:
            if prop['name'] == 'driverCores':
              cores = prop['value']
          assert_equal(cores, 1)

          if SESSIONS.get(session_key):
            del SESSIONS[session_key]

  def test_create_session_plain(self):
    lang = 'pyspark'
    properties = None
    session_key = self.api._get_session_key()

    with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
      get_spark_api.return_value = Mock(
        create_session=Mock(return_value={'id': '1'}),
        get_session=Mock(return_value={'state': 'idle', 'log': ''})
      )

      session = self.api.create_session(lang=lang, properties=properties)
      assert_equal(session['type'], 'pyspark')
      assert_equal(session['id'], '1')

      # Without explicit properties, 'files' must default to an empty list.
      files_properties = [prop for prop in session['properties'] if prop['name'] == 'files']
      assert_true(files_properties, session['properties'])
      assert_equal(files_properties[0]['value'], [], session['properties'])

      if SESSIONS.get(session_key):
        del SESSIONS[session_key]

  def test_execute(self):
    with patch('notebook.connectors.spark_shell._get_snippet_session') as _get_snippet_session:
      with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
        notebook = Mock()
        snippet = {'statement': 'select * from test_table'}
        _get_snippet_session.return_value = {'id': '1'}

        # A successful submission returns the Livy statement id.
        get_spark_api.return_value = Mock(submit_statement=Mock(return_value={'id': 'test_id'}))
        response = self.api.execute(notebook, snippet)
        assert_equal(response['id'], 'test_id')

        # A response without an id must surface as an exception.
        get_spark_api.return_value = Mock(submit_statement=Mock())
        assert_raises(Exception, self.api.execute, notebook, snippet)

  def test_check_status(self):
    with patch('notebook.connectors.spark_shell._get_snippet_session') as _get_snippet_session:
      with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
        notebook = Mock()
        snippet = {'result': {'handle': {'id': {'test_id'}}}}
        _get_snippet_session.return_value = {'id': '1'}

        # The statement state from Livy is forwarded as the snippet status.
        get_spark_api.return_value = Mock(fetch_data=Mock(return_value={'state': 'test_state'}))
        response = self.api.check_status(notebook, snippet)
        assert_equal(response['status'], 'test_state')

        # A client without fetchable data must surface as an exception.
        get_spark_api.return_value = Mock(submit_statement=Mock())
        assert_raises(Exception, self.api.check_status, notebook, snippet)

  def test_get_sample_data(self):
    snippet = Mock()
    self.api._execute = Mock(return_value='test_value')
    self.api._check_status_and_fetch_result = Mock(return_value={
      'data': 'test_data',
      'meta': 'test_meta'
    })

    response = self.api.get_sample_data(snippet, 'test_db', 'test_table', 'test_column')
    assert_equal(response['rows'], 'test_data')
    assert_equal(response['full_headers'], 'test_meta')

  def test_get_select_query(self):
    # With operation as 'hello'
    response = self.api._get_select_query('test_db', 'test_table', 'test_column', 'hello')
    assert_equal(response, "SELECT 'Hello World!'")

    # Without column name
    response = self.api._get_select_query('test_db', 'test_table')
    assert_equal(response, 'SELECT *\nFROM test_db.test_table\nLIMIT 100\n')

    # With some column name
    response = self.api._get_select_query('test_db', 'test_table', 'test_column')
    assert_equal(response, 'SELECT test_column\nFROM test_db.test_table\nLIMIT 100\n')

  def test_get_jobs(self):
    # Standalone mode: job links are scraped from the Spark UI URL.
    expected_local = [{
      'url': u'http://172.21.1.246:4040/jobs/job/?id=0',
      'name': u'0'
    }]
    standalone_jobs = self.api._get_standalone_jobs(LIVY_STANDALONE_LOG)
    assert_equal(standalone_jobs, expected_local, standalone_jobs)

    # YARN mode: job links are the RM proxy URLs of the application.
    expected_yarn = [{
      'url': u'http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/',
      'name': u'application_1444070328046_0002'
    }]
    yarn_jobs = self.api._get_yarn_jobs(LIVY_YARN_LOG)
    assert_equal(yarn_jobs, expected_yarn, yarn_jobs)
class TestSparkShellConnector(object): LIVY_STANDALONE_LOG = """ Starting livy-repl on http://172.21.1.246:58449 Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties 15/10/05 14:02:33 INFO SparkContext: Running Spark version 1.5.0 15/10/05 14:02:33 INFO SecurityManager: Changing view acls to: huetest 15/10/05 14:02:33 INFO SecurityManager: Changing modify acls to: huetest 15/10/05 14:02:33 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(huetest); users with modify permissions: Set(huetest) 15/10/05 14:02:33 INFO Slf4jLogger: Slf4jLogger started 15/10/05 14:02:33 INFO Remoting: Starting remoting 15/10/05 14:02:33 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://[email protected]:58451] 15/10/05 14:02:33 INFO Utils: Successfully started service 'sparkDriver' on port 58451. 15/10/05 14:02:33 INFO SparkEnv: Registering MapOutputTracker 15/10/05 14:02:33 INFO SparkEnv: Registering BlockManagerMaster 15/10/05 14:02:33 INFO DiskBlockManager: Created local directory at /private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/blockmgr-f63fdd28-6d86-4ae6-a91c-902fb0310fb4 15/10/05 14:02:33 INFO MemoryStore: MemoryStore started with capacity 530.0 MB 15/10/05 14:02:33 INFO HttpFileServer: HTTP File server directory is /private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-a0e35333-e2be-4b83-8a7e-3cb468270dc2/httpd-0235b01f-ee8b-40fd-96a9-de946b1a3426 15/10/05 14:02:33 INFO HttpServer: Starting HTTP Server 15/10/05 14:02:33 INFO Utils: Successfully started service 'HTTP file server' on port 58452. 15/10/05 14:02:33 INFO SparkEnv: Registering OutputCommitCoordinator 15/10/05 14:02:33 INFO Utils: Successfully started service 'SparkUI' on port 4040. 
15/10/05 14:02:33 INFO SparkUI: Started SparkUI at http://172.21.1.246:4040 15/10/05 14:02:34 INFO SparkContext: Added JAR file:/Users/huetest/Dev/hue/apps/spark/java/livy-assembly/target/scala-2.10/livy-assembly-3.9.0-SNAPSHOT.jar at http://172.21.1.246:58452/jars/livy-assembly-3.9.0-SNAPSHOT.jar with timestamp 1444078954103 15/10/05 14:02:34 WARN MetricsSystem: Using default name DAGScheduler for source because spark.app.id is not set. 15/10/05 14:02:34 INFO Executor: Starting executor ID driver on host localhost 15/10/05 14:02:34 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 58453. 15/10/05 14:02:34 INFO NettyBlockTransferService: Server created on 58453 15/10/05 14:02:34 INFO BlockManagerMaster: Trying to register BlockManager 15/10/05 14:02:34 INFO BlockManagerMasterEndpoint: Registering block manager localhost:58453 with 530.0 MB RAM, BlockManagerId(driver, localhost, 58453) 15/10/05 14:02:34 INFO BlockManagerMaster: Registered BlockManager 15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(130448) called with curMem=0, maxMem=555755765 15/10/05 14:02:36 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 127.4 KB, free 529.9 MB) 15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(14276) called with curMem=130448, maxMem=555755765 15/10/05 14:02:36 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 13.9 KB, free 529.9 MB) 15/10/05 14:02:36 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:58453 (size: 13.9 KB, free: 530.0 MB) 15/10/05 14:02:36 INFO SparkContext: Created broadcast 0 from textFile at NativeMethodAccessorImpl.java:-2 15/10/05 14:02:36 INFO FileInputFormat: Total input paths to process : 1 15/10/05 14:02:36 INFO SparkContext: Starting job: collect at <stdin>:1 15/10/05 14:02:36 INFO DAGScheduler: Registering RDD 3 (reduceByKey at <stdin>:1) 15/10/05 14:02:36 INFO DAGScheduler: Registering RDD 7 
(combineByKey at <stdin>:3) 15/10/05 14:02:36 INFO DAGScheduler: Got job 0 (collect at <stdin>:1) with 2 output partitions 15/10/05 14:02:36 INFO DAGScheduler: Final stage: ResultStage 2(collect at <stdin>:1) 15/10/05 14:02:36 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 1) 15/10/05 14:02:36 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 1) 15/10/05 14:02:36 INFO DAGScheduler: Submitting ShuffleMapStage 0 (PairwiseRDD[3] at reduceByKey at <stdin>:1), which has no missing parents 15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(8960) called with curMem=144724, maxMem=555755765 15/10/05 14:02:36 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 8.8 KB, free 529.9 MB) 15/10/05 14:02:36 INFO MemoryStore: ensureFreeSpace(5483) called with curMem=153684, maxMem=555755765 15/10/05 14:02:36 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 5.4 KB, free 529.9 MB) 15/10/05 14:02:36 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:58453 (size: 5.4 KB, free: 530.0 MB) 15/10/05 14:02:36 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:861 15/10/05 14:02:36 INFO DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 0 (PairwiseRDD[3] at reduceByKey at <stdin>:1) 15/10/05 14:02:36 INFO TaskSchedulerImpl: Adding task set 0.0 with 2 tasks 15/10/05 14:02:36 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, PROCESS_LOCAL, 2266 bytes) 15/10/05 14:02:36 INFO TaskSetManager: Starting task 1.0 in stage 0.0 (TID 1, localhost, PROCESS_LOCAL, 2266 bytes) 15/10/05 14:02:36 INFO Executor: Running task 0.0 in stage 0.0 (TID 0) 15/10/05 14:02:36 INFO Executor: Running task 1.0 in stage 0.0 (TID 1) 15/10/05 14:02:36 INFO Executor: Fetching http://172.21.1.246:58452/jars/livy-assembly-3.9.0-SNAPSHOT.jar with timestamp 1444078954103 15/10/05 14:02:36 INFO Utils: Fetching 
http://172.21.1.246:58452/jars/livy-assembly-3.9.0-SNAPSHOT.jar to /private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-a0e35333-e2be-4b83-8a7e-3cb468270dc2/userFiles-d0940846-b38a-4e4d-af07-8419b364d7ff/fetchFileTemp476551478197543813.tmp 15/10/05 14:02:36 INFO Executor: Adding file:/private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-a0e35333-e2be-4b83-8a7e-3cb468270dc2/userFiles-d0940846-b38a-4e4d-af07-8419b364d7ff/livy-assembly-3.9.0-SNAPSHOT.jar to class loader 15/10/05 14:02:36 INFO HadoopRDD: Input split: file:/Users/huetest/Downloads/babs_open_data_year_1/201402_babs_open_data/201402_trip_data.csv:0+8609511 15/10/05 14:02:36 INFO HadoopRDD: Input split: file:/Users/huetest/Downloads/babs_open_data_year_1/201402_babs_open_data/201402_trip_data.csv:8609511+8609511 15/10/05 14:02:36 INFO deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id 15/10/05 14:02:36 INFO deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id 15/10/05 14:02:36 INFO deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap 15/10/05 14:02:36 INFO deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition 15/10/05 14:02:36 INFO deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id 15/10/05 14:02:37 INFO PythonRDD: Times: total = 727, boot = 229, init = 44, finish = 454 15/10/05 14:02:37 INFO PythonRDD: Times: total = 730, boot = 226, init = 46, finish = 458 15/10/05 14:02:37 INFO Executor: Finished task 1.0 in stage 0.0 (TID 1). 2318 bytes result sent to driver 15/10/05 14:02:37 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 
2318 bytes result sent to driver 15/10/05 14:02:37 INFO TaskSetManager: Finished task 1.0 in stage 0.0 (TID 1) in 950 ms on localhost (1/2) 15/10/05 14:02:37 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 962 ms on localhost (2/2) 15/10/05 14:02:37 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool 15/10/05 14:02:37 INFO DAGScheduler: ShuffleMapStage 0 (reduceByKey at <stdin>:1) finished in 0.973 s 15/10/05 14:02:37 INFO DAGScheduler: looking for newly runnable stages """ LIVY_YARN_LOG = """ 15/10/05 13:51:21 INFO client.RMProxy: Connecting to ResourceManager at huetest-1.test.com/175.18.213.12:8032 15/10/05 13:51:21 INFO yarn.Client: Requesting a new application from cluster with 3 NodeManagers 15/10/05 13:51:21 INFO yarn.Client: Verifying our application has not requested more than the maximum memory capability of the cluster (2048 MB per container) 15/10/05 13:51:21 INFO yarn.Client: Will allocate AM container, with 1408 MB memory including 384 MB overhead 15/10/05 13:51:21 INFO yarn.Client: Setting up container launch context for our AM 15/10/05 13:51:21 INFO yarn.Client: Setting up the launch environment for our AM container 15/10/05 13:51:21 INFO yarn.Client: Preparing resources for our AM container 15/10/05 13:51:21 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable 15/10/05 13:51:21 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/spark-1.5.0-bin-hadoop2.6/lib/spark-assembly-1.5.0-hadoop2.6.0.jar -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/spark-assembly-1.5.0-hadoop2.6.0.jar 15/10/05 13:52:00 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/hue/apps/spark/java/livy-assembly/target/scala-2.10/livy-assembly-3.9.0-SNAPSHOT.jar -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/livy-assembly-3.9.0-SNAPSHOT.jar 15/10/05 13:52:09 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/spark-1.5.0-bin-hadoop2.6/python/lib/pyspark.zip -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/pyspark.zip 15/10/05 13:52:09 INFO yarn.Client: Uploading resource file:/Users/huetest/Dev/spark-1.5.0-bin-hadoop2.6/python/lib/py4j-0.8.2.1-src.zip -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/py4j-0.8.2.1-src.zip 15/10/05 13:52:10 INFO yarn.Client: Uploading resource file:/private/var/folders/7t/31vfhhq92_g628vh8q5pspbc0000gp/T/spark-3bde33db-374c-4abe-a4af-704bd5dc09d2/__spark_conf__4420686202746650998.zip -> hdfs://huetest-1.vpc.cloudera.com:8020/user/huetest/.sparkStaging/application_1444070328046_0002/__spark_conf__4420686202746650998.zip 15/10/05 13:52:10 INFO spark.SecurityManager: Changing view acls to: huetest 15/10/05 13:52:10 INFO spark.SecurityManager: Changing modify acls to: huetest 15/10/05 13:52:10 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(huetest); users with modify permissions: Set(huetest) 15/10/05 13:52:10 INFO yarn.Client: Submitting application 2 to ResourceManager 15/10/05 13:52:10 INFO impl.YarnClientImpl: Submitted application application_1444070328046_0002 15/10/05 
13:52:11 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:11 INFO yarn.Client: client token: N/A diagnostics: N/A ApplicationMaster host: N/A ApplicationMaster RPC port: -1 queue: root.huetest start time: 1444078329419 final status: UNDEFINED tracking URL: http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/ user: huetest 15/10/05 13:52:12 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:13 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:14 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:16 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:17 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:18 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:19 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:20 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:21 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:22 INFO yarn.Client: Application report for application_1444070328046_0002 (state: ACCEPTED) 15/10/05 13:52:23 INFO yarn.Client: Application report for application_1444070328046_0002 (state: RUNNING) 15/10/05 13:52:23 INFO yarn.Client: client token: N/A diagnostics: N/A ApplicationMaster host: 175.18.213.12 ApplicationMaster RPC port: 0 queue: root.huetest start time: 1444078329419 final status: UNDEFINED tracking URL: http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/ user: huetest 15/10/05 13:52:24 INFO yarn.Client: Application report for application_1444070328046_0002 (state: RUNNING) """ 
def setUp(self):
  self.user = '******'
  self.api = SparkApi(self.user)

def test_get_jobs(self):
  # Job links parsed from a standalone-mode Livy log.
  local_expected = [{
    'url': u'http://172.21.1.246:4040/jobs/job/?id=0',
    'name': u'0'
  }]
  parsed = self.api._get_standalone_jobs(self.LIVY_STANDALONE_LOG)
  assert_equal(parsed, local_expected, parsed)

  # Job links parsed from a YARN-mode Livy log.
  yarn_expected = [{
    'url': u'http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/',
    'name': u'application_1444070328046_0002'
  }]
  parsed = self.api._get_yarn_jobs(self.LIVY_YARN_LOG)
  assert_equal(parsed, yarn_expected, parsed)
def setUp(self):
  # Minimal fixture: a plain user name and a SparkApi bound to it.
  self.user = '******'
  self.api = SparkApi(self.user)
class TestSparkApi(object):
  """Unit tests for the Livy-backed SparkApi connector (session handling,
  statement execution and metadata describe calls are all mocked)."""

  def setUp(self):
    self.client = make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)
    self.user = User.objects.get(username="******")

    self.interpreter = {
      'name': 'livy',
      'options': {
        'api_url': 'http://gethue.com:8998'
      },
    }
    self.api = SparkApi(self.user, self.interpreter)

  def test_get_api(self):
    lang = 'pyspark'
    properties = None

    # with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
    spark_api = self.api.get_api()
    # The connector must hand back a Livy client for the configured api_url.
    assert_equal(spark_api.__class__.__name__, 'LivyClient')

  def test_get_livy_props_method(self):
    # A comma-separated 'files' value must be split into a list for Livy.
    input_properties = [{
      "name": "files",
      "value": 'file_a,file_b,file_c',
    }]
    livy_props = self.api.get_livy_props('scala', input_properties)
    assert_equal(livy_props['files'], ['file_a', 'file_b', 'file_c'])

  def test_create_session_with_config(self):
    lang = 'pyspark'
    properties = None
    session_key = self.api._get_session_key()

    with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
      with patch('notebook.connectors.spark_shell.DefaultConfiguration') as DefaultConfiguration:
        with patch('notebook.connectors.spark_shell.USE_DEFAULT_CONFIGURATION') as USE_DEFAULT_CONFIGURATION:
          # User-level default configuration overrides driverCores to 2.
          DefaultConfiguration.objects.get_configuration_for_user.return_value = Mock(
            properties_list=[{
              'multiple': False,
              'name': 'driverCores',
              'defaultValue': 1,
              'value': 2,
              'nice_name': 'Driver Cores',
              'help_text': 'Number of cores used by the driver, only in cluster mode (Default: 1)',
              'type': 'number',
              'is_yarn': True
            }]
          )
          get_spark_api.return_value = Mock(
            create_session=Mock(return_value={'id': '1'}),
            get_session=Mock(return_value={'state': 'idle', 'log': ''})
          )

          # Case with user configuration. Expected 2 driverCores
          USE_DEFAULT_CONFIGURATION.get.return_value = True
          session = self.api.create_session(lang=lang, properties=properties)
          assert_equal(session['type'], 'pyspark')
          assert_equal(session['id'], '1')
          for prop in session['properties']:
            if prop['name'] == 'driverCores':
              cores = prop['value']
          assert_equal(cores, 2)

          # Drop any session info cached on the user before the next case.
          if self.api._get_session_info_from_user():
            self.api._remove_session_info_from_user()

          # Case without user configuration. Expected 1 driverCores
          USE_DEFAULT_CONFIGURATION.get.return_value = True
          DefaultConfiguration.objects.get_configuration_for_user.return_value = None
          session2 = self.api.create_session(lang=lang, properties=properties)
          assert_equal(session2['type'], 'pyspark')
          assert_equal(session2['id'], '1')
          for prop in session2['properties']:
            if prop['name'] == 'driverCores':
              cores = prop['value']
          assert_equal(cores, 1)

          # Case with no user configuration. Expected 1 driverCores
          USE_DEFAULT_CONFIGURATION.get.return_value = False
          session3 = self.api.create_session(lang=lang, properties=properties)
          assert_equal(session3['type'], 'pyspark')
          assert_equal(session3['id'], '1')
          for prop in session3['properties']:
            if prop['name'] == 'driverCores':
              cores = prop['value']
          assert_equal(cores, 1)

  def test_create_session_plain(self):
    lang = 'pyspark'
    properties = None
    session_key = self.api._get_session_key()

    with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
      get_spark_api.return_value = Mock(
        create_session=Mock(return_value={'id': '1'}),
        get_session=Mock(return_value={'state': 'idle', 'log': ''})
      )

      session = self.api.create_session(lang=lang, properties=properties)
      assert_equal(session['type'], 'pyspark')
      assert_equal(session['id'], '1')

      # Without explicit properties, 'files' must default to an empty list.
      files_properties = [prop for prop in session['properties'] if prop['name'] == 'files']
      assert_true(files_properties, session['properties'])
      assert_equal(files_properties[0]['value'], [], session['properties'])

  def test_execute(self):
    with patch('notebook.connectors.spark_shell._get_snippet_session') as _get_snippet_session:
      with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
        notebook = Mock()
        snippet = {'statement': 'select * from test_table'}
        _get_snippet_session.return_value = {'id': '1'}

        # A successful submission returns the Livy statement id.
        get_spark_api.return_value = Mock(
          submit_statement=Mock(return_value={'id': 'test_id'})
        )
        self.api._check_session = Mock(return_value={'id': '1'})
        response = self.api.execute(notebook, snippet)
        assert_equal(response['id'], 'test_id')

        # A response without an id must surface as an exception.
        get_spark_api.return_value = Mock(submit_statement=Mock())
        assert_raises(Exception, self.api.execute, notebook, snippet)

  def test_check_status(self):
    with patch('notebook.connectors.spark_shell._get_snippet_session') as _get_snippet_session:
      with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api:
        notebook = Mock()
        snippet = {
          'result': {
            'handle': {
              'id': {'test_id'}
            }
          }
        }
        _get_snippet_session.return_value = {'id': '1'}

        # The statement state from Livy is forwarded as the snippet status.
        get_spark_api.return_value = Mock(
          fetch_data=Mock(return_value={'state': 'test_state'})
        )
        self.api._handle_session_health_check = Mock(return_value={'id': '1'})
        response = self.api.check_status(notebook, snippet)
        assert_equal(response['status'], 'test_state')

        # A client without fetchable data must surface as an exception.
        get_spark_api.return_value = Mock(submit_statement=Mock())
        assert_raises(Exception, self.api.check_status, notebook, snippet)

  def test_get_sample_data(self):
    snippet = Mock()
    self.api._execute = Mock(return_value='test_value')
    self.api.create_session = Mock(return_value={'id': 'test_id'})
    self.api._check_status_and_fetch_result = Mock(return_value={
      'data': 'test_data',
      'meta': 'test_meta'
    })

    # When table is transactional
    self.api.describe_table = Mock(return_value={
      'stats': [{
        'data_type': 'transactional',
        'col_name': 'true',
        'comment': ''
      }]
    })
    response = self.api.get_sample_data(snippet, 'test_db', 'test_table', 'test_column')
    # Sampling a transactional table is skipped: empty rows and headers.
    assert_equal(response['rows'], [])
    assert_equal(response['full_headers'], [])

    # When table is not transactional
    self.api.describe_table = Mock(return_value={
      'stats': []  # No details regarding transactionality is present in describe response
    })
    response = self.api.get_sample_data(snippet, 'test_db', 'test_table', 'test_column')
    assert_equal(response['rows'], 'test_data')
    assert_equal(response['full_headers'], 'test_meta')

  def test_get_select_query(self):
    # With operation as 'hello'
    response = self.api._get_select_query('test_db', 'test_table', 'test_column', 'hello')
    assert_equal(response, "SELECT 'Hello World!'")

    # Without column name
    response = self.api._get_select_query('test_db', 'test_table')
    assert_equal(response, 'SELECT *\nFROM test_db.test_table\nLIMIT 100\n')

    # With some column name
    response = self.api._get_select_query('test_db', 'test_table', 'test_column')
    assert_equal(response, 'SELECT test_column\nFROM test_db.test_table\nLIMIT 100\n')

  def test_describe_database(self):
    notebook = Mock()
    snippet = Mock()
    self.api.create_session = Mock(return_value={'id': 'test_id'})
    self.api._execute = Mock(return_value='test_value')
    # Raw DESCRIBE DATABASE result as (info_name, info_value) rows.
    self.api._check_status_and_fetch_result = Mock(return_value={
      'data': [
        ['Namespace Name', 'employees'],
        ['Comment', 'For software companies'],
        ['Location', 'hdfs://test_url:8020/warehouse/tablespace/external/hive/employees.db'],
        ['Owner', 'demo'],
        ['Properties', '((Create-by,Kevin), (Create-date,09/01/2019))']],
      'images': [],
      'meta': [
        {'comment': '', 'name': 'info_name', 'type': 'string'},
        {'comment': '', 'name': 'info_value', 'type': 'string'}],
      'type': 'table'}
    )

    response = self.api.describe_database(notebook, snippet, 'employees')

    assert_equal(response, {
      'comment': 'For software companies',
      'db_name': 'employees',
      'location': 'hdfs://test_url:8020/warehouse/tablespace/external/hive/employees.db',
      'owner_name': 'demo',
      'parameters': '{Create-by=Kevin, Create-date=09/01/2019}',
      'status': 0})

  def test_describe_table(self):
    notebook = Mock()
    snippet = Mock()
    self.api.create_session = Mock(return_value={'id': 'test_id'})
    self.api._execute = Mock(return_value='test_value')
    # Raw DESCRIBE FORMATTED output for an external, partitioned table.
    self.api._check_status_and_fetch_result = Mock(return_value={
      'data': [
        ['nname', 'string', None],
        ['# Partition Information', '', ''],
        ['# col_name', 'data_type', 'comment'],
        ['state', 'string', 'null'],
        ['', '', ''],
        ['# Detailed Table Information', '', ''],
        ['Database', 'default', ''],
        ['Table', 'test_nonacid', ''],
        ['Owner', 'demo', ''],
        ['Created Time', 'Tue Jun 28 11:35:33 UTC 2022', ''],
        ['Last Access', 'UNKNOWN', ''],
        ['Created By', 'Spark 3.3.0.7.2.16.0-94', ''],
        ['Type', 'EXTERNAL', ''],
        ['Provider', 'hive', ''],
        ['Table Properties',
         '[TRANSLATED_TO_EXTERNAL=TRUE, bucketing_version=2, '
         'external.table.purge=TRUE, numFilesErasureCoded=0, '
         'transient_lastDdlTime=1656416152]', ''],
        ['Statistics', '6 bytes', ''],
        ['Location', 'hdfs://test_url:8020/warehouse/tablespace/external/hive/test_nonacid', ''],
        ['Serde Library', 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', ''],
        ['InputFormat', 'org.apache.hadoop.mapred.TextInputFormat', ''],
        ['OutputFormat', 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', ''],
        ['Storage Properties', '[serialization.format=1]', ''],
        ['Partition Provider', 'Catalog', '']],
      'images': [],
      'meta': [
        {'comment': '', 'name': 'col_name', 'type': 'string'},
        {'comment': '', 'name': 'data_type', 'type': 'string'},
        {'comment': '', 'name': 'comment', 'type': 'string'}],
      'type': 'table'
    })

    response = self.api.describe_table(notebook, snippet, 'default', 'test_nonacid')

    assert_equal(response, {
      'cols': [{'comment': 'None', 'name': 'nname', 'type': 'string'}],
      'comment': '',
      'details': {'properties': {
        'create_time': 'Tue Jun 28 11:35:33 UTC 2022',
        'format': 'text',
        'owner': 'demo',
        'table_type': 'EXTERNAL'},
        'stats': [
          {'col_name': 'TRUE', 'comment': '', 'data_type': 'TRANSLATED_TO_EXTERNAL'},
          {'col_name': '2', 'comment': '', 'data_type': 'bucketing_version'},
          {'col_name': 'TRUE', 'comment': '', 'data_type': 'external.table.purge'},
          {'col_name': '0', 'comment': '', 'data_type': 'numFilesErasureCoded'},
          {'col_name': '1656416152', 'comment': '', 'data_type': 'transient_lastDdlTime'}]},
      'hdfs_link': '/filebrowser/view=/warehouse/tablespace/external/hive/test_nonacid',
      'is_view': False,
      'name': 'test_nonacid',
      'partition_keys': [{'name': 'state', 'type': 'string'}],
      'path_location': 'hdfs://test_url:8020/warehouse/tablespace/external/hive/test_nonacid',
      'primary_keys': [],
      'properties': [
        {'col_name': '# Partition Information', 'comment': '', 'data_type': ''},
        {'col_name': '# col_name', 'comment': 'comment', 'data_type': 'data_type'},
        {'col_name': 'state', 'comment': 'null', 'data_type': 'string'},
        {'col_name': '', 'comment': '', 'data_type': ''},
        {'col_name': '# Detailed Table Information', 'comment': '', 'data_type': ''},
        {'col_name': 'Database', 'comment': '', 'data_type': 'default'},
        {'col_name': 'Table', 'comment': '', 'data_type': 'test_nonacid'},
        {'col_name': 'Owner', 'comment': '', 'data_type': 'demo'},
        {'col_name': 'Created Time', 'comment': '', 'data_type': 'Tue Jun 28 11:35:33 UTC 2022'},
        {'col_name': 'Last Access', 'comment': '', 'data_type': 'UNKNOWN'},
        {'col_name': 'Created By', 'comment': '', 'data_type': 'Spark 3.3.0.7.2.16.0-94'},
        {'col_name': 'Type', 'comment': '', 'data_type': 'EXTERNAL'},
        {'col_name': 'Provider', 'comment': '', 'data_type': 'hive'},
        {'col_name': 'Table Properties',
         'comment': '',
         'data_type': '[TRANSLATED_TO_EXTERNAL=TRUE, '
                      'bucketing_version=2, external.table.purge=TRUE, '
                      'numFilesErasureCoded=0, '
                      'transient_lastDdlTime=1656416152]'},
        {'col_name': 'Statistics', 'comment': '', 'data_type': '6 bytes'},
        {'col_name': 'Location',
         'comment': '',
         'data_type': 'hdfs://test_url:8020/warehouse/tablespace/external/hive/test_nonacid'},
        {'col_name': 'Serde Library',
         'comment': '',
         'data_type': 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'},
        {'col_name': 'InputFormat',
         'comment': '',
         'data_type': 'org.apache.hadoop.mapred.TextInputFormat'},
        {'col_name': 'OutputFormat',
         'comment': '',
         'data_type': 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'},
        {'col_name': 'Storage Properties', 'comment': '', 'data_type': '[serialization.format=1]'},
        {'col_name': 'Partition Provider', 'comment': '', 'data_type': 'Catalog'}],
      'stats': [
        {'col_name': 'TRUE', 'comment': '', 'data_type': 'TRANSLATED_TO_EXTERNAL'},
        {'col_name': '2', 'comment': '', 'data_type': 'bucketing_version'},
        {'col_name': 'TRUE', 'comment': '', 'data_type': 'external.table.purge'},
        {'col_name': '0', 'comment': '', 'data_type': 'numFilesErasureCoded'},
        {'col_name': '1656416152', 'comment': '', 'data_type': 'transient_lastDdlTime'}],
      'status': 0})

  def test_get_jobs(self):
    # Standalone mode: job links are scraped from the Spark UI URL.
    expected_local = [
      {'url': u'http://172.21.1.246:4040/jobs/job/?id=0', 'name': u'0'}
    ]
    standalone_jobs = self.api._get_standalone_jobs(LIVY_STANDALONE_LOG)
    assert_equal(standalone_jobs, expected_local, standalone_jobs)

    # YARN mode: job links are the RM proxy URLs of the application.
    expected_yarn = [
      {'url': u'http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/',
       'name': u'application_1444070328046_0002'}
    ]
    yarn_jobs = self.api._get_yarn_jobs(LIVY_YARN_LOG)
    assert_equal(yarn_jobs, expected_yarn, yarn_jobs)
def get_api(request, snippet):
  """Return the connector API instance that should execute `snippet`.

  Resolves the snippet's interpreter (configuration-driven, or embedded in the
  snippet itself for 'custom' connector snippets), optionally attaches the
  selected compute, then dispatches on the interpreter's `interface` to the
  matching connector implementation.

  Args:
    request: Django request; `request.user` selects interpreters, and
      `request.POST['cluster']` may carry a JSON-encoded compute choice.
    snippet: dict describing the statement to run (`type`, optional
      `compute`, `interpreter`, `wasBatchExecuted`, `query_server`).

  Returns:
    A connector Api instance (HS2Api, SparkApi, OozieApi, ...).

  Raises:
    PopupException: if the interpreter's interface is not recognized.
  """
  from notebook.connectors.oozie_batch import OozieApi

  # Batch-submitted snippets are replayed through Oozie unless the task
  # server handles them.
  if snippet.get('wasBatchExecuted') and not TASK_SERVER.ENABLED.get():
    return OozieApi(user=request.user, request=request)

  if snippet.get('type') == 'report':
    snippet['type'] = 'impala'

  patch_snippet_for_connector(snippet)

  connector_name = snippet['type']

  # FIX: the embedded-interpreter path is keyed on snippet type 'custom';
  # 'hello' is not a connector type, which made this branch unreachable.
  if has_connectors() and snippet.get('type') == 'custom' and is_admin(request.user):
    interpreter = snippet.get('interpreter')
  else:
    interpreter = get_interpreter(connector_type=connector_name, user=request.user)

  interface = interpreter['interface']

  if get_cluster_config(request.user)['has_computes']:
    # Via Catalog autocomplete API or Notebook create sessions. The default
    # '""' decodes to an empty string; 'undefined' comes from the JS client.
    compute = json.loads(request.POST.get('cluster', '""'))
    if compute == '""' or compute == 'undefined':
      compute = None
    if not compute and snippet.get('compute'):  # Via notebook.ko.js
      interpreter['compute'] = snippet['compute']

  LOG.debug('Selected interpreter %s interface=%s compute=%s' % (
      interpreter['type'],
      interface,
      interpreter.get('compute') and interpreter['compute']['name']))

  if interface == 'hiveserver2' or interface == 'hms':
    from notebook.connectors.hiveserver2 import HS2Api
    return HS2Api(user=request.user, request=request, interpreter=interpreter)
  elif interface == 'oozie':
    return OozieApi(user=request.user, request=request)
  elif interface == 'livy':
    from notebook.connectors.spark_shell import SparkApi
    return SparkApi(request.user, interpreter=interpreter)
  elif interface == 'livy-batch':
    from notebook.connectors.spark_batch import SparkBatchApi
    return SparkBatchApi(request.user, interpreter=interpreter)
  elif interface == 'text' or interface == 'markdown':
    from notebook.connectors.text import TextApi
    return TextApi(request.user)
  elif interface == 'rdbms':
    from notebook.connectors.rdbms import RdbmsApi
    return RdbmsApi(request.user, interpreter=snippet['type'], query_server=snippet.get('query_server'))
  elif interface == 'jdbc':
    # The generic JDBC interface is specialized by sniffing the JDBC URL.
    if interpreter['options'] and interpreter['options'].get('url', '').find('teradata') >= 0:
      from notebook.connectors.jdbc_teradata import JdbcApiTeradata
      return JdbcApiTeradata(request.user, interpreter=interpreter)
    if interpreter['options'] and interpreter['options'].get('url', '').find('awsathena') >= 0:
      from notebook.connectors.jdbc_athena import JdbcApiAthena
      return JdbcApiAthena(request.user, interpreter=interpreter)
    elif interpreter['options'] and interpreter['options'].get('url', '').find('presto') >= 0:
      from notebook.connectors.jdbc_presto import JdbcApiPresto
      return JdbcApiPresto(request.user, interpreter=interpreter)
    elif interpreter['options'] and interpreter['options'].get('url', '').find('clickhouse') >= 0:
      from notebook.connectors.jdbc_clickhouse import JdbcApiClickhouse
      return JdbcApiClickhouse(request.user, interpreter=interpreter)
    elif interpreter['options'] and interpreter['options'].get('url', '').find('vertica') >= 0:
      from notebook.connectors.jdbc_vertica import JdbcApiVertica
      return JdbcApiVertica(request.user, interpreter=interpreter)
    else:
      from notebook.connectors.jdbc import JdbcApi
      return JdbcApi(request.user, interpreter=interpreter)
  elif interface == 'teradata':
    from notebook.connectors.jdbc_teradata import JdbcApiTeradata
    return JdbcApiTeradata(request.user, interpreter=interpreter)
  elif interface == 'athena':
    from notebook.connectors.jdbc_athena import JdbcApiAthena
    return JdbcApiAthena(request.user, interpreter=interpreter)
  elif interface == 'presto':
    from notebook.connectors.jdbc_presto import JdbcApiPresto
    return JdbcApiPresto(request.user, interpreter=interpreter)
  elif interface == 'sqlalchemy':
    from notebook.connectors.sql_alchemy import SqlAlchemyApi
    return SqlAlchemyApi(request.user, interpreter=interpreter)
  elif interface == 'solr':
    from notebook.connectors.solr import SolrApi
    return SolrApi(request.user, interpreter=interpreter)
  elif interface == 'hbase':
    from notebook.connectors.hbase import HBaseApi
    return HBaseApi(request.user)
  elif interface == 'ksql':
    from notebook.connectors.ksql import KSqlApi
    return KSqlApi(request.user, interpreter=interpreter)
  elif interface == 'flink':
    from notebook.connectors.flink_sql import FlinkSqlApi
    return FlinkSqlApi(request.user, interpreter=interpreter)
  elif interface == 'kafka':
    from notebook.connectors.kafka import KafkaApi
    return KafkaApi(request.user)
  elif interface == 'pig':
    return OozieApi(user=request.user, request=request)  # Backward compatibility until Hue 4
  else:
    raise PopupException(_('Notebook connector interface not recognized: %s') % interface)