def install_custom_examples():
  if EXAMPLES.AUTO_LOAD.get():
    from beeswax.management.commands import beeswax_install_examples
    from desktop.auth.backend import rewrite_user
    from useradmin.models import install_sample_user

    user = rewrite_user(install_sample_user())

    if has_connectors():
      interpreters = [
        {'type': connector['id'], 'dialect': connector['dialect']}
        for connector in _get_installed_connectors(category='editor')
      ]
    else:
      interpreters = [
        {'type': interpreter['dialect'], 'dialect': interpreter['dialect']}
        for interpreter in get_ordered_interpreters(user)
        # Only for hive/impala currently; would also need to be ported to the Notebook example install.
        if interpreter['dialect'] in ('hive', 'impala')
      ]

    queries = EXAMPLES.QUERIES.get()
    tables = EXAMPLES.TABLES.get()  # No-op. Only used for the saved query samples, not the tables, currently.

    LOG.info(
      'Installing custom example queries: %(queries)s, tables: %(tables)s for dialects %(dialects)s '
      'belonging to user %(user)s' % {
        'queries': queries,
        'tables': tables,
        'dialects': [interpreter['dialect'] for interpreter in interpreters],
        'user': user
      }
    )

    result = []

    for interpreter in interpreters:
      successes, errors = beeswax_install_examples.Command().handle(
        dialect=interpreter['dialect'],
        user=user,
        interpreter=interpreter,
        queries=queries,
        tables=tables,
        request=None
      )
      LOG.info(
        'Dialect %(dialect)s installed samples: %(successes)s, %(errors)s' % {
          'dialect': interpreter['dialect'],
          'successes': successes,
          'errors': errors,
        }
      )
      result.append((successes, errors))

    return result

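# Hedged usage sketch: install_custom_examples() returns None when
# EXAMPLES.AUTO_LOAD is off, otherwise one (successes, errors) pair per dialect.
results = install_custom_examples()
if results:
  for successes, errors in results:
    LOG.info('Installed: %s, failed: %s' % (successes, errors))
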
def get_query_server_config(connector=None):
  if connector and has_connectors():
    query_server = get_query_server_config_via_connector(connector)
  else:
    server_port = get_hs2_http_port() if conf.USE_THRIFT_HTTP.get() else conf.SERVER_PORT.get()
    query_server = {
      'server_name': 'impala',
      'dialect': 'impala',
      'server_host': conf.SERVER_HOST.get(),
      'server_port': server_port,
      'principal': conf.IMPALA_PRINCIPAL.get(),
      'http_url': '%(protocol)s://%(host)s:%(port)s' % {
        'protocol': 'https' if conf.SSL.ENABLED.get() else 'http',
        'host': conf.SERVER_HOST.get(),
        'port': server_port
      },
      'impersonation_enabled': conf.IMPERSONATION_ENABLED.get(),
      'querycache_rows': conf.QUERYCACHE_ROWS.get(),
      'QUERY_TIMEOUT_S': conf.QUERY_TIMEOUT_S.get(),
      'SESSION_TIMEOUT_S': conf.SESSION_TIMEOUT_S.get(),
      'auth_username': conf.AUTH_USERNAME.get(),
      'auth_password': conf.AUTH_PASSWORD.get(),
      'use_sasl': conf.USE_SASL.get(),
      'transport_mode': 'http' if conf.USE_THRIFT_HTTP.get() else 'socket',
    }

  debug_query_server = query_server.copy()
  debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password'))
  LOG.debug("Query Server: %s" % debug_query_server)

  return query_server

def get_query_server_config(connector=None):
  if connector and has_connectors():
    query_server = get_query_server_config_via_connector(connector)
  else:
    query_server = {
      'server_name': 'impala',
      'dialect': 'impala',
      'server_host': conf.SERVER_HOST.get(),
      'server_port': conf.SERVER_PORT.get(),
      'principal': conf.IMPALA_PRINCIPAL.get(),
      'impersonation_enabled': conf.IMPERSONATION_ENABLED.get(),
      'querycache_rows': conf.QUERYCACHE_ROWS.get(),
      'QUERY_TIMEOUT_S': conf.QUERY_TIMEOUT_S.get(),
      'SESSION_TIMEOUT_S': conf.SESSION_TIMEOUT_S.get(),
      'auth_username': conf.AUTH_USERNAME.get(),
      'auth_password': conf.AUTH_PASSWORD.get(),
      'use_sasl': conf.USE_SASL.get()
    }

  debug_query_server = query_server.copy()
  debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password'))
  LOG.debug("Query Server: %s" % debug_query_server)

  return query_server

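# Hedged sketch (illustrative names, not Hue APIs) of how the two variants above
# map conf flags onto connection fields: with Thrift-over-HTTP enabled, the HTTP
# port and an http(s) URL are used; otherwise a raw socket on SERVER_PORT.
def _impala_endpoint(host, port, use_http_transport, use_ssl):
  if use_http_transport:
    protocol = 'https' if use_ssl else 'http'
    return {'transport_mode': 'http', 'http_url': '%s://%s:%s' % (protocol, host, port)}
  return {'transport_mode': 'socket'}

assert _impala_endpoint('impalad', 28000, True, False)['http_url'] == 'http://impalad:28000'
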
def config_validator(user):
  # dbms depends on beeswax.conf; import in the method to avoid a circular dependency
  from beeswax.design import hql_query
  from beeswax.server import dbms
  from beeswax.server.dbms import get_query_server_config

  res = []

  if has_connectors():
    return res

  try:
    try:
      if 'test' not in sys.argv:  # Avoid tests hanging
        query_server = get_query_server_config(name='impala')
        server = dbms.get(user, query_server)
        query = hql_query("SELECT 'Hello World!';")
        handle = server.execute_and_wait(query, timeout_sec=10.0)

        if handle:
          server.fetch(handle, rows=100)
          server.close(handle)
    except StructuredThriftTransportException as ex:
      if 'TSocket read 0 bytes' in str(ex):  # This message appears when authentication fails
        msg = "Failed to authenticate to Impalad, check authentication configurations."
        LOG.exception(msg)
        res.append((NICE_NAME, _(msg)))
      else:
        raise ex
  except Exception as ex:
    msg = "No available Impalad to send queries to."
    LOG.exception(msg)
    res.append((NICE_NAME, _(msg)))

  return res

def fetch_result_size(self, notebook, snippet):
  resp = {'rows': None, 'size': None, 'message': ''}

  if snippet.get('status') != 'available':
    raise QueryError(_('Result status is not available'))

  if has_connectors():
    # TODO: Add dialect to snippet and update fetchResultSize() in notebook.ko
    interpreter = get_interpreter(connector_type=snippet['type'])
    snippet_dialect = interpreter['dialect']
  else:
    snippet_dialect = snippet['type']

  if snippet_dialect not in ('hive', 'impala'):
    raise OperationNotSupported(_('Cannot fetch result metadata for snippet type: %s') % snippet_dialect)

  if snippet_dialect == 'hive':
    resp['rows'], resp['size'], resp['message'] = self._get_hive_result_size(notebook, snippet)
  else:
    resp['rows'], resp['size'], resp['message'] = self._get_impala_result_size(notebook, snippet)

  return resp

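# Hedged usage sketch (api, notebook and snippet are assumed to exist): the
# returned dict always carries 'rows', 'size' and 'message'; rows/size may be None.
meta = api.fetch_result_size(notebook, snippet)
LOG.info('Query produced %s rows (%s bytes)' % (meta['rows'], meta['size']))
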
def config_validator(user):
  '''
  v2
  When using connectors, 'hive' is now seen as a dialect and only the list of
  connections (instances of the 'hive' connector, e.g. pointing to a Hive server
  in the Cloud) should be tested. Interpreters are now tested by the Editor in
  libs/notebook/conf.py.

  v1
  All the configuration happens in apps/beeswax.
  '''
  from beeswax.design import hql_query
  # dbms depends on beeswax.conf; import in the method to avoid a circular dependency
  from beeswax.server import dbms

  res = []

  if has_connectors():
    return res

  try:
    try:
      if 'test' not in sys.argv:  # Avoid tests hanging
        server = dbms.get(user)
        query = hql_query("SELECT 'Hello World!';")
        handle = server.execute_and_wait(query, timeout_sec=10.0)

        if handle:
          server.fetch(handle, rows=100)
          server.close(handle)
    except StructuredThriftTransportException as e:
      if 'Error validating the login' in str(e):
        msg = 'Failed to authenticate to HiveServer2, check authentication configurations.'
        LOG.exception(msg)
        res.append((NICE_NAME, _(msg)))
      else:
        raise e
  except Exception as e:
    msg = "The application won't work without a running HiveServer2."
    LOG.exception(msg)
    res.append((NICE_NAME, _(msg)))

  warehouse = beeswax.hive_site.get_metastore_warehouse_dir()
  try:
    from aws.conf import is_enabled as is_s3_enabled
    from desktop.lib.fsmanager import get_filesystem

    fs = get_filesystem()

    if fs:
      fs_scheme = fs._get_scheme(warehouse)
      if fs_scheme == 's3a':
        if is_s3_enabled():
          fs.do_as_user(user, fs.stats, warehouse)
        else:
          LOG.warning("Warehouse is in S3, but no credential available.")
      else:
        fs.do_as_superuser(fs.stats, warehouse)
  except Exception:
    msg = 'Failed to access Hive warehouse: %s'
    LOG.exception(msg % warehouse)
    res.append((NICE_NAME, _(msg) % warehouse))

  return res

def create_session(self, **properties):
  properties['proxyUser'] = self.user.split('@')[0]

  if has_connectors():  # Only SQL is supported via connectors currently
    properties['kind'] = 'sql'

  return self._root.post('sessions', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

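# Sketch of the proxyUser derivation above: the Kerberos realm, if any, is
# stripped so Livy impersonates the short user name.
assert 'alice@EXAMPLE.COM'.split('@')[0] == 'alice'
assert 'bob'.split('@')[0] == 'bob'
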
def get_ordered_interpreters(user=None):
  if has_connectors():
    from desktop.lib.connectors.api import _get_installed_connectors
    interpreters = [
      _connector_to_interpreter(connector)
      for connector in _get_installed_connectors(categories=['editor', 'catalogs'], user=user)
    ]
  else:
    if not INTERPRETERS.get():
      _default_interpreters(user)

    interpreters = INTERPRETERS.get()

    user_apps = appmanager.get_apps_dict(user)
    user_interpreters = []
    for interpreter in interpreters:
      if check_permissions(user, interpreter, user_apps=user_apps):
        pass  # Not allowed
      else:
        user_interpreters.append(interpreter)

    interpreters_shown_on_wheel = _remove_duplications(INTERPRETERS_SHOWN_ON_WHEEL.get())
    unknown_interpreters = set(interpreters_shown_on_wheel) - set(user_interpreters)
    if unknown_interpreters:
      # Just filtering them out might be better than failing for this user
      raise ValueError(
        "Interpreters from interpreters_shown_on_wheel are not in the list of interpreters: %s" % unknown_interpreters
      )

    reordered_interpreters = interpreters_shown_on_wheel + [
      i for i in user_interpreters if i not in interpreters_shown_on_wheel
    ]

    interpreters = [
      {
        'name': interpreters[i].NAME.get(),
        'type': i,
        'interface': interpreters[i].INTERFACE.get(),
        'options': interpreters[i].OPTIONS.get()
      }
      for i in reordered_interpreters
    ]

  return [
    {
      "name": i.get('nice_name', i['name']),
      "type": i['type'],
      "interface": i['interface'],
      "options": i['options'],
      'dialect': i.get('dialect', i['name']).lower(),
      'dialect_properties': i.get('dialect_properties'),
      'category': i.get('category', 'editor'),
      "is_sql": i.get('is_sql')
        or i['interface'] in ["hiveserver2", "rdbms", "jdbc", "solr", "sqlalchemy", "ksql", "flink"]
        or i['type'] == 'sql',
      "is_catalog": i['interface'] in ["hms"],
    }
    for i in interpreters
  ]

def has_hdfs_enabled():
  if has_connectors():
    from desktop.lib.connectors.api import _get_installed_connectors
    return any(connector['dialect'] == 'hdfs' for connector in _get_installed_connectors())
  else:
    # Returns a (possibly empty) list of cluster names; callers rely on its truthiness.
    return list(HDFS_CLUSTERS.keys())

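# Hedged usage sketch: both branches are used as a boolean guard; without
# connectors the returned list of cluster names is simply truth-tested.
if has_hdfs_enabled():
  fs = get_hdfs()  # get_hdfs() appears further down in this collection
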
def get_ordered_interpreters(user=None):
  if not INTERPRETERS.get():
    _default_interpreters(user)

  interpreters = INTERPRETERS.get()
  interpreters_shown_on_wheel = _remove_duplications(INTERPRETERS_SHOWN_ON_WHEEL.get())

  user_apps = appmanager.get_apps_dict(user)
  user_interpreters = []
  for interpreter in interpreters:
    if check_permissions(user, interpreter, user_apps=user_apps):
      pass  # Not allowed
    else:
      user_interpreters.append(interpreter)

  unknown_interpreters = set(interpreters_shown_on_wheel) - set(user_interpreters)
  if unknown_interpreters:
    raise ValueError(
      "Interpreters from interpreters_shown_on_wheel are not in the list of interpreters: %s" % unknown_interpreters
    )

  if has_connectors():
    from desktop.lib.connectors.api import _get_installed_connectors
    reordered_interpreters = [
      {
        'name': connector['nice_name'],
        'type': connector['name'],
        'dialect': connector['dialect'],
        'category': connector['category'],
        'is_sql': connector.get('is_sql', False),
        'interface': connector['interface'],
        'options': {setting['name']: setting['value'] for setting in connector['settings']}
      }
      for connector in _get_installed_connectors(categories=['editor', 'catalogs'])
    ]
  else:
    reordered_interpreters = interpreters_shown_on_wheel + [
      i for i in user_interpreters if i not in interpreters_shown_on_wheel
    ]
    reordered_interpreters = [
      {
        'name': interpreters[i].NAME.get(),
        'type': i,
        'interface': interpreters[i].INTERFACE.get(),
        'options': interpreters[i].OPTIONS.get()
      }
      for i in reordered_interpreters
    ]

  return [
    {
      "name": i.get('nice_name', i['name']),
      "type": i['type'],
      "interface": i['interface'],
      "options": i['options'],
      'dialect': i.get('dialect', i['name']).lower(),
      'category': i.get('category', 'editor'),
      "is_sql": i.get('is_sql') or i['interface'] in ["hiveserver2", "rdbms", "jdbc", "solr", "sqlalchemy"],
      "is_catalog": i['interface'] in ["hms"],
    }
    for i in reordered_interpreters
  ]

def get_ordered_interpreters(user=None):
  from desktop.lib.connectors.api import CONNECTOR_INSTANCES

  if not INTERPRETERS.get():
    _default_interpreters(user)

  interpreters = INTERPRETERS.get()
  interpreters_shown_on_wheel = _remove_duplications(INTERPRETERS_SHOWN_ON_WHEEL.get())

  user_apps = appmanager.get_apps_dict(user)
  user_interpreters = []
  for interpreter in interpreters:
    if check_permissions(user, interpreter, user_apps=user_apps):
      pass  # Not allowed
    else:
      user_interpreters.append(interpreter)

  unknown_interpreters = set(interpreters_shown_on_wheel) - set(user_interpreters)
  if unknown_interpreters:
    raise ValueError(
      "Interpreters from interpreters_shown_on_wheel are not in the list of interpreters: %s" % unknown_interpreters
    )

  if has_connectors():
    reordered_interpreters = [
      {
        'name': i['name'],
        'type': i['type'],
        'interface': i['interface'],
        'options': {setting['name']: setting['value'] for setting in i['settings']}
      }
      for i in CONNECTOR_INSTANCES
    ]
  else:
    reordered_interpreters = interpreters_shown_on_wheel + [
      i for i in user_interpreters if i not in interpreters_shown_on_wheel
    ]
    reordered_interpreters = [
      {
        'name': interpreters[i].NAME.get(),
        'type': i,
        'interface': interpreters[i].INTERFACE.get(),
        'options': interpreters[i].OPTIONS.get()
      }
      for i in reordered_interpreters
    ]

  return [
    {
      "name": i['name'],
      "type": i['type'],
      "interface": i['interface'],
      "options": i['options'],
      "is_sql": i['interface'] in ["hiveserver2", "rdbms", "jdbc", "solr", "sqlalchemy"],
      "is_catalog": i['interface'] in ["hms"],
    }
    for i in reordered_interpreters
  ]

def get_api(user, connector_id):
  if has_connectors() and connector_id != 'dummy':
    connectors = _get_installed_connectors(user=user, connector_id=int(connector_id))
    connector = connectors[0]
    dialect = connector['dialect']
  else:
    connector = None  # Could get the interpreter if Connectors are off
    dialect = connector_id

  if dialect == 'dummy':
    return Base(user, connector_id)
  else:
    raise PopupException(_('Indexer connector dialect not recognized: %s') % dialect)

def config_validator(user, interpreters=None):
  res = []

  if not has_connectors():
    return res

  client = Client()
  client.force_login(user=user)

  if not user.is_authenticated:
    res.append(('Editor', _('Could not authenticate with user %s to validate interpreters') % user))

  if interpreters is None:
    interpreters = get_ordered_interpreters(user=user)

  for interpreter in interpreters:
    if interpreter.get('is_sql'):
      connector_id = interpreter['type']

      try:
        response = _excute_test_query(client, connector_id, interpreter=interpreter)
        data = json.loads(response.content)

        if data['status'] != 0:
          raise Exception(data)
      except Exception as e:
        trace = str(e)
        msg = "Testing the connector connection failed."
        if 'Error validating the login' in trace or 'TSocket read 0 bytes' in trace:
          msg += ' Failed to authenticate, check authentication configurations.'

        LOG.exception(msg)

        res.append((
          '%(name)s - %(dialect)s (%(type)s)' % interpreter,
          _(msg) + (' %s' % (trace[:100] + ('...' if len(trace) > 100 else '')))
        ))

  return res

def get_all_hdfs():
  global FS_CACHE
  if FS_CACHE:
    return FS_CACHE

  FS_CACHE = {}

  if has_connectors():
    for connector in _get_installed_connectors(category='browsers', dialect='hdfs', interface='rest'):
      settings = {setting['name']: setting['value'] for setting in connector['settings']}
      FS_CACHE[connector['name']] = webhdfs.WebHdfs(url=settings['server_url'], fs_defaultfs=settings['default_fs'])
  else:
    for identifier in list(conf.HDFS_CLUSTERS.keys()):
      FS_CACHE[identifier] = _make_filesystem(identifier)

  return FS_CACHE

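# Hedged sketch of the connector 'settings' shape the loop above expects
# (a list of name/value pairs, flattened into a plain dict; values illustrative):
example_connector = {
  'name': 'hdfs-prod',
  'settings': [
    {'name': 'server_url', 'value': 'http://namenode:50070/webhdfs/v1'},
    {'name': 'default_fs', 'value': 'hdfs://namenode:8020'},
  ],
}
settings = {setting['name']: setting['value'] for setting in example_connector['settings']}
assert settings['default_fs'] == 'hdfs://namenode:8020'
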
def get_ordered_interpreters(user=None):
  global INTERPRETERS_CACHE

  if has_connectors():
    from desktop.lib.connectors.api import _get_installed_connectors
    interpreters = [
      _connector_to_interpreter(connector)
      for connector in _get_installed_connectors(categories=['editor', 'catalogs'], user=user)
    ]
  else:
    if INTERPRETERS_CACHE is None:
      none_user = None  # For getting the full list of interpreters

      if is_cm_managed():
        extra_interpreters = INTERPRETERS.get()  # Combine the other apps' interpreters
        _default_interpreters(none_user)
      else:
        extra_interpreters = {}

      if not INTERPRETERS.get():
        _default_interpreters(none_user)

      INTERPRETERS_CACHE = INTERPRETERS.get()
      INTERPRETERS_CACHE.update(extra_interpreters)

    user_apps = appmanager.get_apps_dict(user)
    user_interpreters = []
    for interpreter in INTERPRETERS_CACHE:
      if check_has_missing_permission(user, interpreter, user_apps=user_apps):
        pass  # Not allowed
      else:
        user_interpreters.append(interpreter)

    interpreters_shown_on_wheel = _remove_duplications(INTERPRETERS_SHOWN_ON_WHEEL.get())
    unknown_interpreters = set(interpreters_shown_on_wheel) - set(user_interpreters)
    if unknown_interpreters:
      # Just filtering them out might be better than failing for this user
      raise ValueError(
        "Interpreters from interpreters_shown_on_wheel are not in the list of interpreters: %s" % unknown_interpreters
      )

    reordered_interpreters = interpreters_shown_on_wheel + [
      i for i in user_interpreters if i not in interpreters_shown_on_wheel
    ]

    interpreters = [
      {
        'name': INTERPRETERS_CACHE[i].NAME.get(),
        'type': i,
        'interface': INTERPRETERS_CACHE[i].INTERFACE.get(),
        'options': INTERPRETERS_CACHE[i].OPTIONS.get()
      }
      for i in reordered_interpreters
    ]

  return [
    {
      "name": i.get('nice_name', i['name']),
      'displayName': 'Unified Analytics'
        if ENABLE_UNIFIED_ANALYTICS.get() and i.get('dialect', i['name']).lower() == 'hive'
        else i.get('nice_name', i['name']),
      "type": i['type'],
      "interface": i['interface'],
      "options": i['options'],
      'dialect': i.get('dialect', i['name']).lower(),
      'dialect_properties': i.get('dialect_properties') or {},  # Empty when connectors are off
      'category': i.get('category', 'editor'),
      "is_sql": i.get('is_sql')
        or i['interface'] in ["hiveserver2", "rdbms", "jdbc", "solr", "sqlalchemy", "ksql", "flink"]
        or i['type'] == 'sql',
      "is_catalog": i['interface'] in ["hms"],
    }
    for i in interpreters
  ]

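# Hedged usage sketch (assuming a Django `user` object): each entry is a plain
# dict, so a caller can, for example, collect the SQL dialects available to a user.
sql_dialects = [i['dialect'] for i in get_ordered_interpreters(user) if i['is_sql']]
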
def get_api(request, snippet):
  from notebook.connectors.oozie_batch import OozieApi

  if snippet.get('wasBatchExecuted') and not TASK_SERVER.ENABLED.get():
    return OozieApi(user=request.user, request=request)

  if snippet.get('type') == 'report':
    snippet['type'] = 'impala'

  patch_snippet_for_connector(snippet)

  connector_name = snippet['type']

  if has_connectors() and snippet.get('type') == 'hello' and is_admin(request.user):
    interpreter = snippet.get('interpreter')
  else:
    interpreter = get_interpreter(connector_type=connector_name, user=request.user)

  interface = interpreter['interface']

  if get_cluster_config(request.user)['has_computes']:
    # Via the Catalog autocomplete API or Notebook create sessions.
    compute = json.loads(request.POST.get('cluster', '""'))
    if compute == '""' or compute == 'undefined':
      compute = None
    if not compute and snippet.get('compute'):  # Via notebook.ko.js
      interpreter['compute'] = snippet['compute']

  LOG.debug('Selected interpreter %s interface=%s compute=%s' % (
    interpreter['type'],
    interface,
    interpreter.get('compute') and interpreter['compute']['name']
  ))

  if interface == 'hiveserver2' or interface == 'hms':
    from notebook.connectors.hiveserver2 import HS2Api
    return HS2Api(user=request.user, request=request, interpreter=interpreter)
  elif interface == 'oozie':
    return OozieApi(user=request.user, request=request)
  elif interface == 'livy':
    from notebook.connectors.spark_shell import SparkApi
    return SparkApi(request.user, interpreter=interpreter)
  elif interface == 'livy-batch':
    from notebook.connectors.spark_batch import SparkBatchApi
    return SparkBatchApi(request.user, interpreter=interpreter)
  elif interface == 'text' or interface == 'markdown':
    from notebook.connectors.text import TextApi
    return TextApi(request.user)
  elif interface == 'rdbms':
    from notebook.connectors.rdbms import RdbmsApi
    return RdbmsApi(request.user, interpreter=snippet['type'], query_server=snippet.get('query_server'))
  elif interface == 'jdbc':
    if interpreter['options'] and interpreter['options'].get('url', '').find('teradata') >= 0:
      from notebook.connectors.jdbc_teradata import JdbcApiTeradata
      return JdbcApiTeradata(request.user, interpreter=interpreter)
    if interpreter['options'] and interpreter['options'].get('url', '').find('awsathena') >= 0:
      from notebook.connectors.jdbc_athena import JdbcApiAthena
      return JdbcApiAthena(request.user, interpreter=interpreter)
    elif interpreter['options'] and interpreter['options'].get('url', '').find('presto') >= 0:
      from notebook.connectors.jdbc_presto import JdbcApiPresto
      return JdbcApiPresto(request.user, interpreter=interpreter)
    elif interpreter['options'] and interpreter['options'].get('url', '').find('clickhouse') >= 0:
      from notebook.connectors.jdbc_clickhouse import JdbcApiClickhouse
      return JdbcApiClickhouse(request.user, interpreter=interpreter)
    elif interpreter['options'] and interpreter['options'].get('url', '').find('vertica') >= 0:
      from notebook.connectors.jdbc_vertica import JdbcApiVertica
      return JdbcApiVertica(request.user, interpreter=interpreter)
    else:
      from notebook.connectors.jdbc import JdbcApi
      return JdbcApi(request.user, interpreter=interpreter)
  elif interface == 'teradata':
    from notebook.connectors.jdbc_teradata import JdbcApiTeradata
    return JdbcApiTeradata(request.user, interpreter=interpreter)
  elif interface == 'athena':
    from notebook.connectors.jdbc_athena import JdbcApiAthena
    return JdbcApiAthena(request.user, interpreter=interpreter)
  elif interface == 'presto':
    from notebook.connectors.jdbc_presto import JdbcApiPresto
    return JdbcApiPresto(request.user, interpreter=interpreter)
  elif interface == 'sqlalchemy':
    from notebook.connectors.sql_alchemy import SqlAlchemyApi
    return SqlAlchemyApi(request.user, interpreter=interpreter)
  elif interface == 'solr':
    from notebook.connectors.solr import SolrApi
    return SolrApi(request.user, interpreter=interpreter)
  elif interface == 'hbase':
    from notebook.connectors.hbase import HBaseApi
    return HBaseApi(request.user)
  elif interface == 'ksql':
    from notebook.connectors.ksql import KSqlApi
    return KSqlApi(request.user, interpreter=interpreter)
  elif interface == 'flink':
    from notebook.connectors.flink_sql import FlinkSqlApi
    return FlinkSqlApi(request.user, interpreter=interpreter)
  elif interface == 'kafka':
    from notebook.connectors.kafka import KafkaApi
    return KafkaApi(request.user)
  elif interface == 'pig':
    return OozieApi(user=request.user, request=request)  # Backward compatibility until Hue 4
  else:
    raise PopupException(_('Notebook connector interface not recognized: %s') % interface)

def get_hdfs(identifier="default", user=None):
  global FS_CACHE
  get_all_hdfs()
  return FS_CACHE[list(FS_CACHE.keys())[0]] if has_connectors() else FS_CACHE[identifier]

def get_hdfs(identifier="default", user=None):
  global FS_CACHE
  get_all_hdfs()

  if has_connectors():
    identifier = list(FS_CACHE.keys())[0] if FS_CACHE.keys() else None

  return FS_CACHE.get(identifier)

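# Hedged usage sketch: both get_hdfs() variants above resolve a filesystem from
# the shared FS_CACHE; with connectors on, the first configured HDFS wins and
# the identifier argument is ignored.
fs = get_hdfs('default')
if fs:
  fs.stats('/user')  # WebHdfs exposes stats(), as used by config_validator() above
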
def get_query_server_config(name='beeswax', connector=None):
  if connector and has_connectors():  # TODO: Give an empty connector when no connector is in use
    LOG.debug("Query via connector %s" % name)
    query_server = get_query_server_config_via_connector(connector)
  else:
    LOG.debug("Query via ini %s" % name)

    if name == "llap":
      activeEndpoint = cache.get('llap')
      if activeEndpoint is None:
        if HIVE_DISCOVERY_LLAP.get():
          LOG.debug("Checking ZooKeeper to discover the Hive LLAP server endpoint")
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          if HIVE_DISCOVERY_LLAP_HA.get():
            znode = "{0}/instances".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up Hive LLAP HA with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              if not hiveservers:
                raise PopupException(_('There is no running Hive LLAP server available'))
              LOG.info("Available Hive LLAP servers: {0}".format(hiveservers))
              for server in hiveservers:
                llap_servers = json.loads(zk.get("{0}/{1}".format(znode, server))[0])["internal"][0]
                if llap_servers["api"] == "activeEndpoint":
                  LOG.info("Selecting Hive LLAP server: {0}".format(llap_servers))
                  cache.set(
                    "llap",
                    json.dumps({
                      "host": llap_servers["addresses"][0]["host"],
                      "port": llap_servers["addresses"][0]["port"]
                    }),
                    CACHE_TIMEOUT.get()
                  )
            else:
              LOG.error("Hive LLAP endpoint not found, reverting to config values")
              cache.set(
                "llap",
                json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}),
                CACHE_TIMEOUT.get()
              )
          else:
            znode = "{0}".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up Hive LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                cache.set(
                  "llap",
                  json.dumps({
                    "host": server.split(';')[0].split('=')[1].split(":")[0],
                    "port": server.split(';')[0].split('=')[1].split(":")[1]
                  })
                )
          zk.stop()
        else:
          LOG.debug("ZooKeeper discovery not enabled, reverting to config values")
          cache.set(
            "llap",
            json.dumps({"host": LLAP_SERVER_HOST.get(), "port": LLAP_SERVER_THRIFT_PORT.get()}),
            CACHE_TIMEOUT.get()
          )

      activeEndpoint = json.loads(cache.get("llap"))

    elif name != 'hms' and name != 'impala':
      activeEndpoint = cache.get("hiveserver2")
      if activeEndpoint is None:
        if HIVE_DISCOVERY_HS2.get():
          hiveservers = get_zk_hs2()
          LOG.debug("Available Hive Servers: {0}".format(hiveservers))
          if not hiveservers:
            raise PopupException(_('There is no running Hive server available'))

          server_to_use = 0
          LOG.debug("Selected Hive server {0}: {1}".format(server_to_use, hiveservers[server_to_use]))
          cache.set(
            "hiveserver2",
            json.dumps({
              "host": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0],
              "port": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[1]
            })
          )
        else:
          cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
      else:
        # Set the HS2 cache in case there is no HS2 discovery
        cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
        if HIVE_DISCOVERY_HS2.get():
          # Replace the active endpoint if the current HS2 is down
          hiveservers = get_zk_hs2()
          if hiveservers is not None:
            server_to_use = 0
            hs2_host_name = hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0]
            hs2_in_active_endpoint = hs2_host_name in activeEndpoint
            LOG.debug("Is the current HS2 active: {0}".format(hs2_in_active_endpoint))
            if not hs2_in_active_endpoint:
              LOG.error('Current HiveServer is down, connecting to the next available HiveServer from ZooKeeper')
              reset_ha()
              server_to_use = 0
              LOG.debug("Selected HiveServer {0}: {1}".format(server_to_use, hiveservers[server_to_use]))
              cache.set(
                "hiveserver2",
                json.dumps({
                  "host": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0],
                  "port": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[1]
                })
              )

      activeEndpoint = json.loads(cache.get("hiveserver2"))

    if name == 'impala':
      from impala.dbms import get_query_server_config as impala_query_server_config
      query_server = impala_query_server_config()
    elif name == 'hms':
      kerberos_principal = get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
        'server_name': 'hms',
        'server_host': HIVE_METASTORE_HOST.get() if not cluster_config else cluster_config.get('server_host'),
        'server_port': HIVE_METASTORE_PORT.get(),
        'principal': kerberos_principal,
        'transport_mode': 'http' if hiveserver2_transport_mode() == 'HTTP' else 'socket',
        'auth_username': AUTH_USERNAME.get(),
        'auth_password': AUTH_PASSWORD.get(),
        'use_sasl': HIVE_USE_SASL.get()
      }
    else:
      kerberos_principal = get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
        'server_name': 'beeswax' if name != 'hplsql' else 'hplsql',
        'server_host': activeEndpoint["host"],
        'server_port': LLAP_SERVER_PORT.get() if name == 'llap' else HIVE_SERVER_PORT.get(),
        'principal': kerberos_principal,
        'http_url': '%(protocol)s://%(host)s:%(port)s/%(end_point)s' % {
          'protocol': 'https' if hiveserver2_use_ssl() else 'http',
          'host': activeEndpoint["host"],
          'port': activeEndpoint["port"],
          'end_point': hiveserver2_thrift_http_path()
        },
        'transport_mode': 'http' if hiveserver2_transport_mode() == 'HTTP' else 'socket',
        'auth_username': AUTH_USERNAME.get(),
        'auth_password': AUTH_PASSWORD.get(),
        'use_sasl': HIVE_USE_SASL.get(),
        'close_sessions': CLOSE_SESSIONS.get(),
        'has_session_pool': has_session_pool(),
        'max_number_of_sessions': MAX_NUMBER_OF_SESSIONS.get()
      }

    if name == 'sparksql':  # Extends Hive as it is very similar
      from spark.conf import SQL_SERVER_HOST as SPARK_SERVER_HOST, SQL_SERVER_PORT as SPARK_SERVER_PORT, USE_SASL as SPARK_USE_SASL

      query_server.update({
        'server_name': 'sparksql',
        'server_host': SPARK_SERVER_HOST.get(),
        'server_port': SPARK_SERVER_PORT.get(),
        'use_sasl': SPARK_USE_SASL.get()
      })

  if not query_server.get('dialect'):
    query_server['dialect'] = query_server['server_name']

  debug_query_server = query_server.copy()
  debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password', None))
  LOG.debug("Query Server: %s" % debug_query_server)

  return query_server

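# Hedged sketch of the ZooKeeper znode entry format that the split chains above
# assume, e.g. 'serverUri=hs2-host:10001;version=3.1.3000;sequence=0000000000':
entry = 'serverUri=hs2-host:10001;version=3.1.3000;sequence=0000000000'
host, port = entry.split(';')[0].split('=')[1].split(':')
assert (host, port) == ('hs2-host', '10001')
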
def _get_servername(db):
  if has_connectors():
    return db.client.query_server['server_name']
  else:
    return 'hive' if db.server_name == 'beeswax' else db.server_name

def process_view(self, request, view_func, view_args, view_kwargs):
  """
  We also perform access logging in ``process_view()`` since we have the view function,
  which tells us the log level. The downside is that we don't have the status code,
  which isn't useful for status logging anyway.
  """
  request.ts = time.time()
  request.view_func = view_func
  access_log_level = getattr(view_func, 'access_log_level', None)

  # Skip the loop for OIDC
  if request.path in ['/oidc/authenticate/', '/oidc/callback/', '/oidc/logout/', '/hue/oidc_failed/']:
    return None

  if request.path.startswith('/api/') or request.path == '/notebook/api/create_session':
    return None

  # Skip views not requiring login

  # If the view has "opted out" of login required, skip
  if hasattr(view_func, "login_notrequired"):
    log_page_hit(request, view_func, level=access_log_level or logging.DEBUG)
    return None

  # There are certain Django views which are also opt-out, but
  # it would be evil to go add attributes to them
  if view_func in DJANGO_VIEW_AUTH_WHITELIST:
    log_page_hit(request, view_func, level=access_log_level or logging.DEBUG)
    return None

  # If the user is logged in, check that they have permission to access the app
  if request.user.is_active and request.user.is_authenticated:
    AppSpecificMiddleware.augment_request_with_app(request, view_func)

    # Until Django 1.3, which resolves returning the URL name, just match on the name of the view
    try:
      access_view = 'access_view:%s:%s' % (request._desktop_app, resolve(request.path)[0].__name__)
    except Exception as e:
      access_log(request, 'error checking view perm: %s' % e, level=access_log_level)
      access_view = ''

    app_accessed = request._desktop_app
    app_libs_whitelist = ["desktop", "home", "home2", "about", "hue", "editor", "notebook", "indexer", "404", "500", "403"]

    if has_connectors():
      app_libs_whitelist.append('metadata')
      if DASHBOARD_ENABLED.get():
        app_libs_whitelist.append('dashboard')

    # Accessing an app can access an underlying other app.
    # e.g. impala or spark uses code from beeswax and so accessing impala shows up as beeswax here.
    # Here we trust the URL to be the real app we need to check the perms for.
    ui_app_accessed = get_app_name(request)
    if app_accessed != ui_app_accessed and ui_app_accessed not in ('logs', 'accounts', 'login'):
      app_accessed = ui_app_accessed

    if app_accessed and \
        app_accessed not in app_libs_whitelist and \
        not (
          is_admin(request.user) or
          request.user.has_hue_permission(action="access", app=app_accessed) or
          request.user.has_hue_permission(action=access_view, app=app_accessed)
        ) and \
        not (app_accessed == '__debug__' and DJANGO_DEBUG_MODE.get()):
      access_log(request, 'permission denied', level=access_log_level)
      return PopupException(
        _("You do not have permission to access the %(app_name)s application.") % {'app_name': app_accessed.capitalize()},
        error_code=401
      ).response(request)
    else:
      if not hasattr(request, 'view_func'):
        log_page_hit(request, view_func, level=access_log_level)
      return None

  if AUTH.AUTO_LOGIN_ENABLED.get():
    # Auto-create the hue/hue user if not already present
    user = find_or_create_user(username='******', password='******')
    ensure_has_a_group(user)
    user = rewrite_user(user)
    user.is_active = True
    user.save()

    user = authenticate(request, username='******', password='******')
    if user is not None:
      login(request, user)
      return None

  logging.info("Redirecting to login page: %s", request.get_full_path())
  access_log(request, 'login redirection', level=access_log_level)

  no_idle_backends = (
    "libsaml.backend.SAML2Backend",
    "desktop.auth.backend.SpnegoDjangoBackend",
    "desktop.auth.backend.KnoxSpnegoDjangoBackend"
  )
  if request.ajax and all(no_idle_backend not in AUTH.BACKEND.get() for no_idle_backend in no_idle_backends):
    # Send back a magic header which causes Hue.Request to interpose itself
    # in the ajax request and make the user login before resubmitting the request.
    response = HttpResponse("/* login required */", content_type="text/javascript")
    response[MIDDLEWARE_HEADER] = 'LOGIN_REQUIRED'
    return response
  else:
    if request.GET.get('is_embeddable'):
      # Remove is_embeddable so that the redirect from and to the login page works. The login page is not embeddable.
      return JsonResponse({
        'url': "%s?%s=%s" % (
          settings.LOGIN_URL,
          REDIRECT_FIELD_NAME,
          quote('/hue' + request.get_full_path().replace('is_embeddable=true', '').replace('&&', '&'))
        )
      })
    else:
      return HttpResponseRedirect(
        "%s?%s=%s" % (settings.LOGIN_URL, REDIRECT_FIELD_NAME, quote(request.get_full_path()))
      )

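# Hedged sketch of the login opt-out checked above: a hypothetical public view
# can set the login_notrequired attribute the middleware looks for.
def healthcheck(request):
  return HttpResponse('ok')

healthcheck.login_notrequired = True
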
def make_notebook(
    name='Browse', description='', editor_type='hive', statement='', status='ready',
    files=None, functions=None, settings=None, is_saved=False, database='default',
    snippet_properties=None, batch_submit=False, on_success_url=None, skip_historify=False,
    is_task=False, last_executed=-1, is_notebook=False, pub_sub_url=None,
    result_properties={}, namespace=None, compute=None, is_presentation_mode=False):
  '''
  skip_historify: do not add the task to the query history, e.g. SQL Dashboard.
  is_task / isManaged: true when the operation is managed by Hue (include_managed=True in the document),
  e.g. exporting a query result, dropping some tables.
  '''
  from notebook.connectors.hiveserver2 import HS2Api

  if has_connectors():
    interpreter = get_interpreter(connector_type=editor_type)
    editor_connector = editor_type
    editor_type = interpreter['dialect']
  else:
    editor_connector = editor_type

  editor = Notebook()

  if snippet_properties is None:
    snippet_properties = {}

  if editor_type == 'hive':
    sessions_properties = HS2Api.get_properties(editor_type)
    if files is not None:
      _update_property_value(sessions_properties, 'files', files)
    if functions is not None:
      _update_property_value(sessions_properties, 'functions', functions)
    if settings is not None:
      _update_property_value(sessions_properties, 'settings', settings)
  elif editor_type == 'impala':
    sessions_properties = HS2Api.get_properties(editor_type)
    if settings is not None:
      _update_property_value(sessions_properties, 'settings', settings)
  elif editor_type == 'java':
    sessions_properties = []  # Java options
  else:
    sessions_properties = []

  data = {
    'name': name,
    'uuid': str(uuid.uuid4()),
    'description': description,
    'sessions': [{
      'type': editor_connector,
      'properties': sessions_properties,
      'id': None
    }],
    'selectedSnippet': editor_connector,  # TODO: might need an update in notebook.ko.js
    'type': 'notebook' if is_notebook else 'query-%s' % editor_type,
    'showHistory': True,
    'isSaved': is_saved,
    'onSuccessUrl': urllib_quote(on_success_url.encode('utf-8'), safe=SAFE_CHARACTERS_URI) if on_success_url else None,
    'pubSubUrl': pub_sub_url,
    'skipHistorify': skip_historify,
    'isPresentationModeDefault': is_presentation_mode,
    'isManaged': is_task,
    'snippets': [{
      'status': status,
      'id': str(uuid.uuid4()),
      'statement_raw': statement,
      'statement': statement,
      'type': editor_connector,
      'wasBatchExecuted': batch_submit,
      'lastExecuted': last_executed,
      'properties': {
        'files': [] if files is None else files,
        'functions': [] if functions is None else functions,
        'settings': [] if settings is None else settings
      },
      'name': name,
      'database': database,
      'namespace': namespace if namespace else {},
      'compute': compute if compute else {},
      'result': {'handle': {}},
      'variables': []
    }] if not is_notebook else []
  }

  if has_connectors():  # To improve
    data['dialect'] = interpreter['dialect']
    data['type'] = '%s-%s' % (editor_type, editor_connector)  # e.g. 'flink-' + connector id

  if snippet_properties:
    data['snippets'][0]['properties'].update(snippet_properties)
  if result_properties:
    data['snippets'][0]['result'].update(result_properties)

  editor.data = json.dumps(data)

  return editor

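# Hedged usage sketch: wrapping a statement for execution (the table name is
# illustrative); the returned Notebook object carries its payload as JSON in `.data`.
notebook = make_notebook(
  name='Browse',
  editor_type='hive',
  statement="SELECT * FROM web_logs LIMIT 100",
  status='ready',
  is_task=True,
)
payload = json.loads(notebook.data)
assert payload['snippets'][0]['statement'] == "SELECT * FROM web_logs LIMIT 100"
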
def get_query_server_config(name='beeswax', connector=None):
  if connector and has_connectors():  # TODO: Give an empty connector when no connector is in use
    query_server = get_query_server_config_via_connector(connector)
  else:
    LOG.debug("Query cluster %s" % name)

    if name == "llap":
      activeEndpoint = cache.get('llap')
      if activeEndpoint is None:
        if HIVE_DISCOVERY_LLAP.get():
          LOG.debug("Checking ZooKeeper for the Hive Server Interactive endpoint")
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          if HIVE_DISCOVERY_LLAP_HA.get():
            znode = "{0}/instances".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                llap_servers = json.loads(zk.get("{0}/{1}".format(znode, server))[0])["internal"][0]
                if llap_servers["api"] == "activeEndpoint":
                  cache.set(
                    "llap",
                    json.dumps({
                      "host": llap_servers["addresses"][0]["host"],
                      "port": llap_servers["addresses"][0]["port"]
                    }),
                    CACHE_TIMEOUT.get()
                  )
            else:
              LOG.error("LLAP endpoint not found, reverting to HiveServer2")
              cache.set(
                "llap",
                json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}),
                CACHE_TIMEOUT.get()
              )
          else:
            znode = "{0}".format(HIVE_DISCOVERY_LLAP_ZNODE.get())
            LOG.debug("Setting up LLAP with the following node {0}".format(znode))
            if zk.exists(znode):
              hiveservers = zk.get_children(znode)
              for server in hiveservers:
                cache.set(
                  "llap",
                  json.dumps({
                    "host": server.split(';')[0].split('=')[1].split(":")[0],
                    "port": server.split(';')[0].split('=')[1].split(":")[1]
                  })
                )
          zk.stop()
        else:
          LOG.debug("ZooKeeper discovery not enabled, reverting to config values")
          cache.set(
            "llap",
            json.dumps({"host": LLAP_SERVER_HOST.get(), "port": LLAP_SERVER_THRIFT_PORT.get()}),
            CACHE_TIMEOUT.get()
          )

      activeEndpoint = json.loads(cache.get("llap"))

    elif name != 'hms' and name != 'impala':
      activeEndpoint = cache.get("hiveserver2")
      if activeEndpoint is None:
        if HIVE_DISCOVERY_HS2.get():
          zk = KazooClient(hosts=libzookeeper_conf.ENSEMBLE.get(), read_only=True)
          zk.start()
          znode = HIVE_DISCOVERY_HIVESERVER2_ZNODE.get()
          LOG.info("Setting up Hive with the following node {0}".format(znode))
          if zk.exists(znode):
            hiveservers = zk.get_children(znode)
            server_to_use = 0  # if CONF.HIVE_SPREAD.get() randint(0, len(hiveservers)-1) else 0
            cache.set(
              "hiveserver2",
              json.dumps({
                "host": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[0],
                "port": hiveservers[server_to_use].split(";")[0].split("=")[1].split(":")[1]
              })
            )
          else:
            cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))
          zk.stop()
        else:
          cache.set("hiveserver2", json.dumps({"host": HIVE_SERVER_HOST.get(), "port": HIVE_HTTP_THRIFT_PORT.get()}))

      activeEndpoint = json.loads(cache.get("hiveserver2"))

    if name == 'impala':
      from impala.dbms import get_query_server_config as impala_query_server_config
      query_server = impala_query_server_config()
    elif name == 'hms':
      kerberos_principal = hive_site.get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
        'server_name': 'hms',
        'server_host': HIVE_METASTORE_HOST.get() if not cluster_config else cluster_config.get('server_host'),
        'server_port': HIVE_METASTORE_PORT.get(),
        'principal': kerberos_principal,
        'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
        'auth_username': AUTH_USERNAME.get(),
        'auth_password': AUTH_PASSWORD.get(),
        'use_sasl': HIVE_USE_SASL.get()
      }
    else:
      kerberos_principal = hive_site.get_hiveserver2_kerberos_principal(HIVE_SERVER_HOST.get())
      query_server = {
        'server_name': 'beeswax',
        'server_host': activeEndpoint["host"],
        'server_port': LLAP_SERVER_PORT.get() if name == 'llap' else HIVE_SERVER_PORT.get(),
        'principal': kerberos_principal,
        'http_url': '%(protocol)s://%(host)s:%(port)s/%(end_point)s' % {
          'protocol': 'https' if hiveserver2_use_ssl() else 'http',
          'host': activeEndpoint["host"],
          'port': activeEndpoint["port"],
          'end_point': hive_site.hiveserver2_thrift_http_path()
        },
        'transport_mode': 'http' if hive_site.hiveserver2_transport_mode() == 'HTTP' else 'socket',
        'auth_username': AUTH_USERNAME.get(),
        'auth_password': AUTH_PASSWORD.get(),
        'use_sasl': HIVE_USE_SASL.get()
      }

    if name == 'sparksql':  # Extends Hive as it is very similar
      from spark.conf import SQL_SERVER_HOST as SPARK_SERVER_HOST, SQL_SERVER_PORT as SPARK_SERVER_PORT, USE_SASL as SPARK_USE_SASL

      query_server.update({
        'server_name': 'sparksql',
        'server_host': SPARK_SERVER_HOST.get(),
        'server_port': SPARK_SERVER_PORT.get(),
        'use_sasl': SPARK_USE_SASL.get()
      })

  debug_query_server = query_server.copy()
  debug_query_server['auth_password_used'] = bool(debug_query_server.pop('auth_password', None))
  LOG.debug("Query Server: %s" % debug_query_server)

  return query_server

def get_api(request, snippet):
  from notebook.connectors.oozie_batch import OozieApi

  if snippet.get('wasBatchExecuted') and not TASK_SERVER.ENABLED.get():
    return OozieApi(user=request.user, request=request)

  if snippet['type'] == 'report':
    snippet['type'] = 'impala'

  interpreter = [
    interpreter
    for interpreter in get_ordered_interpreters(request.user)
    if snippet['type'] in (interpreter['type'], interpreter['interface'])
  ]

  if not interpreter:
    if snippet['type'] == 'hbase':
      interpreter = [{'name': 'hbase', 'type': 'hbase', 'interface': 'hbase', 'options': {}, 'is_sql': False}]
    elif snippet['type'] == 'kafka':
      interpreter = [{'name': 'kafka', 'type': 'kafka', 'interface': 'kafka', 'options': {}, 'is_sql': False}]
    elif snippet['type'] == 'solr':
      interpreter = [{'name': 'solr', 'type': 'solr', 'interface': 'solr', 'options': {}, 'is_sql': False}]
    elif snippet['type'] == 'custom':
      interpreter = [{
        'name': snippet['name'],
        'type': snippet['type'],
        'interface': snippet['interface'],
        'options': snippet.get('options', {}),
        'is_sql': False
      }]
    else:
      raise PopupException(_('Snippet type %(type)s is not configured.') % snippet)

  interpreter = interpreter[0]
  interface = interpreter['interface']

  # TODO: Multi cluster --> multi computes of a connector
  if has_connectors():
    cluster = {
      'connector': snippet['type'],
      'id': interpreter['type'],
    }
    cluster.update(interpreter['options'])
  elif has_multi_cluster():
    # Via the Catalog autocomplete API or Notebook create sessions
    cluster = json.loads(request.POST.get('cluster', '""'))
    if cluster == '""' or cluster == 'undefined':
      cluster = None
    if not cluster and snippet.get('compute'):  # Via notebook.ko.js
      cluster = snippet['compute']
  else:
    cluster = None

  cluster_name = cluster.get('id') if cluster else None

  if cluster and 'altus:dataware:k8s' in cluster_name:
    interface = 'hiveserver2'
  elif cluster and 'crn:altus:dataware:' in cluster_name:
    interface = 'altus-adb'
  elif cluster and 'crn:altus:dataeng:' in cluster_name:
    interface = 'dataeng'

  LOG.debug('Selected connector %s %s interface=%s compute=%s' % (cluster_name, cluster, interface, snippet.get('compute')))
  snippet['interface'] = interface

  if interface.startswith('hiveserver2') or interface == 'hms':
    from notebook.connectors.hiveserver2 import HS2Api
    return HS2Api(user=request.user, request=request, cluster=cluster, interface=interface)
  elif interface == 'oozie':
    return OozieApi(user=request.user, request=request)
  elif interface == 'livy':
    from notebook.connectors.spark_shell import SparkApi
    return SparkApi(request.user)
  elif interface == 'livy-batch':
    from notebook.connectors.spark_batch import SparkBatchApi
    return SparkBatchApi(request.user)
  elif interface == 'text' or interface == 'markdown':
    from notebook.connectors.text import TextApi
    return TextApi(request.user)
  elif interface == 'rdbms':
    from notebook.connectors.rdbms import RdbmsApi
    return RdbmsApi(request.user, interpreter=snippet['type'], query_server=snippet.get('query_server'))
  elif interface == 'altus-adb':
    from notebook.connectors.altus_adb import AltusAdbApi
    return AltusAdbApi(user=request.user, cluster_name=cluster_name, request=request)
  elif interface == 'dataeng':
    from notebook.connectors.dataeng import DataEngApi
    return DataEngApi(user=request.user, request=request, cluster_name=cluster_name)
  elif interface == 'jdbc':
    if interpreter['options'] and interpreter['options'].get('url', '').find('teradata') >= 0:
      from notebook.connectors.jdbc_teradata import JdbcApiTeradata
      return JdbcApiTeradata(request.user, interpreter=interpreter)
    if interpreter['options'] and interpreter['options'].get('url', '').find('awsathena') >= 0:
      from notebook.connectors.jdbc_athena import JdbcApiAthena
      return JdbcApiAthena(request.user, interpreter=interpreter)
    elif interpreter['options'] and interpreter['options'].get('url', '').find('presto') >= 0:
      from notebook.connectors.jdbc_presto import JdbcApiPresto
      return JdbcApiPresto(request.user, interpreter=interpreter)
    elif interpreter['options'] and interpreter['options'].get('url', '').find('clickhouse') >= 0:
      from notebook.connectors.jdbc_clickhouse import JdbcApiClickhouse
      return JdbcApiClickhouse(request.user, interpreter=interpreter)
    elif interpreter['options'] and interpreter['options'].get('url', '').find('vertica') >= 0:
      from notebook.connectors.jdbc_vertica import JdbcApiVertica
      return JdbcApiVertica(request.user, interpreter=interpreter)
    else:
      from notebook.connectors.jdbc import JdbcApi
      return JdbcApi(request.user, interpreter=interpreter)
  elif interface == 'teradata':
    from notebook.connectors.jdbc import JdbcApiTeradata
    return JdbcApiTeradata(request.user, interpreter=interpreter)
  elif interface == 'athena':
    from notebook.connectors.jdbc import JdbcApiAthena
    return JdbcApiAthena(request.user, interpreter=interpreter)
  elif interface == 'presto':
    from notebook.connectors.jdbc_presto import JdbcApiPresto
    return JdbcApiPresto(request.user, interpreter=interpreter)
  elif interface == 'sqlalchemy':
    from notebook.connectors.sql_alchemy import SqlAlchemyApi
    return SqlAlchemyApi(request.user, interpreter=interpreter)
  elif interface == 'solr':
    from notebook.connectors.solr import SolrApi
    return SolrApi(request.user, interpreter=interpreter)
  elif interface == 'hbase':
    from notebook.connectors.hbase import HBaseApi
    return HBaseApi(request.user)
  elif interface == 'kafka':
    from notebook.connectors.kafka import KafkaApi
    return KafkaApi(request.user)
  elif interface == 'pig':
    return OozieApi(user=request.user, request=request)  # Backward compatibility until Hue 4
  else:
    raise PopupException(_('Notebook connector interface not recognized: %s') % interface)

    name='useradmin_views_list_for_autocomplete'),
  url(r'^desktop/api/users/?$', useradmin_views.get_users_by_id)
]

dynamic_patterns += [
  url(r'^desktop/api/vcs/contents/?$', desktop_lib_vcs_api.contents),
  url(r'^desktop/api/vcs/authorize/?$', desktop_lib_vcs_api.authorize),
]

# Metrics specific
if METRICS.ENABLE_WEB_METRICS.get():
  dynamic_patterns += [
    url(r'^desktop/metrics/?', include('desktop.lib.metrics.urls'))
  ]

if has_connectors():
  dynamic_patterns += [
    url(r'^desktop/connectors/?', include('desktop.lib.connectors.urls'))
  ]

if ANALYTICS.IS_ENABLED.get():
  dynamic_patterns += [
    url(r'^desktop/analytics/?', include('desktop.lib.analytics.urls'))
  ]

dynamic_patterns += [
  url(r'^scheduler/', include('desktop.lib.scheduler.urls'))
]

dynamic_patterns += [
  url(r'^admin/?', include(admin.site.urls)),