def analyze_table(request, database, table, columns=None): app_name = get_app_name(request) cluster = json.loads(request.POST.get('cluster', '{}')) query_server = get_query_server_config(app_name, cluster=cluster) db = dbms.get(request.user, query_server) table_obj = db.get_table(database, table) if table_obj.is_impala_only and app_name != 'impala': query_server = get_query_server_config('impala') db = dbms.get(request.user, query_server) response = {'status': -1, 'message': '', 'redirect': ''} if request.method == "POST": if columns is None: query_history = db.analyze_table(database, table) else: query_history = db.analyze_table_columns(database, table) response['watch_url'] = reverse('beeswax:api_watch_query_refresh_json', kwargs={'id': query_history.id}) response['status'] = 0 else: response['message'] = _('A POST request is required.') return JsonResponse(response)
def get_session(request, session_id=None): app_name = get_app_name(request) query_server = get_query_server_config(app_name) response = {'status': -1, 'message': ''} if session_id: session = Session.objects.get(id=session_id, owner=request.user, application=query_server['server_name']) else: # get the latest session for given user and server type session = Session.objects.get_session(request.user, query_server['server_name']) if session is not None: properties = json.loads(session.properties) # Redact passwords for key, value in properties.items(): if 'password' in key.lower(): properties[key] = '*' * len(value) response['status'] = 0 response['session'] = {'id': session.id, 'application': session.application, 'status': session.status_code} response['properties'] = properties else: response['message'] = _('Could not find session or no open sessions found.') return JsonResponse(response)
def database_initialize(request): """ Install the tables for this application """ # Connexion to the db query_server = get_query_server_config(name='impala') db = dbms.get(request.user, query_server=query_server) # The sql queries sql = "DROP TABLE IF EXISTS map_sample_id; CREATE TABLE map_sample_id (internal_sample_id STRING, customer_sample_id STRING, date_creation TIMESTAMP, date_modification TIMESTAMP); DROP TABLE IF EXISTS sample_files; CREATE TABLE sample_files (id STRING, internal_sample_id STRING, file_path STRING, file_type STRING, date_creation TIMESTAMP, date_modification TIMESTAMP);" # The clinical db sql += "DROP TABLE IF EXISTS clinical_sample; CREATE TABLE clinical_sample (sample_id STRING, patient_id STRING, date_of_collection STRING, original_sample_id STRING, status STRING, sample_type STRING, biological_contamination STRING, storage_condition STRING, biobank_id STRING, pn_id STRING);" #DROP TABLE IF EXISTS variants; CREATE TABLE variants (id STRING, alternate_bases STRING, calls STRING, names STRING, info STRING, reference_bases STRING, quality DOUBLE, created TIMESTAMP, elem_start BIGINT, elem_end BIGINT, variantset_id STRING); DROP TABLE IF EXISTS variantsets; #CREATE TABLE variantsets (id STRING, dataset_id STRING, metadata STRING, reference_bounds STRING); #DROP TABLE IF EXISTS datasets; CREATE TABLE datasets (id STRING, is_public BOOLEAN, name STRING);''' # Executing the different queries tmp = sql.split(";") for hql in tmp: hql = hql.strip() if hql: query = hql_query(hql) handle = db.execute_and_wait(query, timeout_sec=5.0) return render('database.initialize.mako', request, locals())
def configuration(request): app_name = get_app_name(request) query_server = get_query_server_config(app_name) config_values = dbms.get(request.user, query_server).get_default_configuration( bool(request.REQUEST.get("include_hadoop", False))) return render("configuration.mako", request, {'config_values': config_values})
def autocomplete(request, database=None, table=None, column=None, nested=None): app_name = get_app_name(request) query_server = get_query_server_config(app_name) do_as = request.user if (request.user.is_superuser or request.user.has_hue_permission(action="impersonate", app="security")) and 'doas' in request.GET: do_as = User.objects.get(username=request.GET.get('doas')) db = dbms.get(do_as, query_server) response = {} try: if database is None: response['databases'] = db.get_databases() elif table is None: response['tables'] = db.get_tables(database=database) elif column is None: t = db.get_table(database, table) response['hdfs_link'] = t.hdfs_link response['columns'] = [column.name for column in t.cols] response['extended_columns'] = massage_columns_for_json(t.cols) else: col = db.get_column(database, table, column) if col: parse_tree = parser.parse_column(col.name, col.type, col.comment) if nested: parse_tree = _extract_nested_type(parse_tree, nested) response = parse_tree else: raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column)) except (QueryServerTimeoutException, TTransportException), e: response['code'] = 503 response['error'] = e.message
def patient_search(request): """ Search the data related to a given patient id or dossier no """ result = {'status': -1,'data': {}} if request.method != 'POST' or not request.POST or not request.POST['sample_id']: result['status'] = 0 return HttpResponse(json.dumps(result), mimetype="application/json") hql = "SELECT patient.patient_id, patient.dossier_no, patient.patient_family_name, patient.patient_first_name, ethnicity.ethnicity FROM patient JOIN ethnicity ON patient.ethnic_id = ethnicity.ethnic_id WHERE " if "patient_id" in request.POST: patient_id = str(request.POST['patient_id']) hql += "patient.patient_id = '"+patient_id+"';" elif "dossier_no" in request.POST: dossier_no = str(request.POST['dossier_no']) hql += "patient.dossier_no = '"+dossier_no+"';" # Database connexion query_server = get_query_server_config(name='impala') db = dbms.get(request.user, query_server=query_server) customer_sample_id = str(request.user.id)+"_"+sample_id # Selecting the files related to the sample id query = hql_query(hql) handle = db.execute_and_wait(query, timeout_sec=5.0) if handle: data = db.fetch(handle, rows=100) result['status'] = 1 result['data'] = list(data.rows()) db.close(handle) # Returning the data return HttpResponse(json.dumps(result), mimetype="application/json")
def explain_query(request): response = {"status": -1, "message": ""} if request.method != "POST": response["message"] = _("A POST request is required.") app_name = get_app_name(request) query_type = beeswax_models.SavedQuery.TYPES_MAPPING[app_name] try: form = get_query_form(request) if form.is_valid(): query = SQLdesign(form, query_type=query_type) query_server = dbms.get_query_server_config(app_name) db = dbms.get(request.user, query_server) try: db.use(form.cleaned_data["database"]) datatable = db.explain(query) results = db.client.create_result(datatable) response["status"] = 0 response["results"] = results_to_dict(results) except Exception, e: response["status"] = -1 response["message"] = str(e) else:
def _guess_range(user, dashboard, field): hql = "SELECT MIN(%(field)s), MAX(%(field)s) FROM %(database)s.%(table)s" % { 'field': field['name'], 'database': dashboard['properties'][0]['database'], 'table': dashboard['properties'][0]['table'] } query_server = get_query_server_config(name='impala') db = dbms.get(user, query_server=query_server) query = hql_query(hql) handle = db.execute_and_wait(query, timeout_sec=35.0) data = db.fetch(handle, rows=1) stats_min, stats_max = list(data.rows())[0] db.close(handle) _min, _m = _round_number_range(stats_min) _m, _max = _round_number_range(stats_max) properties = { 'min': stats_min, 'max': stats_max, 'start': _min, 'end': _max, #'gap': gap, 'canRange': True, 'isDate': False, } return properties
def config_validator(user): # dbms is dependent on beeswax.conf (this file) # import in method to avoid circular dependency from beeswax.design import hql_query from beeswax.server import dbms from beeswax.server.dbms import get_query_server_config res = [] try: try: if not 'test' in sys.argv: # Avoid tests hanging query_server = get_query_server_config(name='impala') server = dbms.get(user, query_server) query = hql_query("SELECT 'Hello World!';") handle = server.execute_and_wait(query, timeout_sec=10.0) if handle: server.fetch(handle, rows=100) server.close(handle) except StructuredThriftTransportException, ex: if 'TSocket read 0 bytes' in str(ex): # this message appears when authentication fails msg = "Failed to authenticate to Impalad, check authentication configurations." LOG.exception(msg) res.append((NICE_NAME, _(msg))) else: raise ex except Exception, ex: msg = "No available Impalad to send queries to." LOG.exception(msg) res.append((NICE_NAME, _(msg)))
def sample_search(request): """ Search the data related to a given sample id """ result = {'status': -1,'data': {}} if request.method != 'POST' or not request.POST or not request.POST['sample_id']: result['status'] = 0 return HttpResponse(json.dumps(result), mimetype="application/json") sample_id = str(request.POST['sample_id']) # Database connexion query_server = get_query_server_config(name='impala') db = dbms.get(request.user, query_server=query_server) customer_sample_id = str(request.user.id)+"_"+sample_id # Selecting the files related to the sample id hql = "SELECT sample_files.id, sample_files.file_path FROM sample_files JOIN map_sample_id ON sample_files.internal_sample_id = map_sample_id.internal_sample_id WHERE map_sample_id.customer_sample_id = '"+customer_sample_id+"';" query = hql_query(hql) handle = db.execute_and_wait(query, timeout_sec=5.0) if handle: data = db.fetch(handle, rows=100) result['status'] = 1 result['data'] = list(data.rows()) db.close(handle) # Returning the data return HttpResponse(json.dumps(result), mimetype="application/json")
def init_example(request): """ Allow to make some test for the developpers, to see if the insertion and the querying of data is correct """ result = {'status': -1,'data': {}} query_server = get_query_server_config(name='impala') db = dbms.get(request.user, query_server=query_server) # Deleting the db hql = "DROP TABLE IF EXISTS val_test_2;" query = hql_query(hql) handle = db.execute_and_wait(query, timeout_sec=5.0) # Creating the db hql = "CREATE TABLE val_test_2 (id int, token string);" query = hql_query(hql) handle = db.execute_and_wait(query, timeout_sec=5.0) # Adding some data hql = " INSERT OVERWRITE val_test_2 values (1, 'a'), (2, 'b'), (-1,'xyzzy');" # hql = "INSERT INTO TABLE testset_bis VALUES (2, 25.0)" query = hql_query(hql) handle = db.execute_and_wait(query, timeout_sec=5.0) # querying the data hql = "SELECT * FROM val_test_2" query = hql_query(hql) handle = db.execute_and_wait(query, timeout_sec=5.0) if handle: data = db.fetch(handle, rows=100) result['data'] = list(data.rows()) db.close(handle) return render('database.initialize.mako', request, locals())
def teardown_class(cls): if is_live_cluster(): # Delete test DB and tables query_server = get_query_server_config() client = make_logged_in_client() user = User.objects.get(username='******') db = dbms.get(user, query_server) # Kill Spark context if running if is_hive_on_spark() and cluster.is_yarn(): # TODO: We should clean up the running Hive on Spark job here pass for db_name in [cls.db_name, '%s_other' % cls.db_name]: databases = db.get_databases() if db_name in databases: tables = db.get_tables(database=db_name) for table in tables: make_query(client, 'DROP TABLE IF EXISTS `%(db)s`.`%(table)s`' % {'db': db_name, 'table': table}, wait=True) make_query(client, 'DROP VIEW IF EXISTS `%(db)s`.`myview`' % {'db': db_name}, wait=True) make_query(client, 'DROP DATABASE IF EXISTS %(db)s' % {'db': db_name}, wait=True) # Check the cleanup databases = db.get_databases() assert_false(db_name in databases) global _INITIALIZED _INITIALIZED = False
def get_sample_data(request, database, table, column=None): app_name = get_app_name(request) query_server = get_query_server_config(app_name) db = dbms.get(request.user, query_server) response = _get_sample_data(db, database, table, column) return JsonResponse(response)
def execute_query(request, design_id=None): """ View function for executing an arbitrary query. It understands the optional GET/POST params: on_success_url If given, it will be displayed when the query is successfully finished. Otherwise, it will display the view query results page by default. """ authorized_get_design(request, design_id) error_message = None form = QueryForm() action = request.path log = None app_name = get_app_name(request) query_type = SavedQuery.TYPES_MAPPING[app_name] design = safe_get_design(request, query_type, design_id) on_success_url = request.REQUEST.get('on_success_url') databases = [] query_server = get_query_server_config(app_name) db = dbms.get(request.user, query_server) try: databases = get_db_choices(request) except Exception, ex: error_message, log = expand_exception(ex, db)
def database_initialize(request): """ Install the tables for this application """ # The variant tables (impala and hbase) database_create_variants(request, temporary=False) # Connexion to the db query_server = get_query_server_config(name='impala') db = dbms.get(request.user, query_server=query_server) # The sql queries sql = "DROP TABLE IF EXISTS map_sample_id; CREATE TABLE map_sample_id (internal_sample_id STRING, customer_sample_id STRING, date_creation TIMESTAMP, date_modification TIMESTAMP); DROP TABLE IF EXISTS sample_files; CREATE TABLE sample_files (id STRING, internal_sample_id STRING, file_path STRING, file_type STRING, date_creation TIMESTAMP, date_modification TIMESTAMP) row format delimited fields terminated by ',' stored as textfile;" # The clinical db sql += "DROP TABLE IF EXISTS clinical_sample; CREATE TABLE clinical_sample (sample_id STRING, patient_id STRING, date_of_collection STRING, original_sample_id STRING, status STRING, sample_type STRING, biological_contamination STRING, storage_condition STRING, biobank_id STRING, pn_id STRING) row format delimited fields terminated by ',' stored as textfile;" # Executing the different queries tmp = sql.split(";") for hql in tmp: hql = hql.strip() if hql: query = hql_query(hql) handle = db.execute_and_wait(query, timeout_sec=5.0) return render('database.initialize.mako', request, locals())
def create_session(self, lang='hive', properties=None): application = 'beeswax' if lang == 'hive' else lang session = Session.objects.get_session(self.user, application=application) if session is None: session = dbms.get(self.user, query_server=get_query_server_config(name=lang)).open_session(self.user) response = { 'type': lang, 'id': session.id } if not properties: config = None if USE_DEFAULT_CONFIGURATION.get(): config = DefaultConfiguration.objects.get_configuration_for_user(app=lang, user=self.user) if config is not None: properties = config.properties_list else: properties = self.get_properties(lang) response['properties'] = properties if lang == 'impala': impala_settings = session.get_formatted_properties() http_addr = next((setting['value'] for setting in impala_settings if setting['key'].lower() == 'http_addr'), None) response['http_addr'] = http_addr return response
def get_shared_beeswax_server(db_name='default'): global _SHARED_HIVE_SERVER global _SHARED_HIVE_SERVER_CLOSER if _SHARED_HIVE_SERVER is None: cluster = pseudo_hdfs4.shared_cluster() if is_live_cluster(): def s(): pass else: s = _start_mini_hs2(cluster) start = time.time() started = False sleep = 1 make_logged_in_client() user = User.objects.get(username='******') query_server = get_query_server_config() db = dbms.get(user, query_server) while not started and time.time() - start <= 30: try: db.open_session(user) started = True break except Exception, e: LOG.info('HiveServer2 server could not be found after: %s' % e) time.sleep(sleep) if not started: raise Exception("Server took too long to come up.") _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s
def put(self, request, original_variant, pk): # Allow to modify a variant in HBase/Impala f = open('/tmp/superhello.txt','w') f.write(json.dumps(request.data)) f.close() # We convert the original and modified data to flatjson fc = formatConverters(input_file='stuff.json',output_file='stuff.json') original_flatjson = fc.convertVariantJsonToFlatJson(json_data=original_variant) modified_flatjson = fc.convertVariantJsonToFlatJson(json_data=request.data) # We convert the data to hbase, and we modify directly some fields (note: the keys are almost the same for hbase and impala) hbase_data = fc.convertVariantFlatJsonToHbase(original_data=original_flatjson,modified_data=modified_flatjson) # Impala - We make the query query_server = get_query_server_config(name='impala') db = dbms.get(request.user, query_server=query_server) query = hql_query("INSERT INTO variant("+",".join(query_data)+")") handle = db.execute_and_wait(query, timeout_sec=5.0) if handle: db.close(handle) else: raise Exception("Impossible to create the variant...") # HBase - We add the data in that table too hbaseApi = HbaseApi(user=request.user) currentCluster = hbaseApi.getClusters().pop() rowkey = pk hbaseApi.putRow(cluster=currentCluster['name'], tableName='variants', row=rowkey, data=hbase_data)
def get_query_form(request): try: # Get database choices query_server = dbms.get_query_server_config(get_app_name(request)) db = dbms.get(request.user, query_server) databases = [(database, database) for database in db.get_databases()] except Exception, e: raise PopupException(_('Unable to access databases, Query Server or Metastore may be down.'), detail=e)
def _run_parameterized_query(request, design_id, explain): """ Given a design and arguments to parameterize that design, runs the query. - explain is a boolean to determine whether to run as an explain or as an execute. This is an extra "step" in the flow from execute_query. """ design = authorized_get_design(request, design_id, must_exist=True) # Reconstitute the form design_obj = beeswax.design.HQLdesign.loads(design.data) query_form = QueryForm() params = design_obj.get_query_dict() params.update(request.POST) databases = _get_db_choices(request) query_form.bind(params) query_form.query.fields["database"].choices = databases # Could not do it in the form if not query_form.is_valid(): raise PopupException(_("Query form is invalid: %s") % query_form.errors) query_str = query_form.query.cleaned_data["query"] app_name = get_app_name(request) query_server = get_query_server_config(app_name) query_type = SavedQuery.TYPES_MAPPING[app_name] parameterization_form_cls = make_parameterization_form(query_str) if not parameterization_form_cls: raise PopupException(_("Query is not parameterizable.")) parameterization_form = parameterization_form_cls(request.REQUEST, prefix="parameterization") if parameterization_form.is_valid(): real_query = substitute_variables(query_str, parameterization_form.cleaned_data) query = HQLdesign(query_form, query_type=query_type) query._data_dict["query"]["query"] = real_query try: if explain: return explain_directly(request, query, design, query_server) else: return execute_directly(request, query, query_server, design) except Exception, ex: db = dbms.get(request.user, query_server) error_message, log = expand_exception(ex, db) return render( "execute.mako", request, { "action": reverse(get_app_name(request) + ":execute_query"), "design": design, "error_message": error_message, "form": query_form, "log": log, "autocomplete_base_url": reverse(get_app_name(request) + ":autocomplete", kwargs={}), }, )
def _get_db(self, snippet): if snippet["type"] == "hive": name = "beeswax" elif snippet["type"] == "impala": name = "impala" else: name = "spark-sql" return dbms.get(self.user, query_server=get_query_server_config(name=name))
def set_execution_engine(cls): query_server = get_query_server_config() if query_server['server_name'] == 'beeswax' and is_hive_on_spark(): user = User.objects.get(username='******') db = dbms.get(user, query_server) LOG.info("Setting Hive execution engine to Spark") db.execute_statement('SET hive.execution.engine=spark')
def create_session(self, lang="hive", properties=None): application = "beeswax" if lang == "hive" else lang session = Session.objects.get_session(self.user, application=application) if session is None: session = dbms.get(self.user, query_server=get_query_server_config(name=lang)).open_session(self.user) return {"type": lang, "id": session.id, "properties": session.get_formatted_properties()}
def get_sample_data(request, database, table, column=None): app_name = get_app_name(request) cluster = json.loads(request.POST.get('cluster', '{}')) query_server = get_query_server_config(app_name, cluster=cluster) db = dbms.get(request.user, query_server) response = _get_sample_data(db, database, table, column, cluster=cluster) return JsonResponse(response)
def configuration(request): app_name = get_app_name(request) query_server = get_query_server_config(app_name) config_values = dbms.get(request.user, query_server).get_default_configuration( bool(request.REQUEST.get("include_hadoop", False))) for value in config_values: if 'password' in value.key.lower(): value.value = "*" * 10 return render("configuration.mako", request, {'config_values': config_values})
def _get_db(self, snippet): if snippet['type'] == 'hive': name = 'beeswax' elif snippet['type'] == 'impala': name = 'impala' else: name = 'spark-sql' return dbms.get(self.user, query_server=get_query_server_config(name=name))
def autocomplete(request, database=None, table=None, column=None, nested=None): app_name = get_app_name(request) query_server = get_query_server_config(app_name) do_as = request.user if (request.user.is_superuser or request.user.has_hue_permission(action="impersonate", app="security")) and 'doas' in request.GET: do_as = User.objects.get(username=request.GET.get('doas')) db = dbms.get(do_as, query_server) response = _autocomplete(db, database, table, column, nested) return JsonResponse(response)
def get(user, query_server=None): # Avoid circular dependency from ImpalaService import ImpalaHiveServer2Service, ImpalaService if query_server is None: query_server = get_query_server_config(name='impala') if query_server['server_interface'] == HIVE_SERVER2: return ImpalaServerClient(ImpalaHiveServer2Service, query_server, user) else: return ImpalaServerClient(ImpalaService, query_server, user)
def execute(request, design_id=None): response = {'status': -1, 'message': ''} if request.method != 'POST': response['message'] = _('A POST request is required.') app_name = get_app_name(request) query_server = get_query_server_config(app_name) query_type = beeswax.models.SavedQuery.TYPES_MAPPING[app_name] design = safe_get_design(request, query_type, design_id) try: query_form = get_query_form(request) if query_form.is_valid(): query_str = query_form.query.cleaned_data["query"] explain = request.GET.get('explain', 'false').lower() == 'true' design = save_design(request, query_form, query_type, design, False) if query_form.query.cleaned_data['is_parameterized']: # Parameterized query parameterization_form_cls = make_parameterization_form(query_str) if parameterization_form_cls: parameterization_form = parameterization_form_cls(request.REQUEST, prefix="parameterization") if parameterization_form.is_valid(): parameters = parameterization_form.cleaned_data real_query = substitute_variables(query_str, parameters) query = HQLdesign(query_form, query_type=query_type) query._data_dict['query']['query'] = real_query try: if explain: return explain_directly(request, query, design, query_server) else: return execute_directly(request, query, design, query_server, parameters=parameters) except Exception, ex: db = dbms.get(request.user, query_server) error_message, log = expand_exception(ex, db) response['message'] = error_message return JsonResponse(response) else: response['errors'] = parameterization_form.errors return JsonResponse(response) # Non-parameterized query query = HQLdesign(query_form, query_type=query_type) if request.GET.get('explain', 'false').lower() == 'true': return explain_directly(request, query, design, query_server) else: return execute_directly(request, query, design, query_server) else:
def get_session(request): app_name = get_app_name(request) query_server = get_query_server_config(app_name) session = Session.objects.get_session(request.user, query_server['server_name']) if session: properties = json.loads(session.properties) else: properties = {} return JsonResponse({'properties': properties})
def get_session(request): app_name = get_app_name(request) query_server = get_query_server_config(app_name) session = Session.objects.get_session(request.user, query_server['server_name']) if session: properties = json.loads(session.properties) else: properties = {} return JsonResponse({'properties': properties})
def create_session(self, lang='hive', properties=None): application = 'beeswax' if lang == 'hive' else lang session = Session.objects.get_session(self.user, application=application) if session is None: session = dbms.get(self.user, query_server=get_query_server_config(name=lang)).open_session(self.user) return { 'type': lang, 'id': session.id, 'properties': session.get_formatted_properties() }
def get_top_terms(request, database, table, column, prefix=None): app_name = get_app_name(request) query_server = get_query_server_config(app_name) db = dbms.get(request.user, query_server) response = {'status': -1, 'message': '', 'redirect': ''} terms = db.get_top_terms(database, table, column, prefix=prefix, limit=int(request.GET.get('limit', 30))) response['terms'] = terms response['status'] = 0 return JsonResponse(response)
def get_settings(request): query_server = dbms.get_query_server_config(get_app_name(request)) db = dbms.get(request.user, query_server) response = {'status': -1} settings = db.get_configuration() if settings: response['status'] = 0 response['settings'] = settings else: response['message'] = _('Failed to get settings.') return JsonResponse(response)
def _get_db(user, source_type=None): if source_type is None: cluster_config = get_cluster_config(user) if FORCE_HS2_METADATA.get() and cluster_config['app_config'].get( 'editor') and 'hive' in cluster_config['app_config'].get( 'editor')['interpreter_names']: source_type = 'hive' else: source_type = cluster_config['default_sql_interpreter'] query_server = get_query_server_config( name=source_type if source_type != 'hive' else 'beeswax') return dbms.get(user, query_server)
def get_query_server_config(self): from beeswax.server.dbms import get_query_server_config query_server = get_query_server_config( QueryHistory.get_type_name(self.query_type)) query_server.update({ 'server_name': self.server_name, # 'server_host': self.server_host, # Always use the live server configuration as the session is currently tied to the connection # 'server_port': int(self.server_port), 'server_type': self.server_type, }) return query_server
def get_query_server_config(self): from beeswax.server.dbms import get_query_server_config query_server = get_query_server_config( QueryHistory.get_type_name(self.query_type)) query_server.update({ 'server_name': self.server_name, 'server_host': self.server_host, 'server_port': self.server_port, 'server_type': self.server_type, }) return query_server
def describe_table(request, database, table): app_name = get_app_name(request) query_server = get_query_server_config(app_name) db = dbms.get(request.user, query_server) try: table = db.get_table(database, table) except Exception, e: LOG.exception("Describe table error") if hasattr(e, 'message') and e.message: raise PopupException(_("Hive Error"), detail=e.message) else: raise PopupException(_("Hive Error"), detail=e)
def __init__(self, data_dict, app_name): self.name = data_dict['table_name'] self.filename = data_dict['data_file'] self.hql = data_dict['create_hql'] self.query_server = get_query_server_config(app_name) # Sanity check self._data_dir = beeswax.conf.LOCAL_EXAMPLES_DATA_DIR.get() self._contents_file = os.path.join(self._data_dir, self.filename) if not os.path.isfile(self._contents_file): msg = _('Cannot find table data in "%(file)s".') % {'file': self._contents_file} LOG.error(msg) raise ValueError(msg)
def get_indexes(request, database, table): query_server = dbms.get_query_server_config(get_app_name(request)) db = dbms.get(request.user, query_server) response = {'status': -1} indexes = db.get_indexes(database, table) if indexes: response['status'] = 0 response['headers'] = indexes.cols() response['rows'] = escape_rows(indexes.rows(), nulls_only=True) else: response['message'] = _('Failed to get indexes.') return JsonResponse(response)
def get_query_form(request): # Get database choices query_server = dbms.get_query_server_config(get_app_name(request)) db = dbms.get(request.user, query_server) databases = [(database, database) for database in db.get_databases()] if not databases: raise RuntimeError(_("No databases are available. Permissions could be missing.")) query_form = QueryForm() query_form.bind(request.POST) query_form.query.fields['database'].choices = databases # Could not do it in the form return query_form
def test_get_llap(self): with patch('beeswax.conf.LLAP_SERVER_HOST.get') as LLAP_SERVER_HOST: with patch( 'beeswax.conf.LLAP_SERVER_PORT.get') as LLAP_SERVER_PORT: LLAP_SERVER_HOST.return_value = 'hive-llap.gethue.com' LLAP_SERVER_PORT.return_value = 10002 query_server = get_query_server_config(name='llap') assert_equal(query_server['server_name'], 'beeswax') assert_equal(query_server['server_host'], 'hive-llap.gethue.com') assert_equal(query_server['server_port'], 10002)
def get_sample_data(request, database, table): query_server = dbms.get_query_server_config(get_app_name(request)) db = dbms.get(request.user, query_server) response = {'status': -1} table_obj = db.get_table(database, table) sample_data = db.get_sample(database, table_obj) if sample_data: response['status'] = 0 response['headers'] = sample_data.cols() response['rows'] = escape_rows(sample_data.rows(), nulls_only=True) else: response['message'] = _('Failed to get sample data.') return JsonResponse(response)
def get_functions(request): query_server = dbms.get_query_server_config(get_app_name(request)) db = dbms.get(request.user, query_server) response = {'status': -1} prefix = request.GET.get('prefix', None) functions = db.get_functions(prefix) if functions: response['status'] = 0 rows = escape_rows(functions.rows(), nulls_only=True) response['functions'] = [row[0] for row in rows] else: response['message'] = _('Failed to get functions.') return JsonResponse(response)
def get_query_form(request): try: try: # Get database choices query_server = dbms.get_query_server_config(get_app_name(request)) db = dbms.get(request.user, query_server) databases = [(database, database) for database in db.get_databases()] except StructuredThriftTransportException, e: # If Thrift exception was due to failed authentication, raise corresponding message if 'TSocket read 0 bytes' in str(e) or 'Error validating the login' in str(e): raise PopupException(_('Failed to authenticate to query server, check authentication configurations.'), detail=e) else: raise e except Exception, e: raise PopupException(_('Unable to access databases, Query Server or Metastore may be down.'), detail=e)
def analyze_table(request, database, table, columns=None): app_name = get_app_name(request) query_server = get_query_server_config(app_name) db = dbms.get(request.user, query_server) table_obj = db.get_table(database, table) if table_obj.is_impala_only and app_name != 'impala': query_server = get_query_server_config('impala') db = dbms.get(request.user, query_server) response = {'status': -1, 'message': '', 'redirect': ''} if request.method == "POST": if columns is None: query_history = db.analyze_table(database, table) else: query_history = db.analyze_table_columns(database, table) response['watch_url'] = reverse('beeswax:api_watch_query_refresh_json', kwargs={'id': query_history.id}) response['status'] = 0 else: response['message'] = _('A POST request is required.') return JsonResponse(response)
def get_indexes(request, database, table): query_server = dbms.get_query_server_config(get_app_name(request)) db = dbms.get(request.user, query_server) response = {'status': -1, 'error_message': ''} indexes = db.get_indexes(database, table) if indexes: response['status'] = 0 response['headers'] = indexes.cols() response['rows'] = escape_rows(indexes.rows(), nulls_only=True) else: response['error_message'] = _( 'Index data took too long to be generated') return JsonResponse(response)
def get_shared_beeswax_server(db_name='default'): global _SHARED_HIVE_SERVER global _SHARED_HIVE_SERVER_CLOSER with _SHARED_HIVE_SERVER_LOCK: if _SHARED_HIVE_SERVER is None: cluster = pseudo_hdfs4.shared_cluster() if is_live_cluster(): def s(): pass else: s = _start_mini_hs2(cluster) start = time.time() started = False sleep = 1 make_logged_in_client() user = User.objects.get(username='******') query_server = get_query_server_config() db = dbms.get(user, query_server) while not started and time.time() - start <= 60: try: db.open_session(user) except StructuredThriftTransportException as e: LOG.exception('Failed to open Hive Server session') # Don't loop if we had an authentication error. if 'Bad status: 3' in e.message: raise except Exception as e: LOG.exception('Failed to open Hive Server session') else: started = True break time.sleep(sleep) sleep *= 2 if not started: raise Exception("Server took too long to come up.") _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s return _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER
def create_session(self, lang='hive', properties=None): application = 'beeswax' if lang == 'hive' or lang == 'llap' else lang if has_session_pool(): session = Session.objects.get_tez_session( self.user, application, MAX_NUMBER_OF_SESSIONS.get()) elif not has_multiple_sessions(): session = Session.objects.get_session(self.user, application=application) else: session = None reuse_session = session is not None if not reuse_session: db = dbms.get(self.user, query_server=get_query_server_config( name=lang, connector=self.interpreter)) session = db.open_session(self.user) response = {'type': lang, 'id': session.id} if not properties: config = None if USE_DEFAULT_CONFIGURATION.get(): config = DefaultConfiguration.objects.get_configuration_for_user( app=lang, user=self.user) if config is not None: properties = config.properties_list else: properties = self.get_properties(lang) response['properties'] = properties response['configuration'] = json.loads(session.properties) response['reuse_session'] = reuse_session response['session_id'] = '' try: decoded_guid = session.get_handle().sessionId.guid response['session_id'] = unpack_guid(decoded_guid) except Exception as e: LOG.warn('Failed to decode session handle: %s' % e) if lang == 'impala' and session: http_addr = _get_impala_server_url(session) response['http_addr'] = http_addr return response
def refresh_tables(request): app_name = get_app_name(request) query_server = get_query_server_config(app_name) db = dbms.get(request.user, query_server=query_server) response = {'status': 0, 'message': ''} if request.method == "POST": try: database = json.loads(request.POST['database']) added = json.loads(request.POST['added']) removed = json.loads(request.POST['removed']) db.invalidate_tables(database, added + removed) except Exception, e: response['message'] = str(e)
def configuration(request): app_name = get_app_name(request) query_server = get_query_server_config(app_name) session = Session.objects.get_session(request.user, query_server['server_name']) if session: properties = json.loads(session.properties) # Redact passwords for key, value in properties.items(): if 'password' in key.lower(): properties[key] = '*' * len(value) else: properties = {} return render("configuration.mako", request, {'configuration': properties})
def _get_db(self, snippet, is_async=False, interpreter=None): if not is_async and snippet['type'] == 'hive': name = 'beeswax' elif snippet['type'] == 'hive': name = 'hive' elif snippet['type'] == 'llap': name = 'llap' elif snippet['type'] == 'impala': name = 'impala' else: name = 'sparksql' return dbms.get(self.user, query_server=get_query_server_config( name=name, connector=interpreter) ) # Note: name is not used if interpreter is present
def config_validator(user): # dbms is dependent on beeswax.conf (this file) # import in method to avoid circular dependency from beeswax.server import dbms from beeswax.server.dbms import get_query_server_config res = [] try: if not 'test' in sys.argv: # Avoid tests hanging query_server = get_query_server_config(name='impala') server = dbms.get(user, query_server) server.get_databases() except: res.append((NICE_NAME, _("No available Impalad to send queries to."))) return res
def get_sample_data(request, database, table): query_server = dbms.get_query_server_config(get_app_name(request)) db = dbms.get(request.user, query_server) response = {'status': -1, 'error_message': ''} table_obj = db.get_table(database, table) sample_data = db.get_sample(database, table_obj) if sample_data: response['status'] = 0 response['headers'] = sample_data.cols() response['rows'] = escape_rows(sample_data.rows(), nulls_only=True) else: response['error_message'] = _( 'Sample data took too long to be generated') return JsonResponse(response)
def _run_parameterized_query(request, design_id, explain): """ Given a design and arguments to parameterize that design, runs the query. - explain is a boolean to determine whether to run as an explain or as an execute. This is an extra "step" in the flow from execute_query. """ design = authorized_get_design(request, design_id, must_exist=True) # Reconstitute the form design_obj = beeswax.design.HQLdesign.loads(design.data) query_form = QueryForm() params = design_obj.get_query_dict() params.update(request.POST) query_form.bind(params) assert query_form.is_valid() query_str = query_form.query.cleaned_data["query"] query_server = get_query_server_config(get_app_name(request)) parameterization_form_cls = make_parameterization_form(query_str) if not parameterization_form_cls: raise PopupException(_("Query is not parameterizable.")) parameterization_form = parameterization_form_cls(request.REQUEST, prefix="parameterization") if parameterization_form.is_valid(): real_query = substitute_variables(query_str, parameterization_form.cleaned_data) query = HQLdesign(query_form) query._data_dict['query']['query'] = real_query try: if explain: return explain_directly(request, query, design, query_server) else: return execute_directly(request, query, query_server, design) except Exception, ex: db = dbms.get(request.user, query_server) error_message, log = expand_exception(ex, db) return render('execute.mako', request, { 'action': reverse(get_app_name(request) + ':execute_query'), 'design': design, 'error_message': error_message, 'form': query_form, 'log': log, 'autocomplete_base_url': reverse(get_app_name(request) + ':autocomplete', kwargs={}), })
def autocomplete(request, database=None, table=None): app_name = get_app_name(request) query_server = get_query_server_config(app_name) db = dbms.get(request.user, query_server) response = {} try: if database is None: response['databases'] = db.get_databases() elif table is None: response['tables'] = db.get_tables(database=database) else: t = db.get_table(database, table) response['columns'] = [column.name for column in t.cols] except Exception, e: LOG.warn('Autocomplete data fetching error %s.%s: %s' % (database, table, e)) response['error'] = e.message
def get_table_stats(request, database, table, column=None): app_name = get_app_name(request) query_server = get_query_server_config(app_name) db = dbms.get(request.user, query_server) response = {'status': -1, 'message': '', 'redirect': ''} if column is not None: stats = db.get_table_columns_stats(database, table, column) else: table = db.get_table(database, table) stats = table.stats response['stats'] = stats response['status'] = 0 return JsonResponse(response)
def close_session(self, session): app_name = session.get('type') session_id = session.get('id') source_method = session.get("sourceMethod") if not session_id: session = Session.objects.get_session(self.user, application=app_name) decoded_guid = session.get_handle().sessionId.guid session_decoded_id = unpack_guid(decoded_guid) if source_method == "dt_logout": LOG.debug( "Closing Impala session id %s on logout for user %s" % (session_decoded_id, self.user.username)) query_server = get_query_server_config(name=app_name) response = {'status': -1, 'message': ''} session_record = None try: filters = { 'id': session_id, 'application': query_server['server_name'] } if not is_admin(self.user): filters['owner'] = self.user session_record = Session.objects.get(**filters) except Session.DoesNotExist: response['message'] = _( 'Session does not exist or you do not have permissions to close the session.' ) if session_record: session_record = dbms.get( self.user, query_server).close_session(session_record) response['status'] = 0 response['message'] = _('Session successfully closed.') response['session'] = { 'id': session_id, 'application': session_record.application, 'status': session_record.status_code } return response
def autocomplete(request, database=None, table=None): app_name = get_app_name(request) query_server = get_query_server_config(app_name) db = dbms.get(request.user, query_server) response = {} try: if database is None: response['databases'] = db.get_databases() elif table is None: response['tables'] = db.get_tables(database=database) else: t = db.get_table(database, table) response['columns'] = [column.name for column in t.cols] response['extended_columns'] = massage_columns_for_json(t.cols) except TTransportException, tx: response['code'] = 503 response['error'] = tx.message
def _get_db(self, snippet, is_async=False, interpreter=None): if interpreter and interpreter.get('dialect'): dialect = interpreter['dialect'] else: dialect = snippet['type'] # Backward compatibility without connectors if not is_async and dialect == 'hive': name = 'beeswax' elif dialect == 'hive': name = 'hive' elif dialect == 'llap': name = 'llap' elif dialect == 'impala': name = 'impala' else: name = 'sparksql' # Note: name is not used if interpreter is present return dbms.get(self.user, query_server=get_query_server_config(name=name, connector=interpreter))