def _save_results_ctas(request, query_history, target_table, result_meta):
    """
    Handle saving query results as a new table.

    Two situations are handled:

    * The results come straight from an existing table
      (``result_meta.in_tablename`` is set): a CREATE TABLE ... AS SELECT
      is submitted and the user is redirected to the page that watches it.
    * The results live in a temporary location: the table is created with a
      schema matching the results, then the result files are moved into the
      table's storage directory. If the move fails, a DROP TABLE is issued
      as best-effort cleanup and the original error is re-raised.

    request        The current HTTP request (its ``user`` and ``fs`` are used).
    query_history  History entry of the query whose results are being saved.
    target_table   Name of the table to create.
    result_meta    Result metadata; ``in_tablename``, ``schema``, ``delim``
                   and ``table_dir`` are read.

    Returns an HTTP response. May raise BeeswaxException, IOError.
    """
    # NOTE(review): target_table is interpolated directly into HQL below;
    # presumably it has been validated by the caller -- confirm.
    query_server = query_history.get_query_server()  # Query server requires DDL support

    # Case 1: The results are straight from an existing table
    if result_meta.in_tablename:
        hql = 'CREATE TABLE `%s` AS SELECT * FROM %s' % (target_table, result_meta.in_tablename)
        query_msg = make_beeswax_query(request, hql)
        # Display the CTAS running. Could take a long time.
        return execute_directly(request, query_msg, query_server,
                                on_success_url=urlresolvers.reverse(show_tables))

    # Case 2: The results are in some temporary location
    # 1. Create table
    cols = ',\n'.join('`%s` %s' % (field.name, field.type)
                      for field in result_meta.schema.fieldSchemas)

    # The representation of the delimiter is messy.
    # It came from Java as a string, which might have been converted from an
    # integer. So it could be "1" (^A), or "10" (\n), or "," (a comma literally).
    delim = result_meta.delim
    if not delim.isdigit():
        delim = str(ord(delim))

    # The backslash is escaped explicitly; the resulting text is '\NNN'.
    hql = (
        " CREATE TABLE `%s` ( %s )"
        " ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\%s'"
        " STORED AS TextFile "
    ) % (target_table, cols, delim.zfill(3))
    query_msg = make_beeswax_query(request, hql)
    db_utils.execute_and_wait(request.user, query_msg, query_server)

    try:
        # 2. Move the results into the table's storage
        table_obj = db_utils.meta_client().get_table("default", target_table)
        table_loc = request.fs.urlsplit(table_obj.sd.location)[2]
        request.fs.rename_star(result_meta.table_dir, table_loc)
        LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
        messages.info(request, _('Saved query results as new table %(table)s') % {'table': target_table})
        query_history.save_state(models.QueryHistory.STATE.expired)
    except Exception:
        LOG.error('Error moving data into storage of table %s. Will drop table.' % (target_table,))
        # The cleanup runs in its own function so that an exception it handles
        # internally cannot replace the one being propagated here; the bare
        # ``raise`` then re-raises the original failure with its traceback.
        _drop_table_best_effort(request, query_server, target_table)
        raise

    # Show tables upon success
    return format_preserving_redirect(request, urlresolvers.reverse(show_tables))


def _drop_table_best_effort(request, query_server, target_table):
    """Submit a DROP TABLE for ``target_table``; log instead of raising if the submission fails."""
    query_msg = make_beeswax_query(request, 'DROP TABLE `%s`' % (target_table,))
    try:
        db_utils.execute_directly(request.user, query_msg, query_server)  # Don't wait for results
    except Exception as double_trouble:
        LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
def execute_directly(request, query_msg, design=None, tablename=None,
                     on_success_url=None, on_success_params=None, **kwargs):
    """
    Submit a query and redirect the browser to the page that watches it.

    Thin HTTP-level wrapper around db_utils.execute_directly(): the query is
    submitted, then a redirect to the ``hcatalog.views.watch_query`` page for
    the resulting history entry is returned.

    query_msg
      The thrift Query object.

    design
      The design associated with the query. When given, access to it is
      checked via authorized_get_design() before anything is executed.

    tablename
      The associated table name for the context. Only used when no design
      is given.

    on_success_url
      Where to go after the query is done. Either a URL string, or a
      callable taking the history object and returning the URL string.
      When omitted, no such parameter is added to the redirect (the
      original documentation states the default is the view results page).

    on_success_params
      Optional extra GET params merged into the redirect (in addition to
      "context").

    Any extra keyword arguments are forwarded to db_utils.execute_directly().

    Note that this may throw a Beeswax exception.
    """
    if design is not None:
        authorized_get_design(request, design.id)

    history = db_utils.execute_directly(request.user, query_msg, design, **kwargs)
    watch_page = urlresolvers.reverse("hcatalog.views.watch_query",
                                      kwargs={'id': history.id})

    # GET parameters carried along to the watch page.
    params = QueryDict(None, mutable=True)

    # (1) context: a design takes precedence over a table name
    if design:
        params['context'] = make_query_context("design", design.id)
    elif tablename:
        params['context'] = make_query_context("table", tablename)

    # (2) on_success_url: resolve callables against the history object
    if on_success_url:
        params['on_success_url'] = (
            on_success_url(history) if callable(on_success_url) else on_success_url
        )

    # (3) misc
    if on_success_params:
        params.update(on_success_params)

    return format_preserving_redirect(request, watch_page, params)
def execute_directly(request, query_msg, query_server=None, design=None, tablename=None,
                     on_success_url=None, on_success_params=None, **kwargs):
    """
    Submit a query to a query server and redirect to the page that watches it.

    Thin HTTP-level wrapper around db_utils.execute_directly(): the query is
    submitted, then a redirect to the ``beeswax.views.watch_query`` page for
    the resulting history entry is returned.

    query_msg
      The thrift Query object.

    query_server
      To which Query Server to submit the query.
      Dictionary with keys: ['server_name', 'server_host', 'server_port'].

    design
      The design associated with the query. When given, access to it is
      checked via authorized_get_design() before anything is executed.

    tablename
      The associated table name for the context. Only used when no design
      is given.

    on_success_url
      Where to go after the query is done. Either a URL string, or a
      callable taking the history object and returning the URL string.
      When omitted, no such parameter is added to the redirect (the
      original documentation states the default is the view results page).

    on_success_params
      Optional extra GET params merged into the redirect (in addition to
      "context").

    Any extra keyword arguments are forwarded to db_utils.execute_directly().

    Note that this may throw a Beeswax exception.
    """
    if design is not None:
        authorized_get_design(request, design.id)

    history = db_utils.execute_directly(request.user, query_msg, query_server,
                                        design, **kwargs)
    watch_page = urlresolvers.reverse("beeswax.views.watch_query",
                                      kwargs={'id': history.id})

    # GET parameters carried along to the watch page.
    params = QueryDict(None, mutable=True)

    # (1) context: a design takes precedence over a table name
    if design:
        params['context'] = make_query_context("design", design.id)
    elif tablename:
        params['context'] = make_query_context("table", tablename)

    # (2) on_success_url: resolve callables against the history object
    if on_success_url:
        params['on_success_url'] = (
            on_success_url(history) if callable(on_success_url) else on_success_url
        )

    # (3) misc
    if on_success_params:
        params.update(on_success_params)

    return format_preserving_redirect(request, watch_page, params)