def get_table_schema(self, table_name, database_name=None):
    """Describe a table and return its columns as ``(name, type)`` pairs.

    Args:
        table_name: Name of the table to describe.
        database_name: Database to look in. When ``None`` the pattern
            ``'.*'`` is sent instead, so the lookup is not restricted to
            one database.

    Returns:
        A list with one tuple per result row: element 3 of the row
        (presumably the column name -- confirm against the RPC layer)
        paired with element 5 of the row mapped through
        ``impala.rpc._PrimitiveType_to_TTypeId``.

    Raises:
        RPCError: If no rows come back, or if the rows refer to more than
            one distinct (row[1], row[2]) pair, i.e. the table is ambiguous.
    """
    db_pattern = '.*' if database_name is None else database_name

    def op():
        # Record what is being run so the synchronous executor can track it.
        self._last_operation_string = "RPC_DESCRIBE_TABLE"
        self._last_operation_handle = impala.rpc.get_table_schema(
            self.service, self.session_handle, table_name, db_pattern)

    self._execute_sync(op)
    rows = self.fetchall()

    if len(rows) == 0:
        # TODO: the error raised here should be different
        raise RPCError(
            "no schema results for table %s.%s" % (db_pattern, table_name))

    # Every row must originate from the same table; elements 1 and 2 of a
    # row are used together as the table's identity.
    distinct_tables = {(row[1], row[2]) for row in rows}
    if len(distinct_tables) > 1:
        # TODO: the error raised here should be different
        raise RPCError(
            "db: %s, table: %s is not unique" % (db_pattern, table_name))

    schema = []
    for row in rows:
        type_id = impala.rpc._PrimitiveType_to_TTypeId[row[5]]
        schema.append((row[3], type_id))
    return schema
def __do_rpc(rpc):
    """Execute the provided callable and normalise its failures.

    Args:
        rpc: Zero-argument callable that performs the actual RPC.

    Returns:
        A ``(result, status)`` tuple. ``status`` is ``RpcStatus.OK`` unless
        the result is a ``TStatus`` whose code is not ``TStatusCode.OK`` and
        which carries no error messages, in which case it is
        ``RpcStatus.ERROR``.

    Raises:
        RPCError: If a non-OK ``TStatus`` carries error messages, or on a
            ``BeeswaxException`` / ``TApplicationException``.
        QueryStateError: If the service reports the query handle as unknown.
        DisconnectedError: On a transport-level failure.
    """
    try:
        ret = rpc()
        status = RpcStatus.OK
        # TODO: In the future more advanced error detection/handling can be
        # done based on the TStatus return value. For now, just print any
        # error(s) that were encountered and validate the result of the
        # operation was a success.
        # isinstance() is already False for None, so no separate None check.
        if isinstance(ret, TStatus) and ret.status_code != TStatusCode.OK:
            print(ret.error_msgs)
            if ret.error_msgs:
                raise RPCError('RPC Error: %s' % '\n'.join(ret.error_msgs))
            status = RpcStatus.ERROR
        return ret, status
    except BeeswaxService.QueryNotFoundException:
        raise QueryStateError('Error: Stale query handle')
    # beeswaxException prints out the entire object, printing
    # just the message is far more readable/helpful.
    except BeeswaxService.BeeswaxException as b:
        raise RPCError("ERROR: %s" % (b.message))
    except TTransportException as e:
        # issue with the connection with the impalad
        raise DisconnectedError("Error communicating with impalad: %s" % e)
    except TApplicationException as t:
        raise RPCError("Application Exception : %s" % (t))
    # No trailing return: the try body returns and every handler raises, so
    # control can never fall through to this point.
def close_insert(service, last_query_handle):
    """Close an INSERT query and return how many rows it appended.

    Calls ``service.CloseInsert`` for the given handle and adds up the
    values of the result's ``rows_appended`` mapping, converting each value
    to ``int`` first.

    Raises:
        RPCError: If the RPC does not report ``RpcStatus.OK``.
    """
    insert_result, status = __do_rpc(
        lambda: service.CloseInsert(last_query_handle))
    if status != RpcStatus.OK:
        raise RPCError()

    total_rows = 0
    for appended in insert_result.rows_appended.values():
        total_rows += int(appended)
    return total_rows
def build_default_query_options_dict(service):
    """Return the default query options reported by the connected service.

    The default query options are retrieved from the service and depend on
    the impalad to which a connection has been established, so they need to
    be refreshed each time a connection is made. This is particularly
    helpful when there is a version mismatch between the shell and the
    impalad.

    Args:
        service: Client object exposing ``get_default_configuration``.

    Returns:
        Whatever ``get_default_configuration(False)`` returned, or an empty
        dict when that call fails with an ordinary exception.

    Raises:
        RPCError: If the retrieved result does not pass the RPC status check.
    """
    try:
        default_options = service.get_default_configuration(False)
    except Exception:
        # Best effort: a service that cannot answer simply yields no
        # defaults. Only ``Exception`` is caught so that SystemExit and
        # KeyboardInterrupt are not silently converted into "no options".
        return {}

    # The call has already happened above; routing its result through
    # __do_rpc only applies the shared status validation to it.
    options, status = __do_rpc(lambda: default_options)
    if status != RpcStatus.OK:
        raise RPCError("Unable to retrieve default query options")
    return options
def fetch_internal(service, last_query_handle, buffer_size):
    """Fetch result rows for a query and return them as lists of fields.

    ``service.fetch`` is called repeatedly until either at least
    ``buffer_size`` rows have been accumulated or the service reports that
    no more rows are available. Each accumulated row is then split on tab
    characters.

    Returns:
        A list of rows, each row being a list of string fields.

    Raises:
        RPCError: If any fetch does not report ``RpcStatus.OK``.
    """
    def fetch_batch():
        return service.fetch(last_query_handle, False, buffer_size)

    collected = []
    while True:
        batch, status = __do_rpc(fetch_batch)
        if status != RpcStatus.OK:
            raise RPCError()
        collected.extend(batch.data)
        # Keep fetching only while the buffer is not full and rows remain.
        if len(collected) < buffer_size and batch.has_more:
            continue
        return [line.split('\t') for line in collected]
def wrapper(*args, **kwargs):
    """Call the wrapped RPC function, retrying on transport failures.

    The Thrift transport is taken from the ``service`` keyword argument, or
    from the first positional argument when it is a ``TCLIService.Client``.
    The call is attempted up to three times; after each ``socket.error`` or
    ``TTransportException`` the transport is closed so the next attempt
    reopens it. Any other exception propagates immediately.

    Returns:
        Whatever the wrapped function returns on the first successful try.

    Raises:
        RPCError: If no service argument can be located.
        socket.error, TTransportException: The failure from the last
            attempt, once all retries are exhausted.
    """
    # get the thrift transport
    if 'service' in kwargs:
        transport = kwargs['service']._iprot.trans
    elif len(args) > 0 and isinstance(args[0], TCLIService.Client):
        transport = args[0]._iprot.trans
    else:
        raise RPCError("RPC function does not have expected 'service' arg")

    last_error = None
    tries_left = 3
    while tries_left > 0:
        try:
            if not transport.isOpen():
                transport.open()
            return func(*args, **kwargs)
        except (socket.error, TTransportException) as e:
            # Keep the failure: a bare ``raise`` after the loop would run
            # outside any handler and has no active exception to re-raise.
            last_error = e
        transport.close()
        tries_left -= 1
    raise last_error
def execute_statement(service, query):
    """Submit ``query`` through ``service.query`` and return its handle.

    Raises:
        RPCError: If the RPC does not report ``RpcStatus.OK``.
    """
    def submit():
        return service.query(query)

    query_handle, status = __do_rpc(submit)
    if status != RpcStatus.OK:
        raise RPCError("Error executing the query")
    return query_handle
print(ret.error_msgs) if ret.error_msgs: raise RPCError('RPC Error: %s' % '\n'.join(ret.error_msgs)) status = RpcStatus.ERROR return ret, status except BeeswaxService.QueryNotFoundException: raise QueryStateError('Error: Stale query handle') # beeswaxException prints out the entire object, printing # just the message is far more readable/helpful. except BeeswaxService.BeeswaxException, b: raise RPCError("ERROR: %s" % (b.message)) except TTransportException, e: # issue with the connection with the impalad raise DisconnectedError("Error communicating with impalad: %s" % e) except TApplicationException, t: raise RPCError("Application Exception : %s" % (t)) return None, RpcStatus.ERROR def get_column_names(service, last_query_handle): rpc_result = __do_rpc( lambda: service.get_results_metadata(last_query_handle)) metadata, _ = rpc_result if not metadata is None: return [fs.name for fs in metadata.schema.fieldSchemas] def get_results_metadata(service, last_query_handle): rpc_result = __do_rpc( lambda: service.get_results_metadata(last_query_handle)) metadata, _ = rpc_result