def main(argv):
    """Call ``test_void`` twice over a buffered or framed socket transport.

    Options are parsed by ``argparse`` from the process command line; the
    ``argv`` parameter is accepted but not consulted.

    Raises:
        AssertionError: if the common options do not select the ``header``
            protocol on a non-SSL ``buffered`` transport.
        ValueError: if ``--override-transport`` or ``--override-protocol``
            names an unsupported value.
    """
    p = argparse.ArgumentParser()
    add_common_args(p)
    # Since THeaderTransport acts as framed transport when detected frame, we
    # cannot use --transport=framed as it would result in 2 layered frames.
    p.add_argument('--override-transport')
    p.add_argument('--override-protocol')
    args = p.parse_args()
    assert args.protocol == 'header'
    assert args.transport == 'buffered'
    assert not args.ssl

    sock = TSocket(args.host, args.port, socket_family=socket.AF_INET)
    if not args.override_transport or args.override_transport == 'buffered':
        trans = TBufferedTransport(sock)
    elif args.override_transport == 'framed':
        print('TFRAMED')
        trans = TFramedTransport(sock)
    else:
        raise ValueError('invalid transport')

    trans.open()
    try:
        if not args.override_protocol or args.override_protocol == 'binary':
            proto = TBinaryProtocol(trans)
        elif args.override_protocol == 'compact':
            proto = TCompactProtocol(trans)
        else:
            # This branch used to report 'invalid transport', which pointed
            # the user at the wrong option.
            raise ValueError('invalid protocol')
        test_void(proto)
        test_void(proto)
    finally:
        # Release the connection even when protocol selection or a call fails.
        trans.close()
def __init__(self, host=None, port=10000, authMechanism=None, user=None,
             password=None, database=None, cursorclass=Cursor):
    """Open a TCLIService session and optionally switch to ``database``.

    ``NOSASL`` uses a plain buffered transport; ``PLAIN`` and ``LDAP`` use a
    SASL transport with the ``PLAIN`` mechanism.

    Raises:
        NotImplementedError: for ``KERBEROS`` or any unknown ``authMechanism``.
    """
    accepted = set(['NOSASL', 'PLAIN', 'LDAP'])
    if authMechanism not in accepted:
        raise NotImplementedError(
            'authMechanism is either not supported or not implemented')

    # Must set a password for thrift, even if it doesn't need one
    # Open issue with python-sasl
    if authMechanism == 'PLAIN':
        if password is None or len(password) == 0:
            password = '******'

    sock = TSocket(host, port)
    self.cursorclass = cursorclass
    if authMechanism != 'NOSASL':
        sasl_client = sasl.Client()
        sasl_client.setAttr("username", user)
        sasl_client.setAttr("password", password)
        sasl_client.init()
        transport = TSaslClientTransport(sasl_client, "PLAIN", sock)
    else:
        transport = TBufferedTransport(sock)

    self.client = TCLIService.Client(TBinaryProtocol(transport))
    transport.open()
    self.session = self.client.OpenSession(TOpenSessionReq()).sessionHandle

    if database is None:
        return
    # Select the requested database for this session.
    with self.cursor() as cur:
        cur.execute("USE {0}".format(database))
def main(args):
    """Run ``columnGrep`` on one table column and report the result.

    Args:
        args: argv-style list: program name, table name, column, pattern and
            an optional output file name (defaults to ``""``).

    Prints a usage line and exits with status 1 when fewer than three
    operands are given.
    """
    global client
    if len(args) < 4:
        print("%s tablename column pattern output[option]" % (args[0]))
        sys.exit(1)
    tablename = args[1]
    column = args[2]
    pattern = args[3]
    outputfile = args[4] if len(args) > 4 else ""

    # NOTE(review): presumably populates the module-level hbaseHost - confirm.
    getConfiguration('host.properties')
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
        rowlist = columnGrep(tablename, column, pattern)
        print(len(rowlist))
        printStdout(rowlist, outputfile)
    finally:
        # The connection was previously left open when main returned or failed.
        transport.close()
def __init__(self, host=None, port=10000, authMechanism=None, user=None,
             password=None, database=None, cursorclass=Cursor):
    """Connect, open a TCLIService session and optionally ``USE`` a database.

    Raises:
        NotImplementedError: when ``authMechanism`` is ``KERBEROS`` or is not
            one of ``NOSASL``, ``PLAIN``, ``LDAP``.
    """
    known = set(['NOSASL', 'PLAIN', 'KERBEROS', 'LDAP'])
    unsupported = authMechanism == 'KERBEROS'
    if unsupported or authMechanism not in known:
        raise NotImplementedError('authMechanism is either not supported or not implemented')

    # Must set a password for thrift, even if it doesn't need one
    # Open issue with python-sasl
    needs_placeholder = password is None or len(password) == 0
    if authMechanism == 'PLAIN' and needs_placeholder:
        password = '******'

    sock = TSocket(host, port)
    self.cursorclass = cursorclass

    if authMechanism == 'NOSASL':
        transport = TBufferedTransport(sock)
    else:
        # Every SASL-based mechanism accepted above is sent as PLAIN.
        sasl_client = sasl.Client()
        for attr_name, attr_value in (("username", user), ("password", password)):
            sasl_client.setAttr(attr_name, attr_value)
        sasl_client.init()
        transport = TSaslClientTransport(sasl_client, "PLAIN", sock)

    self.client = TCLIService.Client(TBinaryProtocol(transport))
    transport.open()
    response = self.client.OpenSession(TOpenSessionReq())
    self.session = response.sessionHandle

    if database is not None:
        statement = "USE {0}".format(database)
        with self.cursor() as cur:
            cur.execute(statement)
class HS2TestSuite(ImpalaTestSuite):
    """Test-suite base holding a TCLIService client for IMPALAD_HS2_HOST_PORT."""

    def setup(self):
        """Open a buffered, binary-protocol connection and build the client."""
        hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(hs2_host, hs2_port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = TCLIService.Client(self.protocol)

    def teardown(self):
        """Close the socket created by setup() when it is truthy."""
        if not self.socket:
            return
        self.socket.close()

    @staticmethod
    def check_response(response,
                       expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                       expected_error_prefix=None):
        """Assert the response status code, and the error prefix when one applies.

        The error message is only checked when a non-success status is
        expected and expected_error_prefix is given.
        """
        assert response.status.statusCode == expected_status_code
        if expected_status_code == TCLIService.TStatusCode.SUCCESS_STATUS:
            return
        if expected_error_prefix is None:
            return
        assert response.status.errorMessage.startswith(expected_error_prefix)

    def close(self, op_handle):
        """Close the given operation handle and assert that this succeeded."""
        request = TCLIService.TCloseOperationReq()
        request.operationHandle = op_handle
        response = self.hs2_client.CloseOperation(request)
        assert response.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

    def fetch(self, handle, orientation, size, expected_num_rows=None):
        """Fetch up to size rows for the given operation handle and orientation.

        Asserts a success status and that the number of returned rows equals
        expected_num_rows when it is given, otherwise size. Returns the fetch
        response.
        """
        request = TCLIService.TFetchResultsReq()
        request.operationHandle = handle
        request.orientation = orientation
        request.maxRows = size
        response = self.hs2_client.FetchResults(request)
        HS2TestSuite.check_response(response)
        wanted_rows = size if expected_num_rows is None else expected_num_rows
        assert len(response.results.rows) == wanted_rows
        return response

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Fetch with maxRows=100 and assert that an error status is returned.

        The error message must start with expected_error_prefix. Returns the
        fetch response.
        """
        request = TCLIService.TFetchResultsReq()
        request.operationHandle = handle
        request.orientation = orientation
        request.maxRows = 100
        response = self.hs2_client.FetchResults(request)
        HS2TestSuite.check_response(
            response, TCLIService.TStatusCode.ERROR_STATUS, expected_error_prefix)
        return response
class HbaseClient:
    """Small wrapper around an ``Hbase.Client`` on a buffered binary transport."""

    def __init__(self, host, port):
        """Open the transport to ``host:port`` and create the client and a TScan."""
        self.transport = TBufferedTransport(TSocket(host, port))
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.protocol)
        self.scan = TScan()

    def createTable(self, table, contents):
        """Create ``table`` with ``contents`` as its single column descriptor."""
        return self.client.createTable(table, [contents])

    def mutateRow(self, table, row, mutations, st):
        """Forward to the client's ``mutateRow`` with the same arguments."""
        return self.client.mutateRow(table, row, mutations, st)

    def getTable(self):
        """Return the table names reported by the server."""
        return self.client.getTableNames()

    def scannerGetList(self, tableName, num):
        """Open a scanner with ``self.scan`` and return up to ``num`` results."""
        scanner_id = self.client.scannerOpenWithScan(tableName, self.scan, None)
        try:
            return self.client.scannerGetList(scanner_id, num)
        finally:
            # The scanner id is never handed to the caller, so release it here
            # instead of leaving it open on the server.
            self.client.scannerClose(scanner_id)

    def scannerGet(self, tableName):
        """Open a scanner with ``self.scan`` and return its first result."""
        scanner_id = self.client.scannerOpenWithScan(tableName, self.scan, None)
        try:
            return self.client.scannerGet(scanner_id)
        finally:
            self.client.scannerClose(scanner_id)

    def close(self):
        """Close the underlying transport."""
        self.transport.close()
def main(argv):
    """Call ``test_void`` twice over a buffered or framed socket transport.

    Options are parsed by ``argparse`` from the process command line; the
    ``argv`` parameter is accepted but not consulted.

    Raises:
        AssertionError: if the common options do not select the ``header``
            protocol on a non-SSL ``buffered`` transport.
        ValueError: if ``--override-transport`` or ``--override-protocol``
            names an unsupported value.
    """
    p = argparse.ArgumentParser()
    add_common_args(p)
    # Since THeaderTransport acts as framed transport when detected frame, we
    # cannot use --transport=framed as it would result in 2 layered frames.
    p.add_argument('--override-transport')
    p.add_argument('--override-protocol')
    args = p.parse_args()
    assert args.protocol == 'header'
    assert args.transport == 'buffered'
    assert not args.ssl

    sock = TSocket(args.host, args.port, socket_family=socket.AF_INET)
    if not args.override_transport or args.override_transport == 'buffered':
        trans = TBufferedTransport(sock)
    elif args.override_transport == 'framed':
        print('TFRAMED')
        trans = TFramedTransport(sock)
    else:
        raise ValueError('invalid transport')

    trans.open()
    try:
        if not args.override_protocol or args.override_protocol == 'binary':
            proto = TBinaryProtocol(trans)
        elif args.override_protocol == 'compact':
            proto = TCompactProtocol(trans)
        else:
            # The message used to say 'invalid transport' although the
            # rejected option is the protocol override.
            raise ValueError('invalid protocol')
        test_void(proto)
        test_void(proto)
    finally:
        # Always release the connection, including on failure.
        trans.close()
def __init__(self, host=None, port=10000, authMechanism=None, user=None,
             password=None, database=None, configuration=None, timeout=None):
    """Connect with the chosen auth mechanism and open a TCLIService session.

    ``NOSASL`` uses a buffered transport. Every other accepted mechanism uses
    SASL: ``GSSAPI`` for ``KERBEROS`` (host and service come from
    ``_get_krb_settings``), ``PLAIN`` otherwise.

    Raises:
        NotImplementedError: when ``authMechanism`` is not one of
            ``NOSASL``, ``PLAIN``, ``KERBEROS``, ``LDAP``.
    """
    if authMechanism not in set(['NOSASL', 'PLAIN', 'KERBEROS', 'LDAP']):
        raise NotImplementedError('authMechanism is either not supported or not implemented')

    # Must set a password for thrift, even if it doesn't need one
    # Open issue with python-sasl
    if authMechanism == 'PLAIN':
        if password is None or len(password) == 0:
            password = '******'

    sock = TSocket(host, port)
    sock.setTimeout(timeout)

    if authMechanism != 'NOSASL':
        mechanism = 'PLAIN'
        sasl_client = sasl.Client()
        sasl_client.setAttr("username", user)
        sasl_client.setAttr("password", password)
        if authMechanism == 'KERBEROS':
            krb_host, krb_service = self._get_krb_settings(host, configuration)
            mechanism = 'GSSAPI'
            sasl_client.setAttr("host", krb_host)
            sasl_client.setAttr("service", krb_service)
        sasl_client.init()
        transport = TSaslClientTransport(sasl_client, mechanism, sock)
    else:
        transport = TBufferedTransport(sock)

    self.client = TCLIService.Client(TBinaryProtocol(transport))
    transport.open()
    request = TOpenSessionReq(username=user, password=password,
                              configuration=configuration)
    self.session = self.client.OpenSession(request).sessionHandle

    if database is None:
        return
    with self.cursor() as cur:
        cur.execute("USE {0}".format(database))
def __init__(self, host, port):
    """Connect to ``host:port`` and store an ``HBaseThrift.Client`` on ``self.client``.

    The client runs the binary protocol over a buffered socket transport that
    is opened here.
    """
    transport = TBufferedTransport(TSocket(host, port))
    transport.open()
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    # A trailing bare ``self.client`` expression used to follow this
    # assignment; it had no effect and was dropped.
    self.client = HBaseThrift.Client(protocol)
def __init__(self, host, port):
    """Open a buffered transport to ``host:port`` and create the Thrift client.

    The resulting ``HBaseThrift.Client`` (binary protocol) is kept on
    ``self.client``.
    """
    transport = TBufferedTransport(TSocket(host, port))
    transport.open()
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    # The no-op ``self.client`` expression statement that used to end this
    # method was removed.
    self.client = HBaseThrift.Client(protocol)
def __init__(self, host=None, port=10000, authMechanism=None, user=None,
             password=None, database=None, configuration=None):
    """Connect, open a TCLIService session and optionally ``USE`` a database.

    The base-class initializer receives ``authMechanism``. ``NOSASL`` uses a
    buffered transport; anything else uses the SASL client and mechanism
    returned by ``_get_sasl_client``.
    """
    super(Connection, self).__init__(authMechanism)

    # Must set a password for thrift, even if it doesn't need one
    # Open issue with python-sasl
    password = self._check_password(authMechanism, password)

    sock = TSocket(host, port)
    if authMechanism != 'NOSASL':
        sasl_client, mechanism = self._get_sasl_client(
            host, authMechanism, user, password, configuration)
        transport = TSaslClientTransport(sasl_client, mechanism, sock)
    else:
        transport = TBufferedTransport(sock)

    self.client = TCLIService.Client(TBinaryProtocol(transport))
    transport.open()
    request = TOpenSessionReq(configuration=configuration)
    self.session = self.client.OpenSession(request).sessionHandle

    if database is None:
        return
    with self.cursor() as cur:
        cur.execute("USE {0}".format(database))
def main(args):
    """Fetch a limited number of rows from a table and print them.

    Args:
        args: argv-style list: program name, table name and an optional row
            limit (defaults to 10).

    Prints a usage line and exits with status 1 when the table name is
    missing.
    """
    global client
    if len(args) < 2:
        print("TableScan.py tableName No[10]")
        sys.exit(1)
    table_name = args[1]
    limit = int(args[2]) if len(args) >= 3 else 10

    # NOTE(review): presumably populates the module-level hbaseHost - confirm.
    getConfiguration('host.properties')
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
        ret = getRowsLimit(table_name, limit)
        printRowsResult(ret)
    finally:
        # The connection was previously never closed.
        transport.close()
def _connect(self):
    """Connect ``self.storage`` to the configured HBase Thrift server.

    When the config defines ``HBASE_STORAGE_SERVER_HOSTS``, the host is picked
    by ``(server port + hbase_server_offset) % len(hosts)``; otherwise
    ``HBASE_STORAGE_SERVER_HOST`` is used.

    Connection errors are logged, not raised. On failure
    ``hbase_server_offset`` is incremented so the next attempt selects the
    following entry of the host list.
    """
    config = self.context.config
    if hasattr(config, 'HBASE_STORAGE_SERVER_HOSTS'):
        hosts = config.HBASE_STORAGE_SERVER_HOSTS
        index = (self.context.server.port + self.hbase_server_offset) % len(hosts)
        host = hosts[index]
    else:
        host = config.HBASE_STORAGE_SERVER_HOST
    port = config.HBASE_STORAGE_SERVER_PORT

    # A second, unused TSocket/TBufferedTransport pair used to be built here
    # and was immediately overwritten; only the socket below is needed.
    sock = TSocket(host=host, port=port)

    # Timeout is sum of HTTP timeouts, plus a bit.
    try:
        timeout = 5
        sock.setTimeout(timeout * 1000)
    except Exception:
        # Best effort: continue without a timeout if it cannot be set.
        pass

    try:
        transport = TBufferedTransport(sock)
        transport.open()
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        self.storage = Hbase.Client(protocol)
        logger.info("Connected to HBase server " + host + ":" + str(port))
    except Exception:
        # ``except Exception`` keeps KeyboardInterrupt/SystemExit propagating,
        # which the former bare ``except:`` swallowed.
        logger.error("Error connecting to HBase server " + host + ":" + str(port))
        self.hbase_server_offset = self.hbase_server_offset + 1
def main(args):
    """Look up row keys listed in a file and print each row.

    The file name must look like ``<orgId>_<subOrgId>_...``; the table queried
    is ``<orgId>_<subOrgId>_master_<orgId>``. Each stripped line of the file is
    used as a row key. Only the first line is processed unless ``-all`` is
    given as the second operand.

    Args:
        args: argv-style list: program name, file name, optional ``-all``.
    """
    global client
    if len(args) < 2:
        print("%s <verified file> -all" % (args[0]))
        sys.exit(1)
    filename = args[1]
    opt_all = len(args) > 2 and args[2] == "-all"
    name_parts = filename.split("_")
    orgId = name_parts[0]
    subOrgId = name_parts[1]

    # NOTE(review): presumably populates the module-level hbaseHost - confirm.
    getConfiguration('host.properties')
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
        tablename = "%s_%s_master_%s" % (orgId, subOrgId, orgId)
        # ``with`` closes the input file, which was previously left open.
        with open(filename, "r") as verified:
            for line in verified:
                row_key = line.strip()
                row = client.getRow(tablename, row_key)
                print(row_key)
                printRow(row)
                print("")
                if not opt_all:
                    break
    finally:
        # Close the connection even when a lookup fails.
        transport.close()
def _open_hs2_connection():
    """Open an HS2 connection and return ``(socket, thrift client)``.

    The endpoint comes from ``IMPALAD_HS2_HOST_PORT`` ("host:port"); the client
    speaks the binary protocol over a buffered transport.
    """
    hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
    sock = TSocket(hs2_host, hs2_port)
    buffered = TBufferedTransport(sock)
    buffered.open()
    binary = TBinaryProtocol.TBinaryProtocol(buffered)
    return sock, ImpalaHiveServer2Service.Client(binary)
def connect(server='localhost', port=9090, timeout=None):
    """Return an ``Hbase.Client`` connected to ``server:port``.

    ``port`` is converted with ``int()``. When ``timeout`` is not ``None`` it
    is applied to the socket before the transport is opened. The client uses
    the accelerated binary protocol over a buffered transport.
    """
    sock = TSocket(server, int(port))
    if timeout is not None:
        sock.setTimeout(timeout)
    buffered = TBufferedTransport(sock)
    buffered.open()
    return Hbase.Client(TBinaryProtocol.TBinaryProtocolAccelerated(buffered))
def __init__(self, context):
    """Store the context and connect ``self.storage`` to the configured server.

    Table and column-family names are read from the context config
    (``HBASE_STORAGE_TABLE``, ``HBASE_STORAGE_FAMILY``), as are the server
    host and port.
    """
    self.context = context
    config = self.context.config
    self.table = config.HBASE_STORAGE_TABLE
    self.data_fam = config.HBASE_STORAGE_FAMILY

    sock = TSocket(host=config.HBASE_STORAGE_SERVER_HOST,
                   port=config.HBASE_STORAGE_SERVER_PORT)
    transport = TBufferedTransport(sock)
    transport.open()
    self.storage = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
def create_hs2_client(self):
    """Create and return a new HS2 client connected to the impalad.

    Connects to ``self.hostname`` on ``self.hs2_port`` with a buffered
    transport and the binary protocol.
    """
    buffered = TBufferedTransport(TSocket(self.hostname, self.hs2_port))
    buffered.open()
    return TCLIService.Client(TBinaryProtocol.TBinaryProtocol(buffered))
class HS2TestSuite(ImpalaTestSuite):
    """Test-suite base holding a TCLIService client for IMPALAD_HS2_HOST_PORT."""

    def setup(self):
        """Open a buffered, binary-protocol connection and build the client."""
        endpoint_host, endpoint_port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(endpoint_host, endpoint_port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = TCLIService.Client(self.protocol)

    def teardown(self):
        """Close the socket created by setup() when it is truthy."""
        if not self.socket:
            return
        self.socket.close()

    @staticmethod
    def check_response(response,
                       expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                       expected_error_prefix=None):
        """Assert the response status code, and the error prefix when one applies.

        The error message is only checked when a non-success status is
        expected and expected_error_prefix is given.
        """
        assert response.status.statusCode == expected_status_code
        expects_failure = expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS
        if expects_failure and expected_error_prefix is not None:
            message = response.status.errorMessage
            assert message.startswith(expected_error_prefix)

    def close(self, op_handle):
        """Close the given operation handle and assert that this succeeded."""
        req = TCLIService.TCloseOperationReq()
        req.operationHandle = op_handle
        resp = self.hs2_client.CloseOperation(req)
        assert resp.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

    def fetch(self, handle, orientation, size, expected_num_rows=None):
        """Fetch up to size rows for the given operation handle and orientation.

        Asserts a success status and that the number of returned rows equals
        expected_num_rows when it is given, otherwise size. Returns the fetch
        response.
        """
        req = TCLIService.TFetchResultsReq()
        req.operationHandle = handle
        req.orientation = orientation
        req.maxRows = size
        resp = self.hs2_client.FetchResults(req)
        HS2TestSuite.check_response(resp)
        if expected_num_rows is None:
            wanted_rows = size
        else:
            wanted_rows = expected_num_rows
        assert len(resp.results.rows) == wanted_rows
        return resp

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Fetch with maxRows=100 and assert that an error status is returned.

        The error message must start with expected_error_prefix. Returns the
        fetch response.
        """
        req = TCLIService.TFetchResultsReq()
        req.operationHandle = handle
        req.orientation = orientation
        req.maxRows = 100
        resp = self.hs2_client.FetchResults(req)
        HS2TestSuite.check_response(
            resp, TCLIService.TStatusCode.ERROR_STATUS, expected_error_prefix)
        return resp
def connect(server='localhost', port=9090, timeout=None):
    """Open a connection to ``server:port`` and return an ``Hbase.Client``.

    ``port`` is passed through ``int()``; ``timeout`` is applied to the socket
    only when it is not ``None``.
    """
    endpoint = TSocket(server, int(port))
    if timeout is not None:
        endpoint.setTimeout(timeout)
    channel = TBufferedTransport(endpoint)
    channel.open()
    accelerated = TBinaryProtocol.TBinaryProtocolAccelerated(channel)
    return Hbase.Client(accelerated)
def getMasterTables(hbaseHost):
    """Print every table name on ``hbaseHost`` (port 9090) containing ``master``."""
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
        for table in client.getTableNames():
            if 'master' in table:
                print(table)
    finally:
        # Close the connection even if listing the tables fails.
        transport.close()
def main(args):
    """Connect to HBase, publish the client globally and call ``getTableNames``.

    ``args`` is accepted but not used.
    """
    global client
    # NOTE(review): presumably populates the module-level hbaseHost - confirm.
    getConfiguration('host.properties')
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
        getTableNames()
    finally:
        # The connection was previously never closed.
        transport.close()
def getMasterTables(hbaseHost):
    """Print the names of all tables on ``hbaseHost`` that contain ``master``.

    Connects to port 9090 with a buffered transport and the binary protocol.
    """
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
        for table in client.getTableNames():
            if 'master' in table:
                print(table)
    finally:
        # Release the connection on both the success and the error path.
        transport.close()
def main(args):
    """Set up the global HBase ``client`` and run ``getTableNames``.

    ``args`` is accepted but not used.
    """
    global client
    # NOTE(review): presumably populates the module-level hbaseHost - confirm.
    getConfiguration('host.properties')
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
        getTableNames()
    finally:
        # Previously the transport stayed open after main finished.
        transport.close()
def __init__(self, unix_socket=None, host=None, port=10000,
             authMechanism=None, user=None, password=None, database=None,
             configuration=None, timeout=None):
    """Connect over TCP or a unix socket and open a TCLIService session.

    A unix socket is used when ``unix_socket`` is given, otherwise
    ``host``/``port``. ``NOSASL`` uses a buffered transport. Every other
    accepted mechanism uses SASL: ``GSSAPI`` for ``KERBEROS`` (host and
    service come from ``_get_krb_settings``), ``PLAIN`` otherwise.

    Raises:
        NotImplementedError: when ``authMechanism`` is not one of
            ``NOSASL``, ``PLAIN``, ``KERBEROS``, ``LDAP``.
    """
    if authMechanism not in set(['NOSASL', 'PLAIN', 'KERBEROS', 'LDAP']):
        raise NotImplementedError(
            'authMechanism is either not supported or not implemented')

    # Must set a password for thrift, even if it doesn't need one
    # Open issue with python-sasl
    if authMechanism == 'PLAIN':
        if password is None or len(password) == 0:
            password = '******'

    if unix_socket is None:
        sock = TSocket(host, port)
    else:
        sock = TSocket(unix_socket=unix_socket)
    sock.setTimeout(timeout)

    if authMechanism != 'NOSASL':
        mechanism = 'PLAIN'
        sasl_client = sasl.Client()
        sasl_client.setAttr("username", user)
        sasl_client.setAttr("password", password)
        if authMechanism == 'KERBEROS':
            krb_host, krb_service = self._get_krb_settings(host, configuration)
            mechanism = 'GSSAPI'
            sasl_client.setAttr("host", krb_host)
            sasl_client.setAttr("service", krb_service)
        sasl_client.init()
        transport = TSaslClientTransport(sasl_client, mechanism, sock)
    else:
        transport = TBufferedTransport(sock)

    self.client = TCLIService.Client(TBinaryProtocol(transport))
    transport.open()
    request = TOpenSessionReq(username=user, password=password,
                              configuration=configuration)
    self.session = self.client.OpenSession(request).sessionHandle

    if database is None:
        return
    with self.cursor() as cur:
        cur.execute("USE {0}".format(database))
def hs2_port_is_open(self):
    """Return True when a transport to the HS2 port can be opened.

    No authentication is attempted. Any exception is logged at info level
    and reported as False.
    """
    # Impyla will try to authenticate as part of connecting, so preserve previous logic
    # that uses the HS2 thrift code directly.
    try:
        probe = TBufferedTransport(TSocket(self.hostname, self.hs2_port))
        probe.open()
        probe.close()
    except Exception as e:
        LOG.info(e)
        return False
    return True
def hs2_port_is_open(self):
    """Check whether the HS2 port accepts a connection, without authenticating.

    Returns True if the transport opens and closes cleanly; otherwise logs
    the exception at info level and returns False.
    """
    # Impyla will try to authenticate as part of connecting, so preserve previous logic
    # that uses the HS2 thrift code directly.
    reachable = False
    try:
        sock = TSocket(self.hostname, self.hs2_port)
        channel = TBufferedTransport(sock)
        channel.open()
        channel.close()
        reachable = True
    except Exception as e:
        LOG.info(e)
    return reachable
def start():
    """Connect to the Ted service on localhost:9030 and open an interactive shell.

    A stream handler is attached to the ``ted`` logger, the Thrift client is
    stored in the module-level ``client``, and the console is started with
    this module's globals as its namespace.
    """
    global client
    logging.getLogger('ted').addHandler(logging.StreamHandler())

    transport = TBufferedTransport(TSocket('localhost', 9030))
    transport.open()
    client = ted.TedService.Client(TBinaryProtocol.TBinaryProtocol(transport))

    banner = ("Run client.<command> where command is a Ted command "
              "(eg: getWatching())\nSee dir(Iface) for commands")
    console = code.InteractiveConsole(globals())
    console.interact(banner)
class ThriftClient(object):
    """Lazily connecting Thrift client that forwards calls by method name."""

    def __init__(self, port=6458, host="127.0.0.1"):
        """Remember the endpoint; no connection is made until first use."""
        self.host = host
        self.port = port
        self.client = None
        self.transport = None

    def connect(self):
        """Open a buffered binary-protocol connection to ``host:port``."""
        transport = TBufferedTransport(TSocket(self.host, self.port))
        client = Client(TBinaryProtocol(transport))
        transport.open()
        # Publish the connection only after open() succeeded. Otherwise a
        # failed attempt would leave self.client set and execute() would
        # never try to reconnect.
        self.transport = transport
        self.client = client

    def execute(self, name, *args, **kwargs):
        """Call the remote method ``name``, connecting first if necessary."""
        if self.client is None:
            self.connect()
        return getattr(self.client, name)(*args, **kwargs)

    def create(self, key, second, minute=-1, hour=-1, day=-1, month=-1,
               week=-1, action="shell", params=None):
        """Forward to the remote ``create``; ``params`` defaults to an empty dict."""
        if params is None:
            params = {}
        return self.execute("create", key, second, minute, hour, day, month,
                            week, action, params)

    def create_timeout(self, key, second, minute=-1, hour=-1, day=-1,
                       month=-1, week=-1, count=1, action="shell", params=None):
        """Forward to the remote ``createTimeout``; ``params`` defaults to an empty dict."""
        if params is None:
            params = {}
        return self.execute("createTimeout", key, second, minute, hour, day,
                            month, week, count, action, params)

    def remove(self, key):
        """Forward to the remote ``remove``."""
        return self.execute("remove", key)

    def get(self, key):
        """Forward to the remote ``get``."""
        return self.execute("get", key)

    def get_current(self):
        """Forward to the remote ``getCurrent``."""
        return self.execute("getCurrent")

    def get_time(self, timestamp):
        """Forward to the remote ``getTime``."""
        return self.execute("getTime", timestamp)

    def get_keys(self, prefix=''):
        """Forward to the remote ``getKeys``."""
        return self.execute("getKeys", prefix)

    def info(self):
        """Forward to the remote ``info``."""
        return self.execute("info")

    def __del__(self):
        """Close the transport, if one was opened, and drop the references."""
        # Guard on the transport itself so a client assigned without a
        # transport cannot trigger ``None.close()``.
        if self.transport is not None:
            self.transport.close()
        self.transport = None
        self.client = None
class ThriftClient(object):
    """Lazily connecting Thrift client that forwards calls by method name."""

    def __init__(self, port=6458, host="127.0.0.1"):
        """Remember the endpoint; no connection is made until first use."""
        self.host = host
        self.port = port
        self.client = None
        self.transport = None

    def connect(self):
        """Open a buffered binary-protocol connection to ``host:port``."""
        transport = TBufferedTransport(TSocket(self.host, self.port))
        client = Client(TBinaryProtocol(transport))
        transport.open()
        # Only publish the connection once open() has succeeded, so that a
        # failed attempt leaves self.client as None and execute() retries.
        self.transport = transport
        self.client = client

    def execute(self, name, *args, **kwargs):
        """Call the remote method ``name``, connecting first if necessary."""
        if self.client is None:
            self.connect()
        return getattr(self.client, name)(*args, **kwargs)

    def create(self, key, second, minute=-1, hour=-1, day=-1, month=-1,
               week=-1, action="shell", params=None):
        """Forward to the remote ``create``; ``params`` defaults to an empty dict."""
        if params is None:
            params = {}
        return self.execute("create", key, second, minute, hour, day, month,
                            week, action, params)

    def create_timeout(self, key, second, minute=-1, hour=-1, day=-1,
                       month=-1, week=-1, count=1, action="shell", params=None):
        """Forward to the remote ``createTimeout``; ``params`` defaults to an empty dict."""
        if params is None:
            params = {}
        return self.execute("createTimeout", key, second, minute, hour, day,
                            month, week, count, action, params)

    def remove(self, key):
        """Forward to the remote ``remove``."""
        return self.execute("remove", key)

    def get(self, key):
        """Forward to the remote ``get``."""
        return self.execute("get", key)

    def get_current(self):
        """Forward to the remote ``getCurrent``."""
        return self.execute("getCurrent")

    def get_time(self, timestamp):
        """Forward to the remote ``getTime``."""
        return self.execute("getTime", timestamp)

    def get_keys(self, prefix=''):
        """Forward to the remote ``getKeys``."""
        return self.execute("getKeys", prefix)

    def __del__(self):
        """Close the transport, if one was opened, and drop the references."""
        # Checking the transport (not the client) avoids ``None.close()``.
        if self.transport is not None:
            self.transport.close()
        self.transport = None
        self.client = None
def __init__(self, host=None, port=10000, authMechanism=None, user=None,
             password=None, database=None):
    """Connect to ``host:port`` and open a TCLIService session.

    ``NOSASL`` uses a buffered transport; ``PLAIN`` and ``LDAP`` use a SASL
    transport with the ``PLAIN`` mechanism. ``database`` is accepted but not
    used here.

    Raises:
        NotImplementedError: for ``KERBEROS`` or any unknown ``authMechanism``.
    """
    if authMechanism not in {'NOSASL', 'PLAIN', 'LDAP'}:
        raise NotImplementedError('authMechanism is either not supported or not implemented')

    sock = TSocket(host, port)
    if authMechanism != 'NOSASL':
        sasl_client = sasl.Client()
        sasl_client.setAttr("username", user)
        sasl_client.setAttr("password", password)
        sasl_client.init()
        transport = TSaslClientTransport(sasl_client, "PLAIN", sock)
    else:
        transport = TBufferedTransport(sock)

    self.client = TCLIService.Client(TBinaryProtocol(transport))
    transport.open()
    self.session = self.client.OpenSession(TOpenSessionReq()).sessionHandle
def main(args):
    """Run ``columnProcess`` on every table returned by ``getMasterTables``.

    The Thrift client is published in the module-level ``client`` before the
    helpers are called. ``args`` is accepted but not used.
    """
    global client
    # NOTE(review): presumably populates the module-level hbaseHost - confirm.
    getConfiguration('host.properties')
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
        tableList = getMasterTables()
        # For test
        # table = 'lyris_uptiilt6_master_lyris'
        # columnProcess(table)
        for table in tableList:
            columnProcess(table)
    finally:
        # The connection was previously never closed.
        transport.close()
class writeThread(threading.Thread):
    """Worker thread that writes batches of generated rows to table ``testdb1``."""

    def __init__(self, threadname, RecordsThreadwillwrite):
        """Prepare the column payload, the batch count and the connection.

        Args:
            threadname: name given to the thread.
            RecordsThreadwillwrite: total records this thread should write;
                divided by ``recordsPerBatch`` to get the number of batches.
        """
        threading.Thread.__init__(self, name=threadname)
        # 11 is the combined length of the "value_" prefix and "_endv" suffix.
        bytesPerColumn = int(bytesPerRecord / columns) - 11  # suppose 3 columns
        self.columnvalue = "value_" + "x" * bytesPerColumn + "_endv"
        self.tbwBatch = int(RecordsThreadwillwrite / recordsPerBatch)
        self.transport = TBufferedTransport(TSocket('10.1.2.230', 9090), 40960)
        self.transport.open()
        protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Hbase.Client(protocol)

    def run(self):
        """Write all batches, then add this thread's count to the global totals."""
        global gEndT
        global gWritenItems
        print("+%s start" % (self.getName()))
        threadWritenItem = 0
        try:
            for loopidx in xrange(0, self.tbwBatch):
                self.write_hbase()
                threadWritenItem += recordsPerBatch
            mylock.acquire()
            try:
                gEndT = time.time()
                gWritenItems += threadWritenItem
                print("%s done, %s seconds past, %d reocrds saved"
                      % (self.getName(), gEndT - gStartT, gWritenItems))
            finally:
                # Release the lock even if the bookkeeping above raises.
                mylock.release()
        finally:
            # Close the connection on both the success and the error path.
            self.transport.close()

    def write_hbase(self):
        """Send one batch of ``recordsPerBatch`` rows via ``mutateRows``.

        Each row gets a generated key and ``columns`` cells named ``f1:<i>``
        that all hold ``self.columnvalue``.
        """
        print("%s Start write" % self.getName())
        batchmutations = []
        for i in xrange(0, recordsPerBatch):
            rowkey = "RK_%s_%s" % (random.random(), time.time())
            mutations = [
                Hbase.Mutation(column="f1:%s" % ii, value=self.columnvalue)
                for ii in xrange(0, columns)
            ]
            batchmutations.append(Hbase.BatchMutation(rowkey, mutations))
        self.client.mutateRows("testdb1", batchmutations)
def setup(self):
    """Connect to HBase on localhost:9090 and recreate the test table.

    Sets ``self.connection``, ``self.table`` and ``self.family``. The table
    is disabled and deleted first (an ``IOError`` from that step is ignored),
    then created with one column family limited to one version.
    """
    transport = TBufferedTransport(TSocket(host='localhost', port=9090))
    transport.open()
    self.connection = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
    self.table = 'thumbor-test'
    self.family = 'images:'

    family_descriptor = ttypes.ColumnDescriptor()
    family_descriptor.name = self.family
    family_descriptor.maxVersions = 1

    try:
        self.connection.disableTable(self.table)
        self.connection.deleteTable(self.table)
    except ttypes.IOError:
        pass
    self.connection.createTable(self.table, [family_descriptor])
def main(args):
    """Call ``columnProcess`` for each table listed by ``getMasterTables``.

    The Thrift client is stored in the module-level ``client`` first.
    ``args`` is accepted but not used.
    """
    global client
    # NOTE(review): presumably populates the module-level hbaseHost - confirm.
    getConfiguration('host.properties')
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
        tableList = getMasterTables()
        # For test
        # table = 'lyris_uptiilt6_master_lyris'
        # columnProcess(table)
        for table in tableList:
            columnProcess(table)
    finally:
        # Previously the transport stayed open after main finished.
        transport.close()
def main(args):
    """Run ``rowPrint`` on the table named on the command line.

    Args:
        args: argv-style list: program name followed by the table name.

    Prints a usage line and exits with status 1 when the table name is
    missing.
    """
    global client
    if len(args) < 2:
        print("%s tablename" % (args[0]))
        sys.exit(1)
    tablename = args[1]

    # NOTE(review): presumably populates the module-level hbaseHost - confirm.
    getConfiguration('host.properties')
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
        rowPrint(tablename)
    finally:
        # The connection was previously never closed.
        transport.close()
def setup(self):
    """Open the HBase connection and start from a freshly created test table.

    Connects to localhost:9090, drops the ``thumbor-test`` table (ignoring
    an ``IOError`` from the disable/delete step) and recreates it with the
    ``images:`` family capped at one version.
    """
    channel = TBufferedTransport(TSocket(host='localhost', port=9090))
    channel.open()
    binary = TBinaryProtocol.TBinaryProtocol(channel)
    self.connection = Hbase.Client(binary)
    self.table = 'thumbor-test'
    self.family = 'images:'

    descriptor = ttypes.ColumnDescriptor()
    descriptor.name = self.family
    descriptor.maxVersions = 1
    columns = [descriptor]

    try:
        self.connection.disableTable(self.table)
        self.connection.deleteTable(self.table)
    except ttypes.IOError:
        pass

    self.connection.createTable(self.table, columns)
class MyHbase:
    """Connects to HBase and (re)creates the accounting table."""

    def __init__(self, netloc, port, table="diracAccounting"):
        """Connect, drop any existing ``diracAccounting`` table, then create ``table``.

        The new table is created with the ``groupby`` and ``generate`` column
        families.
        """
        self.tableName = table
        self.transport = TBufferedTransport(TSocket(netloc, port))
        self.protocol = TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.protocol)
        self.transport.open()

        # Delete the 'diracAccounting' table if the server has one, so that
        # it can be recreated below.
        for existing in self.client.getTableNames():
            if existing != "diracAccounting":
                continue
            if self.client.isTableEnabled(existing):
                print("disabling table:%s" % (existing))
                self.client.disableTable(existing)
            print("deleting table:%s" % (existing))
            self.client.deleteTable(existing)

        self.__createTable(["groupby", "generate"])

    def __del__(self):
        """Close the transport when the object is collected."""
        self.transport.close()

    def __createTable(self, columnfamilyList):
        """Create ``self.tableName`` with one column family per list entry.

        An ``AlreadyExists`` error from the server is printed as a warning
        instead of being raised.
        """
        columns = [ColumnDescriptor(family) for family in columnfamilyList]
        print("creating tables:%s" % (self.tableName))
        try:
            self.client.createTable(self.tableName, columns)
        except AlreadyExists as ae:
            print("WARN: " + ae.message)
def __init__(self, host=None, port=10000, authMechanism=None, user=None,
             password=None, database=None, configuration=None):
    """Open a TCLIService session and optionally switch to ``database``.

    ``authMechanism`` is handed to the base-class initializer. ``NOSASL``
    connects through a buffered transport; all other mechanisms use the SASL
    client and mechanism name from ``_get_sasl_client``.
    """
    super(Connection, self).__init__(authMechanism)

    # Must set a password for thrift, even if it doesn't need one
    # Open issue with python-sasl
    password = self._check_password(authMechanism, password)

    endpoint = TSocket(host, port)
    if authMechanism == 'NOSASL':
        transport = TBufferedTransport(endpoint)
    else:
        sasl_pair = self._get_sasl_client(host, authMechanism, user,
                                          password, configuration)
        sasl_client, mechanism = sasl_pair
        transport = TSaslClientTransport(sasl_client, mechanism, endpoint)

    self.client = TCLIService.Client(TBinaryProtocol(transport))
    transport.open()
    opened = self.client.OpenSession(TOpenSessionReq(configuration=configuration))
    self.session = opened.sessionHandle

    if database is not None:
        statement = "USE {0}".format(database)
        with self.cursor() as cur:
            cur.execute(statement)
def main(args):
    """Run ``updateColumn`` for one table on the given HBase host.

    Args:
        args: argv-style list: program name, HBase host, table name.

    Prints a usage line and exits with status 1 when an operand is missing.
    """
    global client
    if len(args) < 3:
        print("%s hbasehost tablename " % args[0])
        sys.exit(1)
    hbaseHost = args[1]
    table_name = args[2]

    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
        updateColumn(table_name)
    finally:
        # Close the connection even if updateColumn raises.
        transport.close()
def main(args):
    """Connect to the given HBase host and call ``updateColumn`` on a table.

    Args:
        args: argv-style list: program name, HBase host, table name.

    Prints a usage line and exits with status 1 when an operand is missing.
    """
    global client
    if len(args) < 3:
        print("%s hbasehost tablename " % args[0])
        sys.exit(1)
    hbaseHost = args[1]
    table_name = args[2]

    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
        updateColumn(table_name)
    finally:
        # Previously the transport stayed open when updateColumn failed.
        transport.close()
class ServidorArchivos:
    """Client for the file service at ``host:puerto``.

    The transport is opened for each call and closed afterwards.
    """

    host = "localhost"
    puerto = 42778

    def __init__(self):
        """Build the transport and the Thrift client without connecting."""
        socket = TSocket(self.host, self.puerto)
        self.transport = TBufferedTransport(socket)
        protocolo = TBinaryProtocol(self.transport)
        self.conexion = Client(protocolo)

    def guardar_archivo(self, archivo: FileStorage, ruta: str):
        """Send the contents of ``archivo`` to be stored under ``ruta``.

        Returns whatever the remote ``guardarArchivo`` call returns.
        """
        imagen = Imagen(archivo.read(), ruta)
        self.transport.open()
        try:
            return self.conexion.guardarArchivo(imagen)
        finally:
            # Without this, a failed call left the transport open and every
            # later open() failed.
            self.transport.close()

    def eliminar_archivo(self, ruta: str):
        """Ask the service to delete ``ruta`` and return its result."""
        self.transport.open()
        try:
            return self.conexion.eliminarArchivo(ruta)
        finally:
            self.transport.close()

    def obtener_archivos(self, ruta: str) -> list:
        """Print ``ruta`` and return the remote ``obtenerArchivos`` result for it."""
        print(ruta)
        self.transport.open()
        try:
            return self.conexion.obtenerArchivos(ruta)
        finally:
            self.transport.close()
def do_import(n, filename):
    """Load the tweets in a JSON file into the ``tweets`` table.

    Each tweet is converted with ``create_mutations`` and written with its
    ``idstr`` as the row key. A tweet that raises is reported under
    ``print_lock`` and skipped. Afterwards an empty ``<filename>.done`` marker
    is created on a best-effort basis.

    Args:
        n: accepted but not used here.
        filename: path of the JSON file holding a list of tweets.
    """
    # NOTE(review): the handler of the original outer ``try`` was not visible;
    # a ``finally`` that closes the transport is used here - confirm that no
    # additional error handling was intended.
    transport = TBufferedTransport(TSocket(THRIFT_SERVER, THRIFT_PORT))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))

        # ``with`` closes the file even when the JSON cannot be parsed.
        with open(filename) as fp:
            tweets = json.load(fp)

        for t in tweets:
            try:
                mutations = create_mutations(t)
                client.mutateRow('tweets', t['idstr'], mutations, None)
            except Exception as e:
                with print_lock:
                    print('[WARNING]Tweet id:%s (file: %s) caused an exception: %s'
                          % (t['idstr'], filename, e))

        try:
            open(filename + '.done', 'w').close()
        except (IOError, OSError):
            # ignored: the marker file is optional
            pass
    finally:
        # The connection was previously never closed.
        transport.close()
def __init__(self, host=None, port=10000, authMechanism="PLAIN", user=None,
             password=None, database=None, configuration=None, timeout=None):
    """Connect with ``PLAIN`` or ``NOSASL`` and open a TCLIService session.

    For ``PLAIN`` an empty or missing password is replaced by a placeholder
    before the SASL transport is built. When ``database`` is given, a ``USE``
    statement is executed on a cursor.

    Raises:
        NotImplementedError: when ``authMechanism`` is neither ``PLAIN`` nor
            ``NOSASL``.
    """
    if authMechanism not in {"PLAIN", "NOSASL"}:
        raise NotImplementedError("authMechanism '{}' is either not supported or not implemented".format(authMechanism))

    sock = TSocket.TSocket(host, port)
    sock.setTimeout(timeout)

    if authMechanism != "NOSASL":
        # authMechanism == "PLAIN":
        if password is None or len(password) == 0:
            password = "******"
        transport = TSaslClientTransport(sock, host=host, service=None,
                                         mechanism=authMechanism,
                                         username=user, password=password)
    else:
        transport = TBufferedTransport(sock)

    self.client = TCLIService.Client(TBinaryProtocol(transport))
    transport.open()
    request = TOpenSessionReq(username=user, password=password,
                              configuration=configuration)
    self.session = self.client.OpenSession(request).sessionHandle

    if database is None:
        return
    with self.cursor() as cur:
        cur.execute("USE {0}".format(database))
def main(argv):
    """Send one hand-written ``testVoid`` call and check that a void reply comes back.

    The command line must select ``--protocol=header``, ``--transport=buffered``
    and no SSL; the actual wire transport is chosen with
    ``--override-transport`` ("buffered", the default, or "framed"). The call
    is written with a plain binary protocol on top of that transport.

    Args:
        argv: accepted for the caller's convenience but not used; the parser
            is invoked without arguments and therefore reads ``sys.argv``.

    Raises:
        ValueError: if ``--override-transport`` is neither "buffered" nor "framed".
        AssertionError: if the command-line preconditions or the reply checks fail.
    """
    p = argparse.ArgumentParser()
    add_common_args(p)
    # Since THeaderTransport acts as framed transport when detected frame, we
    # cannot use --transport=framed as it would result in 2 layered frames.
    p.add_argument('--override-transport')
    args = p.parse_args()
    assert args.protocol == 'header'
    assert args.transport == 'buffered'
    assert not args.ssl

    sock = TSocket(args.host, args.port, socket_family=socket.AF_INET)
    override = args.override_transport
    if not override or override == 'buffered':
        trans = TBufferedTransport(sock)
    elif override == 'framed':
        trans = TFramedTransport(sock)
    else:
        raise ValueError('invalid transport')

    trans.open()
    try:
        proto = TBinaryProtocol(trans)

        # Request: a CALL message named testVoid with an empty argument struct.
        proto.writeMessageBegin('testVoid', TMessageType.CALL, 3)
        proto.writeStructBegin('testVoid_args')
        proto.writeFieldStop()
        proto.writeStructEnd()
        proto.writeMessageEnd()
        trans.flush()

        # Response: must be a REPLY whose result struct has no fields.
        _, mtype, _ = proto.readMessageBegin()
        assert mtype == TMessageType.REPLY
        proto.readStructBegin()
        _, ftype, _ = proto.readFieldBegin()
        assert ftype == TType.STOP
        proto.readFieldEnd()
        proto.readStructEnd()
        proto.readMessageEnd()
    finally:
        # Close the connection even when a check above fails.
        trans.close()
class MyHbase(): def __init__(self,netloc,port,tablename): self.tablename = tablename self.transport = TBufferedTransport(TSocket(netloc,port)) self.protocol = TBinaryProtocol(self.transport) self.client = Hbase.Client(self.protocol) self.transport.open() if self.tablename in self.client.getTableNames(): if not self.client.isTableEnabled(self.tablename): print 'table off_line, start bring it on_line now' self.client.enableTable(self.tablename) print "successfully connect table: %s"%(self.tablename) print else: print "wrong table name" def __del__(self): self.transport.close() def printRow(self,entry): print "row: " + entry.row + ", cols:", for k in sorted(entry.columns): print k + " => " + entry.columns[k].value print def showTableInfo(self): ''' show some information of the hbase table ''' #print self.client.getColumnDescriptors(self.tablename) temp = self.client.getColumnDescriptors(self.tablename) for (col,desc) in temp.items(): print "column with name:"+col print desc def generatePlot(self,groupbyname,generatename): '''
def main(args):
    """Print the HBase rows whose keys are listed in a "verified" file.

    ``args[1]`` is the file name; its first two ``_``-separated components are
    taken as orgId and subOrgId and the table read is
    ``<orgId>_<subOrgId>_master_<orgId>``. Each line of the file is used as a
    row key. Only the first key is processed unless ``args[2]`` is ``-all``.

    Side effects: rebinds the module-level ``client`` and exits with status 1
    after printing the usage line when no file name is given.
    """
    global client

    if len(args) < 2:
        print("%s <verified file> -all" % (args[0]))
        sys.exit(1)

    filename = args[1]
    opt_all = len(args) > 2 and args[2] == "-all"
    filenamearray = filename.split("_")
    orgId = filenamearray[0]
    subOrgId = filenamearray[1]

    getConfiguration('host.properties')
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)
        tablename = "%s_%s_master_%s" % (orgId, subOrgId, orgId)

        # The context manager closes the key file even if a lookup fails.
        with open(filename, "r") as verified:
            for line in verified:
                row_key = line.strip()
                row = client.getRow(tablename, row_key)
                print(row_key)
                printRow(row)
                print("")
                if not opt_all:
                    break
    finally:
        # Always release the connection, including on errors.
        transport.close()
def main(args):
    """Disable and then delete the HBase table named by ``args[1]``.

    Prints a usage line and exits with status 1 unless exactly one table name
    is supplied. Rebinds the module-level ``client`` as a side effect.
    """
    global client

    if len(args) != 2:
        print("%s tableName" % (args[0]))
        sys.exit(1)

    tablename = args[1]

    getConfiguration('host.properties')
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)

        # The table is disabled first and deleted second.
        client.disableTable(tablename)
        client.deleteTable(tablename)
    finally:
        # Close the connection even if one of the calls above raises.
        transport.close()
class HBaseOperator():
    """Thin wrapper around an HBase client that keeps one transport open.

    The connection is opened in ``__init__`` and closed when the object is
    finalized.
    """

    def __init__(self, host="193.169.100.33", port=2181):
        """Open the transport to ``host``:``port`` and build the client.

        Args:
            host: server address; defaults to the address this class has
                always used.
            port: server port; defaults to the port this class has always used.
                NOTE(review): 2181 is usually a ZooKeeper port rather than an
                HBase Thrift port -- confirm the intended default.
        """
        self.host = host
        self.port = port
        self.transport = TBufferedTransport(TSocket(self.host, self.port))
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.protocol)

    def __del__(self):
        """Close the transport, if one was ever created."""
        # __del__ also runs when __init__ failed before self.transport was
        # assigned; looking it up defensively avoids an AttributeError there.
        transport = getattr(self, "transport", None)
        if transport is not None:
            transport.close()

    def getAllTablesInfo(self):
        """Print every table name with its column descriptors and regions."""
        # get table info
        listTables = self.client.getTableNames()
        print("=" * 40)
        print("Show all tables information....")
        for tableName in listTables:
            print("TableName:" + tableName)
            listColumns = self.client.getColumnDescriptors(tableName)
            print(listColumns)
            listTableRegions = self.client.getTableRegions(tableName)
            print(listTableRegions)
            print("+" * 40)
def main(args):
    """Verify the master table of one organisation against its Solr server.

    ``args[1]`` is the orgId and ``args[2]`` the subOrgId; the table checked
    is ``<orgId>_<subOrgId>_master_<orgId>``. Prints a usage line and exits
    with status 1 unless exactly those two arguments are given. Rebinds the
    module-level ``client`` as a side effect.
    """
    global client

    if len(args) != 3:
        print("%s orgId subOrgId" % (args[0]))
        sys.exit(1)

    orgId = args[1]
    subOrgId = args[2]

    getConfiguration("host.properties")
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)
        tablename = "%s_%s_master_%s" % (orgId, subOrgId, orgId)

        connectSolrServer(orgId, subOrgId)
        verifyData(tablename)
    finally:
        # Close the HBase connection even if verification fails.
        transport.close()
def main(args):
    """Verify the master table of one organisation against its Solr server.

    ``args[1]`` is the orgId and ``args[2]`` the subOrgId; the table checked
    is ``<orgId>_<subOrgId>_master_<orgId>``. Prints a usage line and exits
    with status 1 unless exactly those two arguments are given. Rebinds the
    module-level ``client`` as a side effect.
    """
    global client

    if len(args) != 3:
        print("%s orgId subOrgId" % (args[0]))
        sys.exit(1)

    orgId = args[1]
    subOrgId = args[2]

    getConfiguration('host.properties')
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)
        tablename = "%s_%s_master_%s" % (orgId, subOrgId, orgId)

        connectSolrServer(orgId, subOrgId)
        verifyData(tablename)
    finally:
        # Close the HBase connection even if verification fails.
        transport.close()
class TestAuthorization(CustomClusterTestSuite):
    """Custom-cluster authorization tests driven through an HS2 client and self.client."""

    # Created once, when the class body is executed; removed again in teardown().
    AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

    def setup(self):
        """Open an HS2 connection to the first impalad of the cluster."""
        host, port = (self.cluster.impalads[0].service.hostname,
                      self.cluster.impalads[0].service.hs2_port)
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Close the socket if it is still set and remove the audit log directory."""
        if self.socket:
            self.socket.close()
        shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=%s\
        --authorization_policy_provider_class=%s" %
        (AUTH_POLICY_FILE,
         "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider"))
    def test_custom_authorization_provider(self):
        """Checks a denied and an allowed DESCRIBE with a custom policy provider class."""
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        # User is 'test_user' (defined in the authorization policy file)
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        assert 'User \'%s\' does not have privileges to access' % 'test_user' in\
            str(execute_statement_resp)

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=hue=%s" % (AUTH_POLICY_FILE, getuser()))
    def test_access_runtime_profile(self):
        """Checks runtime profile / exec summary access with and without table privileges."""
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = getuser()
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Current user can't access view's underlying tables
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "explain select * from functional.complex_view"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        assert 'User \'%s\' does not have privileges to EXPLAIN' % getuser() in\
            str(execute_statement_resp)
        # User should not have access to the runtime profile
        self.__run_stmt_and_verify_profile_access(
            "select * from functional.complex_view", False, False)
        self.__run_stmt_and_verify_profile_access(
            "select * from functional.complex_view", False, True)

        # Repeat as a delegated user
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        # Delegated user is the current user
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)
        self.session_handle = resp.sessionHandle
        # User should not have access to the runtime profile
        self.__run_stmt_and_verify_profile_access(
            "select * from functional.complex_view", False, False)
        self.__run_stmt_and_verify_profile_access(
            "select * from functional.complex_view", False, True)

        # Create a view for which the user has access to the underlying tables.
        open_session_req.username = getuser()
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = """create view if not exists tpch.customer_view as select * from tpch.customer limit 1"""
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # User should be able to run EXPLAIN
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = """explain select * from tpch.customer_view"""
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # User should have access to the runtime profile and exec summary
        self.__run_stmt_and_verify_profile_access(
            "select * from tpch.customer_view", True, False)
        self.__run_stmt_and_verify_profile_access(
            "select * from tpch.customer_view", True, True)

        # Repeat as a delegated user
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        # Delegated user is the current user
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)
        self.session_handle = resp.sessionHandle
        # User should have access to the runtime profile and exec summary
        self.__run_stmt_and_verify_profile_access(
            "select * from tpch.customer_view", True, False)
        self.__run_stmt_and_verify_profile_access(
            "select * from tpch.customer_view", True, True)

        # Clean up
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "drop view if exists tpch.customer_view"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=foo=bar;hue=%s\
        --abort_on_failed_audit_event=false\
        --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, getuser(), AUDIT_LOG_DIR))
    def test_user_impersonation(self):
        """End-to-end user impersonation + authorization test"""
        self.__test_impersonation()

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        "--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=hue=bar\
        --authorized_proxy_group_config=foo=bar;hue=%s\
        --abort_on_failed_audit_event=false\
        --audit_event_log_dir=%s" %
        (AUTH_POLICY_FILE, grp.getgrgid(os.getgid()).gr_name, AUDIT_LOG_DIR))
    def test_group_impersonation(self):
        """End-to-end group impersonation + authorization test"""
        self.__test_impersonation()

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=foo=bar\
        --authorized_proxy_group_config=foo=bar\
        --abort_on_failed_audit_event=false\
        --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, AUDIT_LOG_DIR))
    def test_no_matching_user_and_group_impersonation(self):
        """Checks that delegation is refused when no proxy user or group entry matches."""
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        open_session_req.configuration['impala.doas.user'] = '******'
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to delegate to \'abc\'' in str(
            resp)

    def __test_impersonation(self):
        """End-to-end impersonation + authorization test.

        Expects authorization to be configured before running this test"""
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
        # the module within this test function, rather than as a top-level import. This way
        # the tests in that module will not get pulled when executing this test suite. The fix
        # is to split the utility code out of the TestHS2 class and support HS2 as a first
        # class citizen in our test framework.
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        # Connected user is 'hue'
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        # Delegated user is the current user
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        assert 'User \'%s\' does not have privileges to access' % getuser() in\
            str(execute_statement_resp)

        assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
            'No matching audit event recorded in time window'

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page,
                                          effective_user=getuser(),
                                          delegated_user=getuser(),
                                          connected_user='******')

        # Try a user we are not authorized to delegate to.
        open_session_req.configuration['impala.doas.user'] = '******'
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str(
            resp)

        # Create a new session which does not have a do_as_user.
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Run a simple query, which should succeed.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = resp.sessionHandle
        execute_statement_req.statement = "select 1"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile. Since there is
        # no do_as_user the Delegated User field should be empty.
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page,
                                          effective_user='******',
                                          delegated_user='',
                                          connected_user='******')

        # Clearing the attribute keeps teardown() from closing the socket twice.
        self.socket.close()
        self.socket = None

    def __verify_profile_user_fields(self, profile_str, effective_user,
                                     connected_user, delegated_user):
        """Verifies the given runtime profile string contains the specified values for
        User, Connected User, and Delegated User"""
        assert '\n User: %s\n' % effective_user in profile_str
        assert '\n Connected User: %s\n' % connected_user in profile_str
        assert '\n Delegated User: %s\n' % delegated_user in profile_str

    def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
        """Waits until an audit log record is found that contains the given user and
        impersonator, or until the timeout is reached.

        Returns True if a matching record was found, False on timeout.
        """
        # The audit event might not show up immediately (the audit logs are flushed to disk
        # on regular intervals), so poll the audit event logs until a matching record is
        # found.
        start_time = time()
        while time() - start_time < timeout_secs:
            for audit_file_name in os.listdir(self.AUDIT_LOG_DIR):
                if self.__find_matching_audit_record(audit_file_name, user,
                                                     impersonator):
                    return True
            sleep(1)
        return False

    def __find_matching_audit_record(self, audit_file_name, user, impersonator):
        """Returns True if a line of the given audit file matches user and impersonator.

        Every line is parsed as JSON; empty objects are skipped and the entry
        stored under the smallest key of each object is the one inspected.
        """
        path = os.path.join(self.AUDIT_LOG_DIR, audit_file_name)
        with open(path) as audit_log_file:
            # Iterate the file lazily instead of loading every line up front.
            for line in audit_log_file:
                json_dict = json.loads(line)
                if len(json_dict) == 0:
                    continue
                record = json_dict[min(json_dict)]
                if record['user'] == user and\
                        record['impersonator'] == impersonator:
                    return True
        return False

    def __run_stmt_and_verify_profile_access(self, stmt, has_access,
                                             close_operation):
        """Runs 'stmt' and retrieves the runtime profile and exec summary.

        If 'has_access' is true, it verifies that both requests succeed and that
        the profile contains a plan; otherwise it verifies that both are refused
        with a "not authorized" message for the current user. If
        'close_operation' is true, make sure the operation is closed before
        retrieving the profile and exec summary."""
        from tests.hs2.test_hs2 import TestHS2
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = stmt
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        if close_operation:
            close_operation_req = TCLIService.TCloseOperationReq()
            close_operation_req.operationHandle = execute_statement_resp.operationHandle
            TestHS2.check_response(
                self.hs2_client.CloseOperation(close_operation_req))

        get_profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
        get_profile_req.operationHandle = execute_statement_resp.operationHandle
        get_profile_req.sessionHandle = self.session_handle
        get_profile_resp = self.hs2_client.GetRuntimeProfile(get_profile_req)

        if has_access:
            TestHS2.check_response(get_profile_resp)
            assert "Plan: " in get_profile_resp.profile
        else:
            assert "User %s is not authorized to access the runtime profile or "\
                "execution summary." % (getuser()) in str(get_profile_resp)

        exec_summary_req = ImpalaHiveServer2Service.TGetExecSummaryReq()
        exec_summary_req.operationHandle = execute_statement_resp.operationHandle
        exec_summary_req.sessionHandle = self.session_handle
        exec_summary_resp = self.hs2_client.GetExecSummary(exec_summary_req)

        if has_access:
            TestHS2.check_response(exec_summary_resp)
        else:
            assert "User %s is not authorized to access the runtime profile or "\
                "execution summary." % (getuser()) in str(exec_summary_resp)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=" + SENTRY_CONFIG_FILE,
        catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(prefix="test_deprecated_none_",
                                        dir=os.getenv("LOG_DIR")))
    def test_deprecated_flag_doesnt_show(self):
        """Checks that no log file contains the deprecation message."""
        assert_no_files_in_dir_contain(
            self.impala_log_dir, "authorization_policy_file " +
            "flag is deprecated. Object Ownership feature is not supported")

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=%s\
        --authorization_policy_provider_class=%s" % (
            AUTH_POLICY_FILE,
            "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider"
        ),
        impala_log_dir=tempfile.mkdtemp(
            prefix="test_deprecated_", dir=os.getenv("LOG_DIR")))
    def test_deprecated_flags(self):
        """Checks that a log file contains the deprecation message."""
        assert_file_in_dir_contains(
            self.impala_log_dir, "authorization_policy_file flag" +
            " is deprecated. Object Ownership feature is not supported")

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
        catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_restart_",
                                        dir=os.getenv("LOG_DIR")))
    def test_catalog_restart(self, unique_role):
        """IMPALA-7713: Tests that a catalogd restart when authorization is enabled should
        reset the previous privileges stored in impalad's catalog to avoid stale privilege
        data in the impalad's catalog."""
        def assert_privileges():
            result = self.client.execute("show grant role %s_foo" % unique_role)
            TestAuthorization._check_privileges(
                result, [["database", "functional", "", "", "", "all", "false"]])

            result = self.client.execute("show grant role %s_bar" % unique_role)
            TestAuthorization._check_privileges(
                result, [["database", "functional_kudu", "", "", "", "all", "false"]])

            result = self.client.execute("show grant role %s_baz" % unique_role)
            TestAuthorization._check_privileges(
                result, [["database", "functional_avro", "", "", "", "all", "false"]])

        self.role_cleanup(unique_role)
        try:
            self.client.execute("create role %s_foo" % unique_role)
            self.client.execute("create role %s_bar" % unique_role)
            self.client.execute("create role %s_baz" % unique_role)
            self.client.execute(
                "grant all on database functional to role %s_foo" % unique_role)
            self.client.execute(
                "grant all on database functional_kudu to role %s_bar" % unique_role)
            self.client.execute(
                "grant all on database functional_avro to role %s_baz" % unique_role)

            assert_privileges()
            self._start_impala_cluster([
                "--catalogd_args=--sentry_config=%s" % SENTRY_CONFIG_FILE,
                "--restart_catalogd_only"
            ])
            assert_privileges()
        finally:
            self.role_cleanup(unique_role)

    def role_cleanup(self, role_name_match):
        """Cleans up any roles that match the given role name."""
        for role_name in self.client.execute("show roles").data:
            if role_name_match in role_name:
                self.client.execute("drop role %s" % role_name)

    @staticmethod
    def _check_privileges(result, expected):
        """Asserts that the rows of 'result', minus their last column, equal 'expected'.

        Each row in result.data is split on tabs and its final column dropped
        before the comparison.
        """
        def columns(row):
            cols = row.split("\t")
            return cols[0:len(cols) - 1]

        # Build a real list: on Python 3 a map object never equals a list.
        assert [columns(row) for row in result.data] == expected

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
        catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_restart_",
                                        dir=os.getenv("LOG_DIR")))
    def test_catalog_object(self, unique_role):
        """IMPALA-7721: Tests /catalog_object web API for principal and privilege"""
        self.role_cleanup(unique_role)
        try:
            self.client.execute("create role %s" % unique_role)
            self.client.execute(
                "grant select on database functional to role %s" % unique_role)
            for service in [
                    self.cluster.catalogd.service,
                    self.cluster.get_first_impalad().service
            ]:
                obj_dump = service.get_catalog_object_dump(
                    "PRINCIPAL", "%s.ROLE" % unique_role)
                assert "catalog_version" in obj_dump

                # Get the privilege associated with that principal ID.
                principal_id = re.search(r"principal_id \(i32\) = (\d+)", obj_dump)
                assert principal_id is not None
                obj_dump = service.get_catalog_object_dump(
                    "PRIVILEGE",
                    urllib.quote(
                        "server=server1->db=functional->action=select->grantoption=false.%s.ROLE"
                        % principal_id.group(1)))
                assert "catalog_version" in obj_dump

                # Get the principal that does not exist.
                obj_dump = service.get_catalog_object_dump(
                    "PRINCIPAL", "doesnotexist.ROLE")
                assert "CatalogException" in obj_dump

                # Get the privilege that does not exist.
                obj_dump = service.get_catalog_object_dump(
                    "PRIVILEGE",
                    urllib.quote(
                        "server=server1->db=doesntexist->action=select->grantoption=false.%s.ROLE"
                        % principal_id.group(1)))
                assert "CatalogException" in obj_dump
        finally:
            self.role_cleanup(unique_role)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
        catalogd_args=
        "--sentry_config=%s --sentry_catalog_polling_frequency_s=3600" %
        SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(
            prefix="test_invalidate_metadata_sentry_unavailable_",
            dir=os.getenv("LOG_DIR")))
    def test_invalidate_metadata_sentry_unavailable(self, unique_role):
        """IMPALA-7824: Tests that running INVALIDATE METADATA when Sentry is unavailable
        should not cause Impala to hang."""
        self.role_cleanup(unique_role)
        try:
            group_name = grp.getgrnam(getuser()).gr_name
            self.client.execute("create role %s" % unique_role)
            self.client.execute("grant all on server to role %s" % unique_role)
            self.client.execute("grant role %s to group `%s`" %
                                (unique_role, group_name))

            self._stop_sentry_service()
            # Calling INVALIDATE METADATA when Sentry is unavailable should return an error.
            result = self.execute_query_expect_failure(self.client,
                                                       "invalidate metadata")
            result_str = str(result)
            assert "MESSAGE: CatalogException: Error refreshing authorization policy:" \
                in result_str
            assert "CAUSED BY: ImpalaRuntimeException: Error refreshing authorization policy." \
                " Sentry is unavailable. Ensure Sentry is up:" in result_str

            self._start_sentry_service(SENTRY_CONFIG_FILE)
            # Calling INVALIDATE METADATA after Sentry is up should not return an error.
            self.execute_query_expect_success(self.client, "invalidate metadata")
        finally:
            self.role_cleanup(unique_role)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
        catalogd_args=
        "--sentry_config=%s --sentry_catalog_polling_frequency_s=3600 " %
        SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(prefix="test_refresh_authorization_",
                                        dir=os.getenv("LOG_DIR")))
    def test_refresh_authorization(self, unique_role):
        """Tests refresh authorization statement by adding and removing roles and
        privileges externally. The long Sentry polling is used so that any authorization
        metadata updated externally does not get polled by Impala in order to test an
        explicit call to refresh authorization statement."""
        group_name = grp.getgrnam(getuser()).gr_name
        self.role_cleanup(unique_role)
        for sync_ddl in [1, 0]:
            query_options = {'sync_ddl': sync_ddl}
            clients = []
            if sync_ddl:
                # When sync_ddl is True, we want to ensure the changes are propagated to all
                # coordinators.
                for impalad in self.cluster.impalads:
                    clients.append(impalad.service.create_beeswax_client())
            else:
                clients.append(self.client)

            try:
                self.client.execute("create role %s" % unique_role)
                self.client.execute("grant role %s to group `%s`" %
                                    (unique_role, group_name))
                self.client.execute("grant refresh on server to %s" % unique_role)

                self.validate_refresh_authorization_roles(
                    unique_role, query_options, clients)
                self.validate_refresh_authorization_privileges(
                    unique_role, query_options, clients)
            finally:
                self.role_cleanup(unique_role)

    def validate_refresh_authorization_roles(self, unique_role, query_options,
                                             clients):
        """This method tests refresh authorization statement by adding and removing roles
        externally."""
        try:
            # Create two roles inside Impala.
            self.client.execute("create role %s_internal1" % unique_role)
            self.client.execute("create role %s_internal2" % unique_role)
            # Drop an existing role (_internal1) outside Impala.
            role = "%s_internal1" % unique_role
            subprocess.check_call([
                "/bin/bash", "-c",
                "%s/bin/sentryShell --conf %s/sentry-site.xml -dr -r %s" %
                (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"), role)
            ],
                                  stdout=sys.stdout,
                                  stderr=sys.stderr)

            # The externally dropped role stays visible until the refresh.
            result = self.execute_query_expect_success(self.client, "show roles")
            assert any(role in x for x in result.data)
            self.execute_query_expect_success(self.client,
                                              "refresh authorization",
                                              query_options=query_options)
            for client in clients:
                result = self.execute_query_expect_success(
                    client, "show roles")
                assert not any(role in x for x in result.data)

            # Add a new role outside Impala.
            role = "%s_external" % unique_role
            subprocess.check_call([
                "/bin/bash", "-c",
                "%s/bin/sentryShell --conf %s/sentry-site.xml -cr -r %s" %
                (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"), role)
            ],
                                  stdout=sys.stdout,
                                  stderr=sys.stderr)

            # The externally created role stays invisible until the refresh.
            result = self.execute_query_expect_success(self.client, "show roles")
            assert not any(role in x for x in result.data)
            self.execute_query_expect_success(self.client,
                                              "refresh authorization",
                                              query_options=query_options)
            for client in clients:
                result = self.execute_query_expect_success(
                    client, "show roles")
                assert any(role in x for x in result.data)
        finally:
            for suffix in ["internal1", "internal2", "external"]:
                self.role_cleanup("%s_%s" % (unique_role, suffix))

    def validate_refresh_authorization_privileges(self, unique_role,
                                                  query_options, clients):
        """This method tests refresh authorization statement by adding and removing
        privileges externally."""
        # Grant select privilege outside Impala.
        subprocess.check_call([
            "/bin/bash", "-c",
            "%s/bin/sentryShell --conf %s/sentry-site.xml -gpr -p "
            "'server=server1->db=functional->table=alltypes->action=select' -r %s"
            % (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"),
               unique_role)
        ],
                              stdout=sys.stdout,
                              stderr=sys.stderr)

        # Before refresh authorization, there should only be one refresh privilege.
        result = self.execute_query_expect_success(
            self.client, "show grant role %s" % unique_role)
        assert len(result.data) == 1
        assert any("refresh" in x for x in result.data)

        for client in clients:
            self.execute_query_expect_failure(
                client, "select * from functional.alltypes limit 1")

        self.execute_query_expect_success(self.client,
                                          "refresh authorization",
                                          query_options=query_options)

        for client in clients:
            # Ensure select privilege was granted after refresh authorization.
            result = self.execute_query_expect_success(
                client, "show grant role %s" % unique_role)
            assert len(result.data) == 2
            assert any("select" in x for x in result.data)
            assert any("refresh" in x for x in result.data)
            self.execute_query_expect_success(
                client, "select * from functional.alltypes limit 1")
class TestAuthorizedProxy(CustomClusterTestSuite):
    """HS2 tests for proxy-user delegation ("impala.doas.user").

    Every test configures impalad through ``CustomClusterTestSuite.with_args``
    with ``--authorized_proxy_user_config`` and/or
    ``--authorized_proxy_group_config`` and talks to the first impalad through
    the raw HiveServer2 Thrift client created in ``setup``.
    """

    def setup(self):
        """Open a buffered binary-protocol HS2 connection to the first impalad."""
        service = self.cluster.impalads[0].service
        host, port = (service.hostname, service.hs2_port)
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Close the HS2 socket and delete the audit log directory."""
        if self.socket:
            self.socket.close()
        shutil.rmtree(AUDIT_LOG_DIR, ignore_errors=True)

    def _execute_hs2_stmt(self, statement, verify=True):
        """Execute a statement in the session stored in ``self.session_handle``.

        :param statement: the statement to execute
        :param verify: if true, the response is validated with
            ``TestHS2.check_response`` before it is returned
        :return: the ExecuteStatement response
        """
        # Imported locally so that importing this module does not pull in the
        # TestHS2 module at import time.
        from tests.hs2.test_hs2 import TestHS2
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = statement
        result = self.hs2_client.ExecuteStatement(execute_statement_req)
        if verify:
            TestHS2.check_response(result)
        return result

    def _open_hs2(self, user, configuration, verify=True):
        """Open an HS2 session.

        :param user: the user name to open the session with
        :param configuration: the session configuration dict
        :param verify: if true, the response is validated with
            ``TestHS2.check_response`` before it is returned
        :return: the OpenSession response
        """
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = user
        open_session_req.configuration = configuration
        resp = self.hs2_client.OpenSession(open_session_req)
        if verify:
            TestHS2.check_response(resp)
        return resp

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar;hue={1} ".format(
            SENTRY_IMPALAD_ARGS, getuser()),
        catalogd_args=SENTRY_CATALOGD_ARGS)
    def test_authorized_proxy_user_with_sentry(self, unique_role):
        """Tests authorized proxy user with Sentry using HS2."""
        self._test_authorized_proxy_with_sentry(unique_role,
                                                self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar;hue={1} ".format(
            RANGER_IMPALAD_ARGS, getuser()),
        catalogd_args=RANGER_CATALOGD_ARGS)
    def test_authorized_proxy_user_with_ranger(self):
        """Tests authorized proxy user with Ranger using HS2."""
        # This test fails due to bumping up the Ranger to a newer version.
        # TODO(fangyu.rao): Fix in a follow up commit.
        pytest.xfail("failed due to bumping up the Ranger to a newer version")
        self._test_authorized_proxy_with_ranger(self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=hue=bar "
                     "--authorized_proxy_group_config=foo=bar;hue={1}".format(
                         SENTRY_IMPALAD_ARGS, grp.getgrgid(os.getgid()).gr_name),
        catalogd_args=SENTRY_CATALOGD_ARGS)
    def test_authorized_proxy_group_with_sentry(self, unique_role):
        """Tests authorized proxy group with Sentry using HS2."""
        self._test_authorized_proxy_with_sentry(unique_role,
                                                self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=hue=bar "
                     "--authorized_proxy_group_config=foo=bar;hue={1}".format(
                         RANGER_IMPALAD_ARGS, grp.getgrgid(os.getgid()).gr_name),
        catalogd_args=RANGER_CATALOGD_ARGS)
    def test_authorized_proxy_group_with_ranger(self):
        """Tests authorized proxy group with Ranger using HS2."""
        # This test fails due to bumping up the Ranger to a newer version.
        # TODO(fangyu.rao): Fix in a follow up commit.
        pytest.xfail("failed due to bumping up the Ranger to a newer version")
        self._test_authorized_proxy_with_ranger(self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar "
                     "--authorized_proxy_group_config=foo=bar".format(
                         SENTRY_IMPALAD_ARGS),
        catalogd_args=SENTRY_CATALOGD_ARGS)
    def test_no_matching_user_and_group_authorized_proxy_with_sentry(self):
        """Tests that delegation is rejected when no proxy rule matches (Sentry)."""
        self._test_no_matching_user_and_group_authorized_proxy()

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar "
                     "--authorized_proxy_group_config=foo=bar".format(
                         RANGER_IMPALAD_ARGS),
        catalogd_args=RANGER_CATALOGD_ARGS)
    def test_no_matching_user_and_group_authorized_proxy_with_ranger(self):
        """Tests that delegation is rejected when no proxy rule matches (Ranger)."""
        self._test_no_matching_user_and_group_authorized_proxy()

    def _test_no_matching_user_and_group_authorized_proxy(self):
        """Open a delegating session and assert the 'not authorized' error text."""
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = "******"
        open_session_req.configuration = dict()
        open_session_req.configuration["impala.doas.user"] = "******"
        resp = self.hs2_client.OpenSession(open_session_req)
        assert "User 'hue' is not authorized to delegate to 'abc'" in str(resp)

    def _test_authorized_proxy_with_sentry(self, role, test_func):
        """Run ``test_func`` with ``role`` granted on tpch.lineitem, then drop it.

        :param role: name of the role to create, grant and finally drop
        :param test_func: zero-argument callable holding the actual test body
        """
        try:
            self.session_handle = self._open_hs2(getuser(), dict()).sessionHandle
            self._execute_hs2_stmt("create role {0}".format(role))
            self._execute_hs2_stmt(
                "grant all on table tpch.lineitem to role {0}".format(role))
            self._execute_hs2_stmt("grant role {0} to group {1}".format(
                role, grp.getgrnam(getuser()).gr_name))
            self._execute_hs2_stmt("grant role {0} to group {1}".format(
                role, grp.getgrgid(os.getgid()).gr_name))
            test_func()
        finally:
            # test_func replaces self.session_handle, so open a fresh session
            # as the current user before cleaning up.
            self.session_handle = self._open_hs2(getuser(), dict()).sessionHandle
            self._execute_hs2_stmt(
                "grant all on server to role {0}".format(role))
            self._execute_hs2_stmt("grant role {0} to group {1}".format(
                role, grp.getgrnam(getuser()).gr_name))
            self._execute_hs2_stmt("drop role {0}".format(role))

    def _test_authorized_proxy_with_ranger(self, test_func):
        """Run ``test_func`` with the current user granted on tpch.lineitem.

        The grant is issued and later revoked in sessions opened as
        RANGER_ADMIN_USER.

        :param test_func: zero-argument callable holding the actual test body
        """
        try:
            self.session_handle = self._open_hs2(RANGER_ADMIN_USER,
                                                 dict()).sessionHandle
            self._execute_hs2_stmt(
                "grant all on table tpch.lineitem to user {0}".format(getuser()))
            test_func()
        finally:
            self.session_handle = self._open_hs2(RANGER_ADMIN_USER,
                                                 dict()).sessionHandle
            self._execute_hs2_stmt(
                "revoke all on table tpch.lineitem from user {0}".format(
                    getuser()))

    def _test_authorized_proxy(self):
        """End-to-end impersonation + authorization test. Expects authorization
        to be configured before running this test."""
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need
        # to import the module within this test function, rather than as a
        # top-level import. This way the tests in that module will not get pulled
        # when executing this test suite. The fix is to split the utility code
        # out of the TestHS2 class and support HS2 as a first class citizen in
        # our test framework.
        from tests.hs2.test_hs2 import TestHS2

        # Try to query a table we are not authorized to access.
        self.session_handle = self._open_hs2(
            "hue", {"impala.doas.user": getuser()}).sessionHandle
        bad_resp = self._execute_hs2_stmt("describe tpch_seq.lineitem", False)
        assert "User '%s' does not have privileges to access" % getuser() in \
            str(bad_resp)

        assert self._wait_for_audit_record(user=getuser(), impersonator="hue"), \
            "No matching audit event recorded in time window"

        # Now try the same operation on a table we are authorized to access.
        good_resp = self._execute_hs2_stmt("describe tpch.lineitem")
        TestHS2.check_response(good_resp)

        # Verify the correct user information is in the runtime profile.
        query_id = operation_id_to_query_id(
            good_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[0].service.read_query_profile_page(
            query_id)
        self._verify_profile_user_fields(profile_page,
                                         effective_user=getuser(),
                                         delegated_user=getuser(),
                                         connected_user="******")

        # Try to delegate a user we are not authorized to delegate to.
        resp = self._open_hs2("hue", {"impala.doas.user": "******"}, False)
        assert "User 'hue' is not authorized to delegate to 'some_user'" in str(
            resp)

        # Create a new session which does not have a do_as_user and run a simple
        # query.
        self.session_handle = self._open_hs2("hue", dict()).sessionHandle
        resp = self._execute_hs2_stmt("select 1")

        # Verify the correct user information is in the runtime profile. Since
        # there is no do_as_user the Delegated User field should be empty.
        query_id = operation_id_to_query_id(resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[0].service.read_query_profile_page(
            query_id)
        self._verify_profile_user_fields(profile_page,
                                         effective_user="******",
                                         delegated_user="",
                                         connected_user="******")

    def _verify_profile_user_fields(self, profile_str, effective_user,
                                    connected_user, delegated_user):
        """Verifies the given runtime profile string contains the specified
        values for User, Connected User, and Delegated User."""
        assert "\n User: {0}\n".format(effective_user) in profile_str
        assert "\n Connected User: {0}\n".format(connected_user) in profile_str
        assert "\n Delegated User: {0}\n".format(delegated_user) in profile_str

    def _wait_for_audit_record(self, user, impersonator, timeout_secs=30):
        """Poll the audit log directory for a record with the given user and
        impersonator.

        :param user: expected value of the record's "user" field
        :param impersonator: expected value of the record's "impersonator" field
        :param timeout_secs: how long to keep polling, in seconds
        :return: True if a matching record was found before the timeout,
            False otherwise
        """
        # The audit event might not show up immediately (the audit logs are
        # flushed to disk on regular intervals), so poll the audit event logs
        # until a matching record is found.
        start_time = time.time()
        while time.time() - start_time < timeout_secs:
            for audit_file_name in os.listdir(AUDIT_LOG_DIR):
                if self._find_matching_audit_record(audit_file_name, user,
                                                    impersonator):
                    return True
            time.sleep(1)
        return False

    def _find_matching_audit_record(self, audit_file_name, user, impersonator):
        """Return True if any JSON line of the given audit file matches.

        Each line is parsed as a JSON object; the entry stored under its
        smallest key is compared against ``user`` and ``impersonator``.

        :param audit_file_name: file name relative to AUDIT_LOG_DIR
        """
        with open(os.path.join(AUDIT_LOG_DIR, audit_file_name)) as audit_log_file:
            # Stream the file line by line instead of loading it with readlines().
            for line in audit_log_file:
                json_dict = json.loads(line)
                if not json_dict:
                    continue
                record = json_dict[min(json_dict)]
                if record["user"] == user and \
                        record["impersonator"] == impersonator:
                    return True
        return False
class HBaseClient(object):
    """Small convenience wrapper around the HBase Thrift client."""

    def __init__(self, ip, port=9090):
        """Establish the connection to the Thrift server.

        :param ip: address of the Thrift server
        :param port: port of the Thrift server
        """
        # Server address and port.
        self.__transport = TBufferedTransport(TSocket.TSocket(ip, port))
        # Wire protocol.
        protocol = TBinaryProtocol.TBinaryProtocol(self.__transport)
        # Client.
        self.__client = Hbase.Client(protocol)
        # Open the connection.
        self.__transport.open()

    def __del__(self):
        """Close the transport when the wrapper is garbage-collected."""
        self.__transport.close()

    def get_tables(self):
        """List all tables.

        :return: list of table names
        """
        return self.__client.getTableNames()

    def create_table(self, table, *columns):
        """Create a table.

        :param table: table name
        :param columns: column family names
        """
        # Build a real list: a lazy map object has no len() on Python 3.
        column_families = [ColumnDescriptor(col) for col in columns]
        self.__client.createTable(table, column_families)

    def put(self, table, row, columns):
        """Write one row.

        :param table: table name
        :param row: row key
        :param columns: dict mapping column name to value
        """
        # items() yields (name, value) tuples; unpack them explicitly. The
        # previous map() with a two-argument lambda raised TypeError.
        mutations = [Mutation(column=name, value=value)
                     for name, value in columns.items()]
        self.__client.mutateRow(table, row, mutations)

    def delete(self, table, row, column):
        """Delete cells of a row.

        :param table: table name
        :param row: row key
        :param column: column name passed through to ``deleteAll``
        """
        self.__client.deleteAll(table, row, column)

    def scan(self, table, start_row="", columns=None):
        """Iterate over rows, yielding one ``{column: value}`` dict per row.

        This is a generator: the scanner is opened on first iteration and is
        closed when the generator is exhausted, closed, or garbage-collected.

        :param table: table name
        :param start_row: row key to start scanning from
        :param columns: columns / column families to return
        """
        scanner = self.__client.scannerOpen(table, start_row, columns)
        try:
            while True:
                r = self.__client.scannerGet(scanner)
                if not r:
                    break
                yield dict((name, cell.value)
                           for name, cell in r[0].columns.items())
        finally:
            # Release the server-side scanner; it was previously leaked.
            self.__client.scannerClose(scanner)
def connect(server='localhost', port=9090):
    """Open a Thrift connection and return an ``Hbase.Client`` bound to it.

    The client speaks the binary protocol over a buffered transport. The
    transport is opened here but not returned, so the caller gets no handle
    with which to close it.

    :param server: host name of the Thrift server
    :param port: port of the Thrift server; converted with ``int()``
    :return: the connected ``Hbase.Client``
    """
    sock = TSocket(server, int(port))
    buffered = TBufferedTransport(sock)
    buffered.open()
    return Hbase.Client(TBinaryProtocol.TBinaryProtocol(buffered))
class HS2TestSuite(ImpalaTestSuite):
    """Base class for HiveServer2 tests: owns the HS2 Thrift client and offers
    helpers for fetching results and checking response statuses."""

    TEST_DB = 'hs2_db'

    # Names of the typed value attributes probed on a result column. Exactly
    # which one is set depends on the column's type.
    HS2_V6_COLUMN_TYPES = [
        'boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val',
        'doubleVal', 'binaryVal'
    ]

    def setup(self):
        """Clean the test database and open the HS2 connection."""
        self.cleanup_db(self.TEST_DB)
        host, port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Clean the test database and close the HS2 socket."""
        self.cleanup_db(self.TEST_DB)
        if self.socket:
            self.socket.close()

    @staticmethod
    def check_response(
            response,
            expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
            expected_error_prefix=None):
        """Assert that the response carries the expected status code.

        For a non-success expected status, also asserts that the error message
        starts with expected_error_prefix when one is given.
        """
        assert response.status.statusCode == expected_status_code
        if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS \
                and expected_error_prefix is not None:
            assert response.status.errorMessage.startswith(
                expected_error_prefix)

    def close(self, op_handle):
        """Close the given operation and assert that closing succeeded."""
        close_op_req = TCLIService.TCloseOperationReq()
        close_op_req.operationHandle = op_handle
        close_op_resp = self.hs2_client.CloseOperation(close_op_req)
        assert close_op_resp.status.statusCode == \
            TCLIService.TStatusCode.SUCCESS_STATUS

    def get_num_rows(self, result_set):
        """Return the number of rows in a row- or column-oriented result set."""
        # rows will always be set, so the only way to tell if we should use it
        # is to see if any columns are set.
        if result_set.columns is None or len(result_set.columns) == 0:
            return len(result_set.rows)

        # Columnar result: the row count is the length of whichever typed value
        # list is populated on the first column.
        for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
            typed_col = getattr(result_set.columns[0], col_type)
            if typed_col is not None:
                return len(typed_col.values)

        assert False

    def fetch_at_most(self, handle, orientation, size, expected_num_rows=None):
        """Fetches at most size number of rows from the query identified by the
        given operation handle. Uses the given fetch orientation. Asserts that
        the fetch returns a success status, and that the number of rows returned
        is equal to given expected_num_rows (if given). It is only safe for
        expected_num_rows to be 0 or 1: Impala does not guarantee that a larger
        result set will be returned in one go. Use fetch_until() for repeated
        fetches. Returns the fetch response."""
        assert expected_num_rows is None or expected_num_rows in (0, 1)
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        if expected_num_rows is not None:
            assert self.get_num_rows(
                fetch_results_resp.results) == expected_num_rows
        return fetch_results_resp

    def fetch_until(self, handle, orientation, size, expected_num_rows=None):
        """Fetches rows from the given query handle until 'expected_num_rows'
        rows have been received, by repeatedly issuing
        fetch(size - num rows already fetched) calls. The first fetch uses the
        given orientation, subsequent ones use FETCH_NEXT.

        Asserts that every follow-up fetch returns at least one row and that
        exactly 'expected_num_rows' rows were fetched in total. If
        'expected_num_rows' is None, it defaults to 'size', so that the effect
        is to both ask for and expect the same number of rows. Returns nothing."""
        assert expected_num_rows is None or (size >= expected_num_rows)
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        num_rows_fetched = self.get_num_rows(fetch_results_resp.results)
        if expected_num_rows is None:
            expected_num_rows = size
        while num_rows_fetched < expected_num_rows:
            # Always try to fetch at most 'size'
            fetch_results_req.maxRows = size - num_rows_fetched
            fetch_results_req.orientation = \
                TCLIService.TFetchOrientation.FETCH_NEXT
            fetch_results_resp = self.hs2_client.FetchResults(
                fetch_results_req)
            HS2TestSuite.check_response(fetch_results_resp)
            last_fetch_size = self.get_num_rows(fetch_results_resp.results)
            assert last_fetch_size > 0
            num_rows_fetched += last_fetch_size

        assert num_rows_fetched == expected_num_rows

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Attempts to fetch rows from the query identified by the given
        operation handle. Asserts that the fetch returns an error with an error
        message matching the given expected_error_prefix. Returns the fetch
        response."""
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = 100
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp,
                                    TCLIService.TStatusCode.ERROR_STATUS,
                                    expected_error_prefix)
        return fetch_results_resp

    def result_metadata(self, handle):
        """Gets the schema for the query identified by the handle."""
        req = TCLIService.TGetResultSetMetadataReq()
        req.operationHandle = handle
        resp = self.hs2_client.GetResultSetMetadata(req)
        HS2TestSuite.check_response(resp)
        return resp

    def column_results_to_string(self, columns):
        """Quick-and-dirty way to get a readable string to compare the output of
        a columnar-oriented query to its expected output.

        Returns a (num_rows, formatted) tuple where formatted has one
        comma-separated, newline-terminated line per row."""
        num_rows = 0
        # Determine the number of rows by finding the type of the first column
        for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
            typed_col = getattr(columns[0], col_type)
            if typed_col is not None:
                num_rows = len(typed_col.values)
                break

        lines = []
        for i in xrange(num_rows):
            row = []
            for c in columns:
                for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
                    typed_col = getattr(c, col_type)
                    if typed_col is not None:
                        # 'nulls' is read as a bitmap: bit (i % 8) of byte
                        # (i // 8) marks row i as NULL.
                        indicator = ord(typed_col.nulls[i // 8])
                        if indicator & (1 << (i % 8)):
                            row.append("NULL")
                        else:
                            row.append(str(typed_col.values[i]))
                        break
            lines.append(", ".join(row) + "\n")
        return (num_rows, "".join(lines))

    def get_operation_status(self, operation_handle):
        """Executes GetOperationStatus with the given operation handle and
        returns the TGetOperationStatusResp."""
        get_operation_status_req = TCLIService.TGetOperationStatusReq()
        get_operation_status_req.operationHandle = operation_handle
        get_operation_status_resp = \
            self.hs2_client.GetOperationStatus(get_operation_status_req)
        return get_operation_status_resp

    def wait_for_operation_state(self, operation_handle, expected_state,
                                 timeout=10, interval=1):
        """Waits for the operation to reach expected_state by polling
        GetOperationStatus every interval seconds, returning the
        TGetOperationStatusResp, or raising an assertion after timeout
        seconds."""
        start_time = time()
        # Tracked separately so the failure message is well-defined even if the
        # loop body never runs (e.g. timeout <= 0).
        actual_state = None
        while time() - start_time < timeout:
            get_operation_status_resp = self.get_operation_status(
                operation_handle)
            HS2TestSuite.check_response(get_operation_status_resp)
            actual_state = get_operation_status_resp.operationState
            # States are integers: compare by value, not by identity.
            if actual_state == expected_state:
                return get_operation_status_resp
            sleep(interval)
        assert False, \
            'Did not reach expected operation state %s in time, actual state was ' \
            '%s' % (expected_state, actual_state)