Exemplo n.º 1
0
def main(argv):
  """Call test_void() twice over an explicitly chosen transport and protocol.

  Parses the common arguments plus --override-transport and
  --override-protocol, requires protocol 'header', transport 'buffered' and
  no SSL, then connects to args.host:args.port.

  NOTE(review): argv is accepted but not forwarded; p.parse_args() is called
  without arguments and therefore reads the process command line.

  Raises:
    ValueError: if an override names an unknown transport or protocol.
  """
  p = argparse.ArgumentParser()
  add_common_args(p)
  # Since THeaderTransport acts as framed transport when detected frame, we
  # cannot use --transport=framed as it would result in 2 layered frames.
  p.add_argument('--override-transport')
  p.add_argument('--override-protocol')
  args = p.parse_args()
  assert args.protocol == 'header'
  assert args.transport == 'buffered'
  assert not args.ssl

  sock = TSocket(args.host, args.port, socket_family=socket.AF_INET)
  if not args.override_transport or args.override_transport == 'buffered':
    trans = TBufferedTransport(sock)
  elif args.override_transport == 'framed':
    print('TFRAMED')
    trans = TFramedTransport(sock)
  else:
    raise ValueError('invalid transport')
  trans.open()

  # From here on the transport is open, so always close it, including when
  # the protocol override is rejected or a test call fails.
  try:
    if not args.override_protocol or args.override_protocol == 'binary':
      proto = TBinaryProtocol(trans)
    elif args.override_protocol == 'compact':
      proto = TCompactProtocol(trans)
    else:
      raise ValueError('invalid protocol')

    test_void(proto)
    test_void(proto)
  finally:
    trans.close()
Exemplo n.º 2
0
 def __init__(self,
              host=None,
              port=10000,
              authMechanism=None,
              user=None,
              password=None,
              database=None,
              cursorclass=Cursor):
     """Open a Thrift session using the requested authentication mechanism.

     NOSASL uses a buffered transport; every other accepted mechanism uses a
     SASL client transport with the "PLAIN" mechanism. When a database is
     given, a "USE <database>" statement is executed on a fresh cursor.

     Raises:
         NotImplementedError: if authMechanism is not NOSASL, PLAIN or LDAP
             (KERBEROS is recognised but rejected).
     """
     usable_mechanisms = frozenset(('NOSASL', 'PLAIN', 'LDAP'))
     if authMechanism not in usable_mechanisms:
         raise NotImplementedError(
             'authMechanism is either not supported or not implemented')

     # Thrift must be given a password even when none is needed
     # (open issue with python-sasl), so fall back to a placeholder.
     if authMechanism == 'PLAIN':
         if password is None or len(password) == 0:
             password = '******'

     sock = TSocket(host, port)
     self.cursorclass = cursorclass
     if authMechanism != 'NOSASL':
         sasl_client = sasl.Client()
         sasl_client.setAttr("username", user)
         sasl_client.setAttr("password", password)
         sasl_client.init()
         link = TSaslClientTransport(sasl_client, "PLAIN", sock)
     else:
         link = TBufferedTransport(sock)

     self.client = TCLIService.Client(TBinaryProtocol(link))
     link.open()
     self.session = self.client.OpenSession(TOpenSessionReq()).sessionHandle

     if database is not None:
         with self.cursor() as cur:
             cur.execute("USE {0}".format(database))
def main(args):
    """Run columnGrep() for a table/column/pattern and print the result.

    args is [program, tablename, column, pattern[, output]]. With fewer than
    four entries a usage line is printed and the process exits with status 1.
    The Thrift client is stored in the module-level ``client`` global; the
    transport is closed before returning, also when a helper raises.
    """
    global client

    if len(args) < 4:
        print("%s tablename column pattern output[option]" % (args[0]))
        sys.exit(1)

    tablename, column, pattern = args[1:4]
    outputfile = args[4] if len(args) > 4 else ""

    getConfiguration('host.properties')

    # hbaseHost is not defined in this function; presumably it is a module
    # global populated by getConfiguration() -- confirm.
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))

        rowlist = columnGrep(tablename, column, pattern)

        print(len(rowlist))
        printStdout(rowlist, outputfile)
    finally:
        transport.close()
Exemplo n.º 4
0
 def __init__(self, host=None, port=10000, authMechanism=None, user=None, password=None, database=None, cursorclass=Cursor):
     """Connect over Thrift and open a session.

     A buffered transport is used for NOSASL; PLAIN and LDAP both go through
     a SASL client transport using the "PLAIN" mechanism. If database is not
     None, "USE <database>" is executed once the session is open.

     Raises:
         NotImplementedError: for any mechanism other than NOSASL, PLAIN or
             LDAP; KERBEROS is explicitly refused.
     """
     accepted = frozenset(['NOSASL', 'PLAIN', 'LDAP'])
     if authMechanism not in accepted:
         raise NotImplementedError('authMechanism is either not supported or not implemented')

     # A password has to be set for thrift even if it is not needed
     # (open issue with python-sasl).
     needs_placeholder = authMechanism == 'PLAIN' and (password is None or len(password) == 0)
     if needs_placeholder:
         password = '******'

     raw_socket = TSocket(host, port)
     self.cursorclass = cursorclass

     if authMechanism == 'NOSASL':
         channel = TBufferedTransport(raw_socket)
     else:
         sasl_client = sasl.Client()
         for attr_name, attr_value in (("username", user), ("password", password)):
             sasl_client.setAttr(attr_name, attr_value)
         sasl_client.init()
         channel = TSaslClientTransport(sasl_client, "PLAIN", raw_socket)

     self.client = TCLIService.Client(TBinaryProtocol(channel))
     channel.open()
     opened = self.client.OpenSession(TOpenSessionReq())
     self.session = opened.sessionHandle

     if database is None:
         return
     with self.cursor() as cur:
         cur.execute("USE {0}".format(database))
Exemplo n.º 5
0
class HS2TestSuite(ImpalaTestSuite):
    """Test-suite base class holding a TCLIService Thrift client."""

    def setup(self):
        """Connect to IMPALAD_HS2_HOST_PORT and build the HS2 client."""
        hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(hs2_host, hs2_port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = TCLIService.Client(self.protocol)

    def teardown(self):
        """Close the socket created by setup() when it is set."""
        if not self.socket:
            return
        self.socket.close()

    @staticmethod
    def check_response(
            response,
            expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
            expected_error_prefix=None):
        """Assert the response status code and, for non-success expectations
        with a prefix given, that the error message starts with that prefix."""
        assert response.status.statusCode == expected_status_code
        success = TCLIService.TStatusCode.SUCCESS_STATUS
        if expected_status_code != success:
            if expected_error_prefix is not None:
                assert response.status.errorMessage.startswith(
                    expected_error_prefix)

    def close(self, op_handle):
        """Close the given operation handle and assert that it succeeded."""
        request = TCLIService.TCloseOperationReq()
        request.operationHandle = op_handle
        response = self.hs2_client.CloseOperation(request)
        assert response.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

    def fetch(self, handle, orientation, size, expected_num_rows=None):
        """Fetch with maxRows=size from the query behind the operation handle,
        using the given fetch orientation. Asserts a success status and that
        the number of rows returned equals expected_num_rows if one was given,
        otherwise size. Returns the fetch response."""
        request = TCLIService.TFetchResultsReq()
        request.operationHandle = handle
        request.orientation = orientation
        request.maxRows = size
        response = self.hs2_client.FetchResults(request)
        HS2TestSuite.check_response(response)
        wanted_rows = size if expected_num_rows is None else expected_num_rows
        assert len(response.results.rows) == wanted_rows
        return response

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Attempt a fetch (maxRows=100) that is expected to fail. Asserts an
        error status whose message starts with expected_error_prefix and
        returns the fetch response."""
        request = TCLIService.TFetchResultsReq()
        request.operationHandle = handle
        request.orientation = orientation
        request.maxRows = 100
        response = self.hs2_client.FetchResults(request)
        HS2TestSuite.check_response(
            response,
            TCLIService.TStatusCode.ERROR_STATUS,
            expected_error_prefix)
        return response
Exemplo n.º 6
0
class HbaseClient:
    """Small wrapper around an HBase Thrift client on a buffered transport."""

    def __init__(self, host, port):
        """Open the transport to host:port and create the Thrift client."""
        self.transport = TBufferedTransport(TSocket(host, port))
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.protocol)
        # Scan description reused by every scanner opened through this object.
        self.scan = TScan()

    def createTable(self, table, contents):
        """Create `table` with `contents` as its single column descriptor."""
        return self.client.createTable(table, [contents])

    def mutateRow(self, table, row, mutations, st):
        """Forward a mutateRow call to the Thrift client unchanged."""
        return self.client.mutateRow(table, row, mutations, st)

    def getTable(self):
        """Return the table names reported by the server."""
        return self.client.getTableNames()

    def scannerGetList(self, tableName, num):
        """Open a scanner on tableName and return scannerGetList(id, num).

        The scanner is closed before returning so it is not left open on the
        server; the scanner id is never exposed to the caller.
        """
        scanner_id = self.client.scannerOpenWithScan(tableName, self.scan, None)
        try:
            return self.client.scannerGetList(scanner_id, num)
        finally:
            self.client.scannerClose(scanner_id)

    def scannerGet(self, tableName):
        """Open a scanner on tableName and return the result of one scannerGet.

        The scanner is closed before returning, as in scannerGetList().
        """
        scanner_id = self.client.scannerOpenWithScan(tableName, self.scan, None)
        try:
            return self.client.scannerGet(scanner_id)
        finally:
            self.client.scannerClose(scanner_id)

    def close(self):
        """Close the underlying transport."""
        self.transport.close()
Exemplo n.º 7
0
def main(argv):
    """Call test_void() twice over an explicitly chosen transport and protocol.

    Parses the common arguments plus --override-transport and
    --override-protocol, requires protocol 'header', transport 'buffered' and
    no SSL, then connects to args.host:args.port.

    NOTE(review): argv is accepted but not forwarded; p.parse_args() is
    called without arguments and therefore reads the process command line.

    Raises:
        ValueError: if an override names an unknown transport or protocol.
    """
    p = argparse.ArgumentParser()
    add_common_args(p)
    # Since THeaderTransport acts as framed transport when detected frame, we
    # cannot use --transport=framed as it would result in 2 layered frames.
    p.add_argument('--override-transport')
    p.add_argument('--override-protocol')
    args = p.parse_args()
    assert args.protocol == 'header'
    assert args.transport == 'buffered'
    assert not args.ssl

    sock = TSocket(args.host, args.port, socket_family=socket.AF_INET)
    if not args.override_transport or args.override_transport == 'buffered':
        trans = TBufferedTransport(sock)
    elif args.override_transport == 'framed':
        print('TFRAMED')
        trans = TFramedTransport(sock)
    else:
        raise ValueError('invalid transport')
    trans.open()

    # The transport is open from here on: always close it, including when the
    # protocol override is rejected or a test call fails.
    try:
        if not args.override_protocol or args.override_protocol == 'binary':
            proto = TBinaryProtocol(trans)
        elif args.override_protocol == 'compact':
            proto = TCompactProtocol(trans)
        else:
            raise ValueError('invalid protocol')

        test_void(proto)
        test_void(proto)
    finally:
        trans.close()
Exemplo n.º 8
0
    def __init__(self, host=None, port=10000, authMechanism=None, user=None, password=None, database=None, configuration=None, timeout=None):
        """Connect over Thrift, authenticate, and open a session.

        NOSASL uses a buffered transport. Other mechanisms use a SASL client
        transport: "GSSAPI" for KERBEROS (host/service taken from
        self._get_krb_settings), "PLAIN" otherwise. The socket timeout is set
        to `timeout`. If database is not None, "USE <database>" is executed.

        Raises:
            NotImplementedError: if authMechanism is not NOSASL, PLAIN,
                KERBEROS or LDAP.
        """
        known_mechanisms = frozenset(('NOSASL', 'PLAIN', 'KERBEROS', 'LDAP'))
        if authMechanism not in known_mechanisms:
            raise NotImplementedError('authMechanism is either not supported or not implemented')

        # Thrift needs a password even when none is required
        # (open issue with python-sasl).
        if authMechanism == 'PLAIN':
            if password is None or len(password) == 0:
                password = '******'

        sock = TSocket(host, port)
        sock.setTimeout(timeout)

        if authMechanism != 'NOSASL':
            sasl_client = sasl.Client()
            sasl_client.setAttr("username", user)
            sasl_client.setAttr("password", password)
            if authMechanism == 'KERBEROS':
                krb_host, krb_service = self._get_krb_settings(host, configuration)
                sasl_client.setAttr("host", krb_host)
                sasl_client.setAttr("service", krb_service)
                mechanism = 'GSSAPI'
            else:
                mechanism = 'PLAIN'
            sasl_client.init()
            link = TSaslClientTransport(sasl_client, mechanism, sock)
        else:
            link = TBufferedTransport(sock)

        self.client = TCLIService.Client(TBinaryProtocol(link))
        link.open()
        request = TOpenSessionReq(username=user, password=password, configuration=configuration)
        self.session = self.client.OpenSession(request).sessionHandle

        if database is not None:
            with self.cursor() as cur:
                cur.execute("USE {0}".format(database))
Exemplo n.º 9
0
    def __init__(self, host, port):
        """Open a buffered binary-protocol connection and create the client.

        The resulting HBaseThrift client is stored in self.client.
        """
        transport = TBufferedTransport(TSocket(host, port))
        transport.open()
        protocol = TBinaryProtocol.TBinaryProtocol(transport)

        self.client = HBaseThrift.Client(protocol)
Exemplo n.º 10
0
    def __init__(self, host, port):
        """Connect to host:port over a buffered transport.

        Stores an HBaseThrift client speaking the binary protocol in
        self.client.
        """
        transport = TBufferedTransport(TSocket(host, port))
        transport.open()
        protocol = TBinaryProtocol.TBinaryProtocol(transport)

        self.client = HBaseThrift.Client(protocol)
Exemplo n.º 11
0
    def __init__(self,
                 host=None,
                 port=10000,
                 authMechanism=None,
                 user=None,
                 password=None,
                 database=None,
                 configuration=None):
        """Connect over Thrift and open a session.

        The base-class initialiser receives authMechanism; the password is
        passed through self._check_password(). NOSASL uses a buffered
        transport, anything else a SASL transport built from
        self._get_sasl_client(). If database is not None, "USE <database>"
        is executed once the session is open.
        """
        super(Connection, self).__init__(authMechanism)

        # Thrift needs a password even when none is required
        # (open issue with python-sasl).
        password = self._check_password(authMechanism, password)

        sock = TSocket(host, port)
        if authMechanism != 'NOSASL':
            sasl_client, mechanism = self._get_sasl_client(
                host, authMechanism, user, password, configuration)
            link = TSaslClientTransport(sasl_client, mechanism, sock)
        else:
            link = TBufferedTransport(sock)

        self.client = TCLIService.Client(TBinaryProtocol(link))
        link.open()
        request = TOpenSessionReq(configuration=configuration)
        self.session = self.client.OpenSession(request).sessionHandle

        if database is None:
            return
        with self.cursor() as cur:
            cur.execute("USE {0}".format(database))
Exemplo n.º 12
0
def main(args):
    """Fetch rows with getRowsLimit() and pass them to printRowsResult().

    args is [program, tableName[, No]]; No is converted with int() and
    defaults to 10. With fewer than two entries a usage line is printed and
    the process exits with status 1. The Thrift client is stored in the
    module-level ``client`` global; the transport is closed before returning.
    """
    global client

    if len(args) < 2:
        print("TableScan.py tableName No[10]")
        sys.exit(1)

    table_name = args[1]
    row_limit = int(args[2]) if len(args) >= 3 else 10

    getConfiguration('host.properties')

    # hbaseHost is not defined in this function; presumably it is a module
    # global populated by getConfiguration() -- confirm.
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))

        ret = getRowsLimit(table_name, row_limit)
        printRowsResult(ret)
    finally:
        transport.close()
Exemplo n.º 13
0
    def _connect(self):
        """Try to connect self.storage to an HBase Thrift server (best effort).

        When the config defines HBASE_STORAGE_SERVER_HOSTS, the host is picked
        from that list by (server port + self.hbase_server_offset) modulo the
        list length; otherwise HBASE_STORAGE_SERVER_HOST is used. On failure
        the error is logged and the offset is incremented; no exception is
        propagated.
        """
        config = self.context.config
        if hasattr(config, 'HBASE_STORAGE_SERVER_HOSTS'):
            hosts = config.HBASE_STORAGE_SERVER_HOSTS
            index = (self.context.server.port + self.hbase_server_offset) % len(hosts)
            host = hosts[index]
        else:
            host = config.HBASE_STORAGE_SERVER_HOST
        port = config.HBASE_STORAGE_SERVER_PORT

        socket = TSocket(host=host, port=port)
        # NOTE(review): the original comment says the timeout is the sum of
        # the HTTP timeouts plus a bit, but the value is hard-coded to 5
        # (times 1000, presumably milliseconds) -- confirm.
        try:
            timeout = 5
            socket.setTimeout(timeout * 1000)
        except Exception:
            # Best effort: continue without a custom timeout.
            logger.warning("Could not set HBase socket timeout")

        try:
            transport = TBufferedTransport(socket)
            transport.open()
            protocol = TBinaryProtocol.TBinaryProtocol(transport)
            self.storage = Hbase.Client(protocol)
            logger.info("Connected to HBase server " + host + ":" +
                        str(port))
        except Exception:
            logger.error("Error connecting to HBase server " + host + ":" +
                         str(port))
            # Bump the offset so that the host selection above changes on the
            # next attempt when a host list is configured.
            self.hbase_server_offset = self.hbase_server_offset + 1
Exemplo n.º 14
0
def main(args):
    """Look up the row keys listed in a file and print each row.

    args is [program, <verified file>[, "-all"]]. The file name is split on
    "_" and its first two parts form the table name
    "<org>_<suborg>_master_<org>". Each stripped line of the file is used as
    a row key for client.getRow(); without "-all" only the first line is
    processed. The input file and the transport are always closed.

    Raises:
        IndexError: if the file name contains no "_".
    """
    global client

    if len(args) < 2:
        print("%s <verified file> -all" % (args[0]))
        sys.exit(1)

    filename = args[1]
    opt_all = len(args) > 2 and args[2] == "-all"

    filenamearray = filename.split("_")
    orgId = filenamearray[0]
    subOrgId = filenamearray[1]

    getConfiguration('host.properties')

    # hbaseHost is not defined in this function; presumably it is a module
    # global populated by getConfiguration() -- confirm.
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))

        tablename = "%s_%s_master_%s" % (orgId, subOrgId, orgId)

        with open(filename, "r") as verified:
            for line in verified:
                row_key = line.strip()
                row = client.getRow(tablename, row_key)
                print(row_key)
                printRow(row)
                print("")
                if not opt_all:
                    break
    finally:
        transport.close()
Exemplo n.º 15
0
 def _open_hs2_connection():
     """Open an HS2 connection and return (socket, thrift client)."""
     hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
     sock = TSocket(hs2_host, hs2_port)
     buffered = TBufferedTransport(sock)
     buffered.open()
     binary = TBinaryProtocol.TBinaryProtocol(buffered)
     return sock, ImpalaHiveServer2Service.Client(binary)
Exemplo n.º 16
0
def connect(server='localhost', port=9090, timeout=None):
    """Return an Hbase client connected to server:port.

    port is converted with int(). A socket timeout is applied only when
    timeout is not None. The client uses the accelerated binary protocol
    over a buffered transport, which is opened before returning.
    """
    sock = TSocket(server, int(port))
    if timeout is not None:
        sock.setTimeout(timeout)

    buffered = TBufferedTransport(sock)
    buffered.open()
    return Hbase.Client(TBinaryProtocol.TBinaryProtocolAccelerated(buffered))
Exemplo n.º 17
0
    def __init__(self, context):
        """Store the context and connect self.storage to HBase.

        Table and column-family names as well as the server host and port
        are read from context.config.
        """
        self.context = context
        config = self.context.config
        self.table = config.HBASE_STORAGE_TABLE
        self.data_fam = config.HBASE_STORAGE_FAMILY

        sock = TSocket(host=config.HBASE_STORAGE_SERVER_HOST,
                       port=config.HBASE_STORAGE_SERVER_PORT)
        link = TBufferedTransport(sock)
        link.open()

        self.storage = Hbase.Client(TBinaryProtocol.TBinaryProtocol(link))
Exemplo n.º 18
0
 def create_hs2_client(self):
     """Create and return a new HS2 client connected to the impalad."""
     sock = TSocket(self.hostname, self.hs2_port)
     buffered = TBufferedTransport(sock)
     buffered.open()
     return TCLIService.Client(TBinaryProtocol.TBinaryProtocol(buffered))
Exemplo n.º 19
0
class HS2TestSuite(ImpalaTestSuite):
  """Test-suite base class holding a TCLIService Thrift client."""

  def setup(self):
    """Connect to IMPALAD_HS2_HOST_PORT and build the HS2 client."""
    hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
    self.socket = TSocket(hs2_host, hs2_port)
    self.transport = TBufferedTransport(self.socket)
    self.transport.open()
    self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    self.hs2_client = TCLIService.Client(self.protocol)

  def teardown(self):
    """Close the socket created by setup() when it is set."""
    if not self.socket:
      return
    self.socket.close()

  @staticmethod
  def check_response(response,
                     expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
                     expected_error_prefix=None):
    """Assert the response status code and, for non-success expectations with
    a prefix given, that the error message starts with that prefix."""
    assert response.status.statusCode == expected_status_code
    success = TCLIService.TStatusCode.SUCCESS_STATUS
    if expected_status_code != success:
      if expected_error_prefix is not None:
        assert response.status.errorMessage.startswith(expected_error_prefix)

  def close(self, op_handle):
    """Close the given operation handle and assert that it succeeded."""
    request = TCLIService.TCloseOperationReq()
    request.operationHandle = op_handle
    response = self.hs2_client.CloseOperation(request)
    assert response.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS

  def fetch(self, handle, orientation, size, expected_num_rows=None):
    """Fetch with maxRows=size from the query behind the operation handle,
    using the given fetch orientation. Asserts a success status and that the
    number of rows returned equals expected_num_rows if one was given,
    otherwise size. Returns the fetch response."""
    request = TCLIService.TFetchResultsReq()
    request.operationHandle = handle
    request.orientation = orientation
    request.maxRows = size
    response = self.hs2_client.FetchResults(request)
    HS2TestSuite.check_response(response)
    wanted_rows = size if expected_num_rows is None else expected_num_rows
    assert len(response.results.rows) == wanted_rows
    return response

  def fetch_fail(self, handle, orientation, expected_error_prefix):
    """Attempt a fetch (maxRows=100) that is expected to fail. Asserts an
    error status whose message starts with expected_error_prefix and returns
    the fetch response."""
    request = TCLIService.TFetchResultsReq()
    request.operationHandle = handle
    request.orientation = orientation
    request.maxRows = 100
    response = self.hs2_client.FetchResults(request)
    HS2TestSuite.check_response(response,
                                TCLIService.TStatusCode.ERROR_STATUS,
                                expected_error_prefix)
    return response
Exemplo n.º 20
0
def connect(server='localhost', port=9090, timeout=None):
    """Open a buffered Thrift connection and return an Hbase client for it.

    The port goes through int(); the socket timeout is set only if a
    timeout value other than None is supplied. The accelerated binary
    protocol is used.
    """
    raw_socket = TSocket(server, int(port))
    if timeout is not None:
        raw_socket.setTimeout(timeout)

    channel = TBufferedTransport(raw_socket)
    channel.open()
    accelerated = TBinaryProtocol.TBinaryProtocolAccelerated(channel)
    return Hbase.Client(accelerated)
Exemplo n.º 21
0
def getMasterTables(hbaseHost):
    """Print every table name on hbaseHost:9090 that contains 'master'.

    The transport is closed even when listing the tables fails.
    """
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))

        for table in client.getTableNames():
            if 'master' in table:
                print(table)
    finally:
        transport.close()
Exemplo n.º 22
0
def main(args):
    """Connect to HBase, publish the client globally and call getTableNames().

    The Thrift client is stored in the module-level ``client`` global. The
    transport is closed before returning, also when getTableNames() raises.
    args is not used.
    """
    global client

    getConfiguration('host.properties')

    # hbaseHost is not defined in this function; presumably it is a module
    # global populated by getConfiguration() -- confirm.
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))

        getTableNames()
    finally:
        transport.close()
Exemplo n.º 23
0
def getMasterTables(hbaseHost):
    """Print the names of all tables on hbaseHost:9090 containing 'master'.

    Uses try/finally so the transport is closed even if the table listing
    raises.
    """
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)

        master_tables = (name for name in client.getTableNames() if 'master' in name)
        for table in master_tables:
            print(table)
    finally:
        transport.close()
Exemplo n.º 24
0
def main(args):
    """Open the HBase connection, set the global client, run getTableNames().

    The client is assigned to the module-level ``client`` global. The
    transport is released in a finally block so it does not stay open when
    getTableNames() fails. args is not used.
    """
    global client

    getConfiguration('host.properties')

    # hbaseHost is not defined in this function; presumably it is a module
    # global populated by getConfiguration() -- confirm.
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)

        getTableNames()
    finally:
        transport.close()
Exemplo n.º 25
0
    def __init__(self,
                 unix_socket=None,
                 host=None,
                 port=10000,
                 authMechanism=None,
                 user=None,
                 password=None,
                 database=None,
                 configuration=None,
                 timeout=None):
        """Connect over Thrift, authenticate, and open a session.

        The socket targets unix_socket when it is not None, otherwise
        host:port, and gets `timeout` as its timeout. NOSASL uses a buffered
        transport; other mechanisms use a SASL client transport ("GSSAPI" for
        KERBEROS with host/service from self._get_krb_settings, "PLAIN"
        otherwise). If database is not None, "USE <database>" is executed.

        Raises:
            NotImplementedError: if authMechanism is not NOSASL, PLAIN,
                KERBEROS or LDAP.
        """
        known_mechanisms = frozenset(('NOSASL', 'PLAIN', 'KERBEROS', 'LDAP'))
        if authMechanism not in known_mechanisms:
            raise NotImplementedError(
                'authMechanism is either not supported or not implemented')

        # Thrift needs a password even when none is required
        # (open issue with python-sasl).
        if authMechanism == 'PLAIN':
            if password is None or len(password) == 0:
                password = '******'

        if unix_socket is None:
            sock = TSocket(host, port)
        else:
            sock = TSocket(unix_socket=unix_socket)
        sock.setTimeout(timeout)

        if authMechanism != 'NOSASL':
            sasl_client = sasl.Client()
            sasl_client.setAttr("username", user)
            sasl_client.setAttr("password", password)
            if authMechanism == 'KERBEROS':
                krb_host, krb_service = self._get_krb_settings(
                    host, configuration)
                sasl_client.setAttr("host", krb_host)
                sasl_client.setAttr("service", krb_service)
                mechanism = 'GSSAPI'
            else:
                mechanism = 'PLAIN'
            sasl_client.init()
            link = TSaslClientTransport(sasl_client, mechanism, sock)
        else:
            link = TBufferedTransport(sock)

        self.client = TCLIService.Client(TBinaryProtocol(link))
        link.open()
        request = TOpenSessionReq(username=user,
                                  password=password,
                                  configuration=configuration)
        self.session = self.client.OpenSession(request).sessionHandle

        if database is not None:
            with self.cursor() as cur:
                cur.execute("USE {0}".format(database))
Exemplo n.º 26
0
 def hs2_port_is_open(self):
     """Return True if a transport to the HS2 port can be opened.

     No authentication is attempted. Any exception is logged at info level
     and reported as False.
     """
     # Impyla will try to authenticate as part of connecting, so preserve previous logic
     # that uses the HS2 thrift code directly.
     try:
         probe = TBufferedTransport(TSocket(self.hostname, self.hs2_port))
         probe.open()
         probe.close()
     except Exception as err:
         LOG.info(err)
         return False
     return True
Exemplo n.º 27
0
 def hs2_port_is_open(self):
   """Check whether the HS2 port accepts a connection (no authentication).

   Returns True when the transport opens and closes cleanly; otherwise the
   exception is logged at info level and False is returned.
   """
   # Impyla will try to authenticate as part of connecting, so preserve previous logic
   # that uses the HS2 thrift code directly.
   reachable = False
   try:
     sock = TSocket(self.hostname, self.hs2_port)
     probe = TBufferedTransport(sock)
     probe.open()
     probe.close()
     reachable = True
   except Exception as err:
     LOG.info(err)
   return reachable
Exemplo n.º 28
0
def start():
    """Start an interactive console with a Ted Thrift client bound to ``client``.

    Attaches a stream handler to the 'ted' logger, connects to
    localhost:9030 and stores the client in the module-level ``client``
    global so it is reachable from the console, whose namespace is this
    module's globals(). The transport is closed when the console exits,
    whether it returns normally or raises.
    """
    global client

    logger = logging.getLogger('ted')
    logger.addHandler(logging.StreamHandler())

    transport = TBufferedTransport(TSocket('localhost', 9030))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = ted.TedService.Client(protocol)

        shell = code.InteractiveConsole(globals())
        shell.interact("Run client.<command> where command is a Ted command (eg: getWatching())\nSee dir(Iface) for commands")
    finally:
        transport.close()
Exemplo n.º 29
0
class ThriftClient(object):
    """Lazily connecting wrapper that forwards calls to a Thrift client."""

    def __init__(self, port=6458, host="127.0.0.1"):
        """Remember the endpoint; no connection is made until first use."""
        self.host = host
        self.port = port
        self.client = None
        self.transport = None

    def connect(self):
        """Open a buffered binary-protocol connection to host:port.

        self.transport and self.client are assigned only after open()
        succeeds, so a failed attempt leaves self.client as None and the
        next execute() call retries the connection.
        """
        transport = TBufferedTransport(TSocket(self.host, self.port))
        client = Client(TBinaryProtocol(transport))
        transport.open()
        self.transport = transport
        self.client = client

    def execute(self, name, *args, **kwargs):
        """Call the client method `name`, connecting first if necessary."""
        if self.client is None:
            self.connect()
        result = getattr(self.client, name)(*args, **kwargs)
        return result

    def create(self, key, second, minute = -1, hour = -1, day = -1, month = -1, week = -1, action="shell", params=None):
        """Forward to the remote "create" call; params defaults to a new {}."""
        # A fresh dict per call avoids sharing one mutable default object.
        params = {} if params is None else params
        return self.execute("create", key, second, minute, hour, day, month, week, action, params)

    def create_timeout(self, key, second, minute = -1, hour = -1, day = -1, month = -1, week = -1, count=1, action="shell", params=None):
        """Forward to the remote "createTimeout" call; params defaults to a new {}."""
        params = {} if params is None else params
        return self.execute("createTimeout", key, second, minute, hour, day, month, week, count, action, params)

    def remove(self, key):
        """Forward to the remote "remove" call."""
        return self.execute("remove", key)

    def get(self, key):
        """Forward to the remote "get" call."""
        return self.execute("get", key)

    def get_current(self):
        """Forward to the remote "getCurrent" call."""
        return self.execute("getCurrent")

    def get_time(self, timestamp):
        """Forward to the remote "getTime" call."""
        return self.execute("getTime", timestamp)

    def get_keys(self, prefix=''):
        """Forward to the remote "getKeys" call."""
        return self.execute("getKeys", prefix)

    def info(self):
        """Forward to the remote "info" call."""
        return self.execute("info")

    def __del__(self):
        """Close the transport, if any, and drop the client references."""
        if self.client:
            if self.transport is not None:
                self.transport.close()
            self.transport = None
            self.client = None
Exemplo n.º 30
0
class ThriftClient(object):
    """Lazily connecting wrapper that forwards calls to a Thrift client."""

    def __init__(self, port=6458, host="127.0.0.1"):
        """Remember the endpoint; no connection is made until first use."""
        self.host = host
        self.port = port
        self.client = None
        self.transport = None

    def connect(self):
        """Open a buffered binary-protocol connection to host:port.

        self.transport and self.client are assigned only after open()
        succeeds, so a failed attempt leaves self.client as None and the
        next execute() call retries the connection.
        """
        transport = TBufferedTransport(TSocket(self.host, self.port))
        client = Client(TBinaryProtocol(transport))
        transport.open()
        self.transport = transport
        self.client = client

    def execute(self, name, *args, **kwargs):
        """Call the client method `name`, connecting first if necessary."""
        if self.client is None:
            self.connect()
        result = getattr(self.client, name)(*args, **kwargs)
        return result

    def create(self, key, second, minute = -1, hour = -1, day = -1, month = -1, week = -1, action="shell", params=None):
        """Forward to the remote "create" call; params defaults to a new {}."""
        # A fresh dict per call avoids sharing one mutable default object.
        params = {} if params is None else params
        return self.execute("create", key, second, minute, hour, day, month, week, action, params)

    def create_timeout(self, key, second, minute = -1, hour = -1, day = -1, month = -1, week = -1, count=1, action="shell", params=None):
        """Forward to the remote "createTimeout" call; params defaults to a new {}."""
        params = {} if params is None else params
        return self.execute("createTimeout", key, second, minute, hour, day, month, week, count, action, params)

    def remove(self, key):
        """Forward to the remote "remove" call."""
        return self.execute("remove", key)

    def get(self, key):
        """Forward to the remote "get" call."""
        return self.execute("get", key)

    def get_current(self):
        """Forward to the remote "getCurrent" call."""
        return self.execute("getCurrent")

    def get_time(self, timestamp):
        """Forward to the remote "getTime" call."""
        return self.execute("getTime", timestamp)

    def get_keys(self, prefix=''):
        """Forward to the remote "getKeys" call."""
        return self.execute("getKeys", prefix)

    def __del__(self):
        """Close the transport, if any, and drop the client references."""
        if self.client:
            if self.transport is not None:
                self.transport.close()
            self.transport = None
            self.client = None
Exemplo n.º 31
0
 def __init__(self, host=None, port=10000, authMechanism=None, user=None, password=None, database=None):
     """Connect over Thrift and open a session.

     NOSASL uses a buffered transport; PLAIN and LDAP use a SASL client
     transport with the "PLAIN" mechanism. The database argument is not
     used by this constructor.

     Raises:
         NotImplementedError: for any mechanism other than NOSASL, PLAIN or
             LDAP; KERBEROS is explicitly refused.
     """
     usable_mechanisms = frozenset(('NOSASL', 'PLAIN', 'LDAP'))
     if authMechanism not in usable_mechanisms:
         raise NotImplementedError('authMechanism is either not supported or not implemented')

     sock = TSocket(host, port)
     if authMechanism != 'NOSASL':
         sasl_client = sasl.Client()
         sasl_client.setAttr("username", user)
         sasl_client.setAttr("password", password)
         sasl_client.init()
         link = TSaslClientTransport(sasl_client, "PLAIN", sock)
     else:
         link = TBufferedTransport(sock)

     self.client = TCLIService.Client(TBinaryProtocol(link))
     link.open()
     self.session = self.client.OpenSession(TOpenSessionReq()).sessionHandle
def main(args):
    """Run columnProcess() on every table returned by getMasterTables().

    The Thrift client is stored in the module-level ``client`` global. The
    transport is closed before returning, also when a helper raises. args is
    not used.
    """
    global client

    getConfiguration('host.properties')

    # hbaseHost is not defined in this function; presumably it is a module
    # global populated by getConfiguration() -- confirm.
    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))

        tableList = getMasterTables()

        for table in tableList:
            columnProcess(table)
    finally:
        transport.close()
class writeThread(threading.Thread):
    """Thread that writes batches of generated rows to HBase over Thrift.

    Relies on module-level names not defined in this class: bytesPerRecord,
    columns, recordsPerBatch, mylock, gStartT, gEndT and gWritenItems.
    """

    def __init__(self, threadname, RecordsThreadwillwrite):
        """Prepare the column payload, the batch count and the connection."""
        threading.Thread.__init__(self, name = threadname)
        # The 11 subtracted characters match the length of the "value_" and
        # "_endv" wrapper added below. (Original note: suppose 3 columns.)
        bytesPerColumn = int(bytesPerRecord/columns) - 11

        self.columnvalue = "value_" + "x"*bytesPerColumn + "_endv"
        self.tbwBatch = int (RecordsThreadwillwrite / recordsPerBatch)

        self.transport = TBufferedTransport(TSocket('10.1.2.230', 9090), 40960)
        self.transport.open()
        protocol = TBinaryProtocol.TBinaryProtocol(self.transport)

        self.client = Hbase.Client(protocol)

    def run(self):
        """Write self.tbwBatch batches, then update the shared counters.

        The global end time and item counter are updated while holding
        mylock. The lock is released and the transport closed even when a
        write fails.
        """
        global gEndT
        global gWritenItems

        print("+%s start" % (self.getName()))
        try:
            threadWritenItem = 0
            for _ in xrange(0, self.tbwBatch):
                self.write_hbase()
                threadWritenItem += recordsPerBatch

            mylock.acquire()
            try:
                gEndT = time.time()
                gWritenItems += threadWritenItem
                print("%s done, %s seconds past, %d reocrds saved" % (self.getName(), gEndT-gStartT, gWritenItems))
            finally:
                mylock.release()
        finally:
            self.transport.close()

    def write_hbase(self):
        """Send one batch of recordsPerBatch rows to table "testdb1".

        Every row gets a random/time based key and `columns` mutations in
        family "f1", all carrying self.columnvalue.
        """
        print("%s %s" % (self.getName(), "Start write"))
        batchmutations = []
        for _ in xrange(0, recordsPerBatch):
            rowkey = "RK_%s_%s" % (random.random(), time.time())
            mutations = [
                Hbase.Mutation(column="f1:%s" % ii, value=self.columnvalue)
                for ii in xrange(0, columns)
            ]
            batchmutations.append(Hbase.BatchMutation(rowkey, mutations))
        self.client.mutateRows("testdb1", batchmutations)
Exemplo n.º 34
0
    def setup(self):
        """Connect to HBase on localhost:9090 and recreate the test table.

        The table 'thumbor-test' is disabled and deleted if that succeeds
        (a ttypes.IOError is ignored), then created with the single column
        family 'images:' limited to one version.
        """
        link = TBufferedTransport(TSocket(host='localhost', port=9090))
        link.open()
        self.connection = Hbase.Client(TBinaryProtocol.TBinaryProtocol(link))
        self.table = 'thumbor-test'
        self.family = 'images:'

        family_descriptor = ttypes.ColumnDescriptor()
        family_descriptor.name = self.family
        family_descriptor.maxVersions = 1

        try:
            self.connection.disableTable(self.table)
            self.connection.deleteTable(self.table)
        except ttypes.IOError:
            pass
        self.connection.createTable(self.table, [family_descriptor])
def main(args):
    """Run ``columnProcess`` on every table returned by ``getMasterTables()``.

    Loads 'host.properties' via ``getConfiguration``, connects to the HBase
    Thrift server at ``hbaseHost``:9090 and publishes the client through the
    module-level ``client`` so the helper functions can use it.

    The transport is closed when processing finishes or fails, so the socket
    is not leaked.

    ``args`` is accepted for symmetry with the other entry points and is not
    read here.
    """
    global client

    getConfiguration('host.properties')

    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)

        for table in getMasterTables():
            columnProcess(table)
    finally:
        transport.close()
Exemplo n.º 36
0
def main(args):
    """Print the rows of one table via ``rowPrint``.

    ``args`` is an argv-style list: ``args[1]`` is the table name.  When it
    is missing a usage line is printed and the process exits with status 1.

    Loads 'host.properties' via ``getConfiguration``, connects to the HBase
    Thrift server at ``hbaseHost``:9090 and publishes the client through the
    module-level ``client``.  The transport is closed when printing finishes
    or fails, so the socket is not leaked.
    """
    global client

    if len(args) < 2:
        print("%s tablename" %(args[0]))
        sys.exit(1)

    tablename = args[1]

    getConfiguration('host.properties')

    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)

        rowPrint(tablename)
    finally:
        transport.close()
Exemplo n.º 37
0
    def setup(self):
        """Connect to the HBase Thrift server on localhost:9090 and recreate the test table.

        Stores the Thrift client in ``self.connection`` and the table and
        column-family names in ``self.table`` / ``self.family``.  The table is
        disabled and deleted first (a ``ttypes.IOError`` from that step is
        ignored) and then created with one column family keeping one version.
        """
        transport = TBufferedTransport(TSocket(host='localhost', port=9090))
        transport.open()
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        self.connection = Hbase.Client(protocol)
        self.table = 'thumbor-test'
        self.family = 'images:'

        columns = []
        col = ttypes.ColumnDescriptor()
        col.name = self.family
        col.maxVersions = 1
        columns.append(col)
        try:
            self.connection.disableTable(self.table)
            self.connection.deleteTable(self.table)
        except ttypes.IOError:
            pass
        # This line used to be indented with a tab while the rest of the
        # method uses spaces, which Python 3 rejects (TabError).
        self.connection.createTable(self.table, columns)
Exemplo n.º 38
0
class MyHbase:
    """
    Wrapper around an HBase Thrift connection that (re)creates one table.

    Constructing an instance connects to the Thrift server, drops the managed
    table if it already exists and creates it again with the "groupby" and
    "generate" column families.
    """

    def __init__(self, netloc, port, table="diracAccounting"):
        """Connect to ``netloc``:``port`` and recreate ``table``.

        Only the table named by ``table`` is touched.  The previous code ran
        the delete and create calls for every table the server listed, which
        removed unrelated tables and never created anything on an empty
        server.
        """
        self.tableName = table

        self.transport = TBufferedTransport(TSocket(netloc, port))
        self.protocol = TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.protocol)
        self.transport.open()

        if self.tableName in self.client.getTableNames():
            # The original disabled the table before deleting it, so keep that order.
            if self.client.isTableEnabled(self.tableName):
                print("disabling table:%s" % (self.tableName))
                self.client.disableTable(self.tableName)
            print("deleting table:%s" % (self.tableName))
            self.client.deleteTable(self.tableName)
        self.__createTable(["groupby", "generate"])

    def __del__(self):
        """Close the transport, if the constructor got far enough to create it."""
        transport = getattr(self, "transport", None)
        if transport is not None:
            transport.close()

    def __createTable(self, columnfamilyList):
        """Create ``self.tableName`` with one column family per list entry.

        ``columnfamilyList`` is a list of column-family names, e.g.
        ``['groupby', 'generate']``.  An ``AlreadyExists`` error from the
        server is reported as a warning instead of being raised.
        """
        columns = [ColumnDescriptor(name) for name in columnfamilyList]
        print("creating tables:%s" % (self.tableName))
        try:
            self.client.createTable(self.tableName, columns)
        except AlreadyExists as ae:
            print("WARN: " + ae.message)
Exemplo n.º 39
0
    def __init__(self, host=None, port=10000, authMechanism=None, user=None, password=None, database=None, configuration=None):
        """Open a session against the Thrift service at ``host``:``port``.

        With ``authMechanism == 'NOSASL'`` a plain buffered transport is
        used; any other mechanism goes through a SASL transport built from
        ``_get_sasl_client``.  The resulting session handle is stored in
        ``self.session``.  When ``database`` is given, a ``USE`` statement is
        executed on a fresh cursor.
        """
        super(Connection, self).__init__(authMechanism)
        # Must set a password for thrift, even if it doesn't need one
        # (open issue with python-sasl).
        password = self._check_password(authMechanism, password)
        sock = TSocket(host, port)
        if authMechanism != 'NOSASL':
            saslc, sasl_mech = self._get_sasl_client(host, authMechanism, user, password, configuration)
            transport = TSaslClientTransport(saslc, sasl_mech, sock)
        else:
            transport = TBufferedTransport(sock)

        self.client = TCLIService.Client(TBinaryProtocol(transport))
        transport.open()
        session_reply = self.client.OpenSession(TOpenSessionReq(configuration=configuration))
        self.session = session_reply.sessionHandle
        if database is None:
            return
        with self.cursor() as cur:
            cur.execute("USE {0}".format(database))
Exemplo n.º 40
0
def main(args):
    """Run ``updateColumn`` on one table of the given HBase host.

    ``args`` is an argv-style list: ``args[1]`` is the HBase Thrift host and
    ``args[2]`` the table name.  With fewer arguments a usage line is printed
    and the process exits with status 1.

    The client is published through the module-level ``client``.  The
    transport is closed even when ``updateColumn`` raises, so the socket is
    not leaked on failure.
    """
    global client

    if len(args) < 3:
        print("%s hbasehost tablename " % args[0])
        sys.exit(1)

    hbaseHost = args[1]
    table_name = args[2]

    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)

        updateColumn(table_name)
    finally:
        transport.close()
def main(args):
    """Run ``updateColumn`` on one table of the given HBase host.

    ``args`` is an argv-style list: ``args[1]`` is the HBase Thrift host and
    ``args[2]`` the table name.  With fewer arguments a usage line is printed
    and the process exits with status 1.

    The client is published through the module-level ``client``.  The
    transport is closed even when ``updateColumn`` raises, so the socket is
    not leaked on failure.
    """
    global client

    if len(args) < 3:
        print("%s hbasehost tablename " % args[0])
        sys.exit(1)

    hbaseHost = args[1]
    table_name = args[2]

    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)

        updateColumn(table_name)
    finally:
        transport.close()
class ServidorArchivos:
    """Client for the remote file service reached over Thrift.

    The transport is opened for the duration of each call and closed again
    afterwards, also when the remote call raises.  Previously a failed call
    left the transport open, so the next ``open()`` would fail.
    """

    # Address of the Thrift file server.
    host = "localhost"
    puerto = 42778

    def __init__(self):
        """Build the transport, protocol and client; no connection is opened yet."""
        socket = TSocket(self.host, self.puerto)
        self.transport = TBufferedTransport(socket)
        protocolo = TBinaryProtocol(self.transport)
        self.conexion = Client(protocolo)

    def _invocar(self, operacion, *argumentos):
        """Call ``operacion(*argumentos)`` with the transport open, closing it afterwards."""
        self.transport.open()
        try:
            return operacion(*argumentos)
        finally:
            self.transport.close()

    def guardar_archivo(self, archivo: FileStorage, ruta: str):
        """Upload the content of ``archivo`` under ``ruta`` and return the service's result."""
        # The file is read before connecting, as in the original ordering.
        imagen = Imagen(archivo.read(), ruta)
        return self._invocar(self.conexion.guardarArchivo, imagen)

    def eliminar_archivo(self, ruta: str):
        """Ask the service to delete ``ruta`` and return its result."""
        return self._invocar(self.conexion.eliminarArchivo, ruta)

    def obtener_archivos(self, ruta: str) -> list:
        """Return what the service reports for ``ruta``; the path is echoed to stdout first."""
        print(ruta)
        return self._invocar(self.conexion.obtenerArchivos, ruta)
Exemplo n.º 43
0
def do_import(n, filename):
  """Load the tweets stored as JSON in `filename` into the 'tweets' HBase table.

  Opens its own Thrift connection to THRIFT_SERVER:THRIFT_PORT, writes one
  row per tweet (row key t['idstr'], mutations from create_mutations) and
  finally creates an empty '<filename>.done' marker file.

  `n` is not used in the visible part of the function.
  NOTE(review): this snippet is truncated - the handler of the outer `try`
  is not visible here.
  """
  try:
    transport = TBufferedTransport(TSocket(THRIFT_SERVER, THRIFT_PORT))
    transport.open()
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = Hbase.Client(protocol)
    
    fp = open(filename)
    tweets = json.load(fp)
    fp.close()
    for t in tweets:
      try:
        mutations = create_mutations(t)
        client.mutateRow('tweets', t['idstr'], mutations, None)
      except Exception, e:
        # A failing tweet is reported and skipped; print_lock serialises the
        # warning output.
        with print_lock:
          print('[WARNING]Tweet id:%s (file: %s) caused an exception: %s' % (t['idstr'], filename, e))
    # Best-effort marker file; any failure to create it is deliberately ignored.
    try:
      open(filename + '.done', 'w').close()
    except:
      # ignored
      pass
Exemplo n.º 44
0
    def __init__(self, host=None, port=10000, authMechanism="PLAIN", user=None, password=None, database=None,
                 configuration=None, timeout=None):
        """Open a session against the Thrift service at ``host``:``port``.

        Only the "PLAIN" and "NOSASL" mechanisms are accepted; anything else
        raises ``NotImplementedError``.  ``timeout`` is handed to the socket
        unchanged.  With "PLAIN" an empty or missing password is replaced by
        a placeholder before the SASL transport is built.  The session handle
        is stored in ``self.session`` and, when ``database`` is given, a
        ``USE`` statement is executed on a fresh cursor.
        """
        if authMechanism not in {"PLAIN", "NOSASL"}:
            raise NotImplementedError("authMechanism '{}' is either not supported or not implemented".format(authMechanism))

        sock = TSocket.TSocket(host, port)
        sock.setTimeout(timeout)

        if authMechanism != "NOSASL":
            # authMechanism == "PLAIN"
            if password is None or len(password) == 0:
                password = "******"
            transport = TSaslClientTransport(sock, host=host, service=None, mechanism=authMechanism,
                                             username=user, password=password)
        else:
            transport = TBufferedTransport(sock)

        self.client = TCLIService.Client(TBinaryProtocol(transport))
        transport.open()
        open_request = TOpenSessionReq(username=user, password=password, configuration=configuration)
        self.session = self.client.OpenSession(open_request).sessionHandle
        if database is None:
            return
        with self.cursor() as cur:
            cur.execute("USE {0}".format(database))
Exemplo n.º 45
0
def main(argv):
  """Send one hand-built 'testVoid' call and check that a void reply comes back.

  The common arguments must select protocol 'header', transport 'buffered'
  and no SSL.  --override-transport picks the transport actually used
  ('buffered' when absent or empty, or 'framed'); any other value raises
  ValueError.  Arguments are parsed by argparse with its default source;
  `argv` itself is not read.
  """
  parser = argparse.ArgumentParser()
  add_common_args(parser)
  # Since THeaderTransport acts as framed transport when detected frame, we
  # cannot use --transport=framed as it would result in 2 layered frames.
  parser.add_argument('--override-transport')
  args = parser.parse_args()
  assert args.protocol == 'header'
  assert args.transport == 'buffered'
  assert not args.ssl

  sock = TSocket(args.host, args.port, socket_family=socket.AF_INET)
  wanted = args.override_transport or 'buffered'
  if wanted == 'framed':
    trans = TFramedTransport(sock)
  elif wanted == 'buffered':
    trans = TBufferedTransport(sock)
  else:
    raise ValueError('invalid transport')
  trans.open()
  proto = TBinaryProtocol(trans)

  # Request: a testVoid call with an empty argument struct.
  proto.writeMessageBegin('testVoid', TMessageType.CALL, 3)
  proto.writeStructBegin('testVoid_args')
  proto.writeFieldStop()
  proto.writeStructEnd()
  proto.writeMessageEnd()
  trans.flush()

  # Response: a REPLY whose result struct ends at its first field.
  _name, reply_type, _seqid = proto.readMessageBegin()
  assert reply_type == TMessageType.REPLY
  proto.readStructBegin()
  _fname, field_type, _fid = proto.readFieldBegin()
  assert field_type == TType.STOP
  proto.readFieldEnd()
  proto.readStructEnd()
  proto.readMessageEnd()

  trans.close()
Exemplo n.º 46
0
class MyHbase():
    def __init__(self,netloc,port,tablename):
        """Connect to the Thrift server at netloc:port and check `tablename`.

        If the table exists it is enabled when found disabled, and a success
        message followed by a blank line is printed; otherwise
        "wrong table name" is printed.  The connection stays open either way.
        """
        self.tablename = tablename

        self.transport = TBufferedTransport(TSocket(netloc,port))
        self.protocol = TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.protocol)
        self.transport.open()
        if self.tablename not in self.client.getTableNames():
            print("wrong table name")
            return
        if not self.client.isTableEnabled(self.tablename):
            print('table off_line, start bring it on_line now')
            self.client.enableTable(self.tablename)
        print("successfully connect table: %s"%(self.tablename))
        print("")
    def __del__(self):
        """Close the transport, if the constructor got far enough to create it."""
        # __init__ may fail before self.transport is assigned; without this
        # guard the finalizer would raise AttributeError in that case.
        transport = getattr(self, "transport", None)
        if transport is not None:
            transport.close()

    def printRow(self,entry):
        """Print the row key of `entry` followed by its columns in sorted order.

        Each column is printed as "<name> => <value>", where the value is
        read from entry.columns[name].value.
        """
        # The trailing comma suppresses the newline (Python 2 print statement),
        # so the first column continues on the "row:" line.
        print "row: " + entry.row + ", cols:",
        for k in sorted(entry.columns):
            print k + " => " + entry.columns[k].value
            # NOTE(review): this bare print emits a blank line after every
            # column; it may have been meant to run once after the loop.
            print
    
    def showTableInfo(self):
        '''
        Print each column name of the table followed by its descriptor,
        as returned by the client's getColumnDescriptors call.
        '''
        descriptors = self.client.getColumnDescriptors(self.tablename)
        for (name,descriptor) in descriptors.items():
            print("column with name:"+name)
            print(descriptor)

    def generatePlot(self,groupbyname,generatename):
        '''
Exemplo n.º 47
0
def main(args):
    """Print the HBase rows whose keys are listed in a "verified" file.

    ``args`` is an argv-style list: ``args[1]`` is the file name and an
    optional ``args[2] == "-all"`` prints every listed row instead of only
    the first one.  The file name must start with ``<orgId>_<subOrgId>_``;
    the table read is ``<orgId>_<subOrgId>_master_<orgId>``.

    Loads 'host.properties' via ``getConfiguration``, connects to
    ``hbaseHost``:9090 and publishes the client through the module-level
    ``client``.  The input file and the transport are both closed when the
    work finishes or fails.
    """
    global client

    if len(args) < 2:
        print("%s <verified file> -all"%(args[0]))
        sys.exit(1)

    filename = args[1]
    opt_all = len(args) > 2 and args[2] == "-all"

    filenamearray = filename.split("_")
    orgId = filenamearray[0]
    subOrgId = filenamearray[1]

    getConfiguration('host.properties')

    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)

        tablename = "%s_%s_master_%s"%(orgId,subOrgId,orgId)

        with open(filename, "r") as verified:
            for line in verified:
                # Named rowkey rather than input so the builtin is not shadowed.
                rowkey = line.strip()
                row = client.getRow(tablename, rowkey)
                print(rowkey)
                printRow(row)
                print("")
                if not opt_all:
                    break
    finally:
        transport.close()
Exemplo n.º 48
0
def main(args):
    """Disable and delete one HBase table.

    ``args`` is an argv-style list that must contain exactly the program
    name and the table name; otherwise a usage line is printed and the
    process exits with status 1.

    Loads 'host.properties' via ``getConfiguration``, connects to
    ``hbaseHost``:9090 and publishes the client through the module-level
    ``client``.  The transport is closed even when disabling or deleting
    raises, so the socket is not leaked on failure.
    """
    global client

    if len(args) != 2:
        print("%s tableName"%(args[0]))
        sys.exit(1)

    tablename = args[1]

    getConfiguration('host.properties')

    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)

        # The table is disabled before it is deleted, as in the original.
        client.disableTable(tablename)
        client.deleteTable(tablename)
    finally:
        transport.close()
Exemplo n.º 49
0
class HBaseOperator():
    """Small helper that connects to an HBase Thrift server and dumps table metadata."""

    def __init__(self, host="193.169.100.33", port=2181):
        """Open a Thrift connection to ``host``:``port``.

        Both parameters default to the values that used to be hard-coded, so
        ``HBaseOperator()`` behaves as before.
        NOTE(review): 2181 is conventionally ZooKeeper's client port, while
        the other snippets in this file reach the HBase Thrift server on
        9090 - confirm the default.
        """
        self.host = host
        self.port = port
        self.transport = TBufferedTransport(TSocket(self.host, self.port))
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.protocol)

    def __del__(self):
        """Close the transport, if the constructor got far enough to create it."""
        transport = getattr(self, "transport", None)
        if transport is not None:
            transport.close()

    def getAllTablesInfo(self):
        """Print, for every table, its name, column descriptors and regions."""
        listTables = self.client.getTableNames()
        print("=" * 40)
        print("Show all tables information....")
        for tableName in listTables:
            print("TableName:" + tableName)
            listColumns = self.client.getColumnDescriptors(tableName)
            print(listColumns)
            listTableRegions = self.client.getTableRegions(tableName)
            print(listTableRegions)
            print("+" * 40)
Exemplo n.º 50
0
def main(args):
    """Verify the master table of one organisation via ``verifyData``.

    ``args`` is an argv-style list that must contain exactly the program
    name, ``orgId`` and ``subOrgId``; otherwise a usage line is printed and
    the process exits with status 1.  The table checked is
    ``<orgId>_<subOrgId>_master_<orgId>``.

    Loads "host.properties" via ``getConfiguration``, connects to
    ``hbaseHost``:9090 and publishes the client through the module-level
    ``client``.  The transport is closed even when the Solr connection or
    the verification raises, so the socket is not leaked on failure.
    """
    global client

    if len(args) != 3:
        print("%s orgId subOrgId" % (args[0]))
        sys.exit(1)

    orgId = args[1]
    subOrgId = args[2]

    getConfiguration("host.properties")

    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)

        tablename = "%s_%s_master_%s" % (orgId, subOrgId, orgId)

        connectSolrServer(orgId, subOrgId)
        verifyData(tablename)
    finally:
        transport.close()
Exemplo n.º 51
0
def main(args):
    """Verify the master table of one organisation via ``verifyData``.

    ``args`` is an argv-style list that must contain exactly the program
    name, ``orgId`` and ``subOrgId``; otherwise a usage line is printed and
    the process exits with status 1.  The table checked is
    ``<orgId>_<subOrgId>_master_<orgId>``.

    Loads 'host.properties' via ``getConfiguration``, connects to
    ``hbaseHost``:9090 and publishes the client through the module-level
    ``client``.  The transport is closed even when the Solr connection or
    the verification raises, so the socket is not leaked on failure.
    """
    global client

    if len(args) != 3:
        print("%s orgId subOrgId" % (args[0]))
        sys.exit(1)

    orgId = args[1]
    subOrgId = args[2]

    getConfiguration('host.properties')

    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)

        tablename = "%s_%s_master_%s" % (orgId, subOrgId, orgId)

        connectSolrServer(orgId, subOrgId)
        verifyData(tablename)
    finally:
        transport.close()
Exemplo n.º 52
0
class TestAuthorization(CustomClusterTestSuite):
    AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))

    def setup(self):
        """Open a HiveServer2 Thrift connection to the first impalad of the cluster.

        The socket, transport, protocol and client are kept on the instance
        as ``socket``, ``transport``, ``protocol`` and ``hs2_client``.
        """
        first_impalad = self.cluster.impalads[0].service
        self.socket = TSocket(first_impalad.hostname, first_impalad.hs2_port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Close the HS2 socket if one is still set, then remove the audit log directory."""
        sock = self.socket
        if sock:
            sock.close()
        # Errors while deleting the directory are ignored.
        shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=%s\
      --authorization_policy_provider_class=%s"                                                  %\
        (AUTH_POLICY_FILE,
         "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider"))
    def test_custom_authorization_provider(self):
        """Checks authorization when a custom policy provider class is configured.

        Opens an HS2 session, expects "describe tpch_seq.lineitem" to be
        rejected with a missing-privileges message and expects
        "describe tpch.lineitem" to succeed.
        """
        # Imported here rather than at module level; see the TODO in
        # __test_impersonation for the reason.
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        # User is 'test_user' (defined in the authorization policy file)
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        assert 'User \'%s\' does not have privileges to access' % 'test_user' in\
            str(execute_statement_resp)

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=%s\
      --authorized_proxy_user_config=hue=%s" % (AUTH_POLICY_FILE, getuser()))
    def test_access_runtime_profile(self):
        """Checks who may read the runtime profile and exec summary of a query.

        First half: a query over functional.complex_view, where EXPLAIN is
        rejected for the current user, must not expose its profile or
        summary - neither in a direct session nor in a delegated one.
        Second half: after creating tpch.customer_view, EXPLAIN succeeds and
        the profile and summary must be readable, again both directly and as
        a delegated user.  The view is dropped at the end.
        """
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = getuser()
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Current user can't access view's underlying tables
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "explain select * from functional.complex_view"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        assert 'User \'%s\' does not have privileges to EXPLAIN' % getuser() in\
            str(execute_statement_resp)
        # User should not have access to the runtime profile
        # (checked with the operation left open, then with it closed first).
        self.__run_stmt_and_verify_profile_access(
            "select * from functional.complex_view", False, False)
        self.__run_stmt_and_verify_profile_access(
            "select * from functional.complex_view", False, True)

        # Repeat as a delegated user
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        # Delegated user is the current user
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)
        self.session_handle = resp.sessionHandle
        # User should not have access to the runtime profile
        self.__run_stmt_and_verify_profile_access(
            "select * from functional.complex_view", False, False)
        self.__run_stmt_and_verify_profile_access(
            "select * from functional.complex_view", False, True)

        # Create a view for which the user has access to the underlying tables.
        open_session_req.username = getuser()
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = """create view if not exists tpch.customer_view as
        select * from tpch.customer limit 1"""
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # User should be able to run EXPLAIN
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = """explain select * from tpch.customer_view"""
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # User should have access to the runtime profile and exec summary
        self.__run_stmt_and_verify_profile_access(
            "select * from tpch.customer_view", True, False)
        self.__run_stmt_and_verify_profile_access(
            "select * from tpch.customer_view", True, True)

        # Repeat as a delegated user
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        # Delegated user is the current user
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)
        self.session_handle = resp.sessionHandle
        # User should have access to the runtime profile and exec summary
        self.__run_stmt_and_verify_profile_access(
            "select * from tpch.customer_view", True, False)
        self.__run_stmt_and_verify_profile_access(
            "select * from tpch.customer_view", True, True)

        # Clean up
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "drop view if exists tpch.customer_view"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=%s\
      --authorized_proxy_user_config=foo=bar;hue=%s\
      --abort_on_failed_audit_event=false\
      --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, getuser(), AUDIT_LOG_DIR))
    def test_user_impersonation(self):
        """End-to-end user impersonation + authorization test.

        The cluster is started with a proxy-user config that maps 'hue' to
        the current OS user; the shared impersonation scenario is then run.
        """
        self.__test_impersonation()

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        "--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=hue=bar\
        --authorized_proxy_group_config=foo=bar;hue=%s\
        --abort_on_failed_audit_event=false\
        --audit_event_log_dir=%s" %
        (AUTH_POLICY_FILE, grp.getgrgid(os.getgid()).gr_name, AUDIT_LOG_DIR))
    def test_group_impersonation(self):
        """End-to-end group impersonation + authorization test.

        The cluster is started with a proxy-group config that maps 'hue' to
        the group name of the current process's gid; the shared
        impersonation scenario is then run.
        """
        self.__test_impersonation()

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
        --authorization_policy_file=%s\
        --authorized_proxy_user_config=foo=bar\
        --authorized_proxy_group_config=foo=bar\
        --abort_on_failed_audit_event=false\
        --audit_event_log_dir=%s" % (AUTH_POLICY_FILE, AUDIT_LOG_DIR))
    def test_no_matching_user_and_group_impersonation(self):
        """Opening a delegated session must fail when no proxy rule matches.

        Neither the user nor the group proxy config mentions the connecting
        user, so the OpenSession response has to carry the "not authorized
        to delegate" message.
        """
        request = TCLIService.TOpenSessionReq()
        request.username = '******'
        request.configuration = {'impala.doas.user': '******'}
        response = self.hs2_client.OpenSession(request)
        assert 'User \'hue\' is not authorized to delegate to \'abc\'' in str(
            response)

    def __test_impersonation(self):
        """End-to-end impersonation + authorization test. Expects authorization to be
        configured before running this test.

        Scenario: open a session delegated to the current OS user, check that
        an unauthorized statement is rejected and audited, check that an
        authorized statement succeeds and that its runtime profile names the
        expected users, check that delegating to another user is refused, and
        finally run a query in a session without delegation.  The HS2 socket
        is closed and cleared at the end.
        """
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
        # the module within this test function, rather than as a top-level import. This way
        # the tests in that module will not get pulled when executing this test suite. The fix
        # is to split the utility code out of the TestHS2 class and support HS2 as a first
        # class citizen in our test framework.
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        # Connected user is 'hue'
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        # Delegated user is the current user
        open_session_req.configuration['impala.doas.user'] = getuser()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Try to query a table we are not authorized to access.
        self.session_handle = resp.sessionHandle
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch_seq.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        assert 'User \'%s\' does not have privileges to access' % getuser() in\
            str(execute_statement_resp)

        # The rejected statement must show up in the audit log with both the
        # effective user and the impersonator.
        assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
            'No matching audit event recorded in time window'

        # Now try the same operation on a table we are authorized to access.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = "describe tpch.lineitem"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)

        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page,
                                          effective_user=getuser(),
                                          delegated_user=getuser(),
                                          connected_user='******')

        # Try to delegate to a user we are not authorized to delegate to.
        open_session_req.configuration['impala.doas.user'] = '******'
        resp = self.hs2_client.OpenSession(open_session_req)
        assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str(
            resp)

        # Create a new session which does not have a do_as_user.
        open_session_req.username = '******'
        open_session_req.configuration = dict()
        resp = self.hs2_client.OpenSession(open_session_req)
        TestHS2.check_response(resp)

        # Run a simple query, which should succeed.
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = resp.sessionHandle
        execute_statement_req.statement = "select 1"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        TestHS2.check_response(execute_statement_resp)

        # Verify the correct user information is in the runtime profile. Since there is
        # no do_as_user the Delegated User field should be empty.
        query_id = operation_id_to_query_id(
            execute_statement_resp.operationHandle.operationId)

        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self.__verify_profile_user_fields(profile_page,
                                          effective_user='******',
                                          delegated_user='',
                                          connected_user='******')

        # Setting the attribute to None makes teardown() skip the close.
        self.socket.close()
        self.socket = None

    def __verify_profile_user_fields(self, profile_str, effective_user,
                                     connected_user, delegated_user):
        """Asserts that the runtime profile text lists the given users.

        Each of the User, Connected User and Delegated User lines must appear
        in 'profile_str' with exactly the expected value."""
        expectations = (
            ('\n    User: %s\n', effective_user),
            ('\n    Connected User: %s\n', connected_user),
            ('\n    Delegated User: %s\n', delegated_user),
        )
        for template, expected_value in expectations:
            assert template % expected_value in profile_str

    def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
        """Polls the audit log directory for a record matching 'user' and
        'impersonator'.

        Returns True as soon as any file in AUDIT_LOG_DIR contains such a
        record, or False once 'timeout_secs' seconds have passed without a
        match. The directory is rescanned once per second.
        """
        # The audit event might not show up immediately (the audit logs are flushed to disk
        # on regular intervals), so keep polling until a matching record is found.
        started_at = time()
        while time() - started_at < timeout_secs:
            if any(
                    self.__find_matching_audit_record(name, user, impersonator)
                    for name in os.listdir(self.AUDIT_LOG_DIR)):
                return True
            sleep(1)
        return False

    def __find_matching_audit_record(self, audit_file_name, user,
                                     impersonator):
        """Returns True if the named audit log file holds a matching record.

        Every line of the file is parsed as JSON. Empty objects are skipped;
        otherwise the entry stored under the smallest key is compared against
        'user' and 'impersonator'."""
        log_path = os.path.join(self.AUDIT_LOG_DIR, audit_file_name)
        with open(log_path) as audit_log_file:
            for raw_line in audit_log_file.readlines():
                events = json.loads(raw_line)
                if len(events) == 0:
                    continue
                event = events[min(events)]
                if event['user'] == user and event['impersonator'] == impersonator:
                    return True
        return False

    def __run_stmt_and_verify_profile_access(self, stmt, has_access,
                                             close_operation):
        """Runs 'stmt', then requests its runtime profile and exec summary.

        If 'has_access' is true, both responses must pass
        TestHS2.check_response and the profile must contain "Plan: ".
        Otherwise both responses must carry the not-authorized message for
        the current OS user. If 'close_operation' is true, the operation is
        closed before the profile and exec summary are requested."""
        from tests.hs2.test_hs2 import TestHS2

        def assert_access_denied(response):
            assert "User %s is not authorized to access the runtime profile or "\
                "execution summary." % (getuser()) in str(response)

        stmt_req = TCLIService.TExecuteStatementReq()
        stmt_req.sessionHandle = self.session_handle
        stmt_req.statement = stmt
        stmt_resp = self.hs2_client.ExecuteStatement(stmt_req)
        TestHS2.check_response(stmt_resp)

        if close_operation:
            close_req = TCLIService.TCloseOperationReq()
            close_req.operationHandle = stmt_resp.operationHandle
            TestHS2.check_response(self.hs2_client.CloseOperation(close_req))

        # Runtime profile
        profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
        profile_req.operationHandle = stmt_resp.operationHandle
        profile_req.sessionHandle = self.session_handle
        profile_resp = self.hs2_client.GetRuntimeProfile(profile_req)

        if not has_access:
            assert_access_denied(profile_resp)
        else:
            TestHS2.check_response(profile_resp)
            assert "Plan: " in profile_resp.profile

        # Exec summary
        summary_req = ImpalaHiveServer2Service.TGetExecSummaryReq()
        summary_req.operationHandle = stmt_resp.operationHandle
        summary_req.sessionHandle = self.session_handle
        summary_resp = self.hs2_client.GetExecSummary(summary_req)

        if not has_access:
            assert_access_denied(summary_resp)
        else:
            TestHS2.check_response(summary_resp)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=" +
        SENTRY_CONFIG_FILE,
        catalogd_args="--sentry_config=" + SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(prefix="test_deprecated_none_",
                                        dir=os.getenv("LOG_DIR")))
    def test_deprecated_flag_doesnt_show(self):
        """Checks that no log file contains the authorization_policy_file
        deprecation message when the cluster is started without that flag."""
        assert_no_files_in_dir_contain(
            self.impala_log_dir, "authorization_policy_file " +
            "flag is deprecated. Object Ownership feature is not supported")

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--server_name=server1\
      --authorization_policy_file=%s\
      --authorization_policy_provider_class=%s" % (
        AUTH_POLICY_FILE,
        "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider"
    ),
                                      impala_log_dir=tempfile.mkdtemp(
                                          prefix="test_deprecated_",
                                          dir=os.getenv("LOG_DIR")))
    def test_deprecated_flags(self):
        """Checks that a log file contains the authorization_policy_file
        deprecation message when the cluster is started with that flag."""
        assert_file_in_dir_contains(
            self.impala_log_dir, "authorization_policy_file flag" +
            " is deprecated. Object Ownership feature is not supported")

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
        catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(
            prefix="test_catalog_restart_", dir=os.getenv("LOG_DIR")))
    def test_catalog_restart(self, unique_role):
        """IMPALA-7713: Tests that a catalogd restart when authorization is enabled
        resets the previous privileges stored in impalad's catalog, so that no stale
        privilege data is left in the impalad's catalog.

        Three roles are created, each granted ALL on one database; the grants are
        verified before and after restarting only the catalogd."""
        # (role suffix, database granted to that role), in creation order.
        role_dbs = [("foo", "functional"),
                    ("bar", "functional_kudu"),
                    ("baz", "functional_avro")]

        def assert_privileges():
            # Every role must show exactly its single database-level grant.
            for suffix, db in role_dbs:
                grants = self.client.execute(
                    "show grant role %s_%s" % (unique_role, suffix))
                TestAuthorization._check_privileges(
                    grants, [["database", db, "", "", "", "all", "false"]])

        self.role_cleanup(unique_role)
        try:
            # Create all roles first, then issue the grants, one per role.
            for suffix, _ in role_dbs:
                self.client.execute(
                    "create role %s_%s" % (unique_role, suffix))
            for suffix, db in role_dbs:
                self.client.execute(
                    "grant all on database %s to role %s_%s" %
                    (db, unique_role, suffix))

            assert_privileges()
            restart_args = [
                "--catalogd_args=--sentry_config=%s" % SENTRY_CONFIG_FILE,
                "--restart_catalogd_only"
            ]
            self._start_impala_cluster(restart_args)
            assert_privileges()
        finally:
            self.role_cleanup(unique_role)

    def role_cleanup(self, role_name_match):
        """Drops every role whose name contains role_name_match.

        The candidate names are the rows returned by "show roles" on self.client.
        """
        all_roles = self.client.execute("show roles").data
        for name in all_roles:
            if role_name_match not in name:
                continue
            self.client.execute("drop role %s" % name)

    @staticmethod
    def _check_privileges(result, expected):
        def columns(row):
            cols = row.split("\t")
            return cols[0:len(cols) - 1]

        assert map(columns, result.data) == expected

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" %
        SENTRY_CONFIG_FILE,
        catalogd_args="--sentry_config=%s" % SENTRY_CONFIG_FILE,
        # Name the log directory after this test; the prefix used to be the one
        # from test_catalog_restart, which made the two tests' logs hard to tell apart.
        impala_log_dir=tempfile.mkdtemp(prefix="test_catalog_object_",
                                        dir=os.getenv("LOG_DIR")))
    def test_catalog_object(self, unique_role):
        """IMPALA-7721: Tests /catalog_object web API for principal and privilege.

        A role with a SELECT grant on database `functional` is created, then the
        catalog object dump is fetched from both the catalogd and the first impalad
        for the existing principal/privilege and for non-existent ones."""
        # Privilege name pattern; the trailing %s is the numeric principal id.
        privilege_fmt = \
            "server=server1->db=%s->action=select->grantoption=false.%s.ROLE"

        self.role_cleanup(unique_role)
        try:
            self.client.execute("create role %s" % unique_role)
            self.client.execute(
                "grant select on database functional to role %s" % unique_role)
            for service in [
                    self.cluster.catalogd.service,
                    self.cluster.get_first_impalad().service
            ]:
                obj_dump = service.get_catalog_object_dump(
                    "PRINCIPAL", "%s.ROLE" % unique_role)
                assert "catalog_version" in obj_dump

                # Get the privilege associated with that principal ID. The ID is
                # parsed out of the principal's dump.
                principal_id = re.search(r"principal_id \(i32\) = (\d+)",
                                         obj_dump)
                assert principal_id is not None
                obj_dump = service.get_catalog_object_dump(
                    "PRIVILEGE",
                    urllib.quote(
                        privilege_fmt % ("functional", principal_id.group(1))))
                assert "catalog_version" in obj_dump

                # Get the principal that does not exist.
                obj_dump = service.get_catalog_object_dump(
                    "PRINCIPAL", "doesnotexist.ROLE")
                assert "CatalogException" in obj_dump

                # Get the privilege that does not exist.
                obj_dump = service.get_catalog_object_dump(
                    "PRIVILEGE",
                    urllib.quote(
                        privilege_fmt % ("doesntexist", principal_id.group(1))))
                assert "CatalogException" in obj_dump
        finally:
            self.role_cleanup(unique_role)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" %
        SENTRY_CONFIG_FILE,
        catalogd_args=
        "--sentry_config=%s --sentry_catalog_polling_frequency_s=3600" %
        SENTRY_CONFIG_FILE,
        impala_log_dir=tempfile.mkdtemp(
            prefix="test_invalidate_metadata_sentry_unavailable_",
            dir=os.getenv("LOG_DIR")))
    def test_invalidate_metadata_sentry_unavailable(self, unique_role):
        """IMPALA-7824: Tests that running INVALIDATE METADATA when Sentry is
        unavailable should not cause Impala to hang.

        The statement must fail with an authorization-refresh error while Sentry is
        stopped and succeed again once Sentry has been restarted."""
        self.role_cleanup(unique_role)
        # Tracks whether this test currently has Sentry stopped, so that a failing
        # assertion cannot leave the service down for the cleanup below.
        sentry_stopped = False
        try:
            group_name = grp.getgrnam(getuser()).gr_name
            self.client.execute("create role %s" % unique_role)
            self.client.execute("grant all on server to role %s" % unique_role)
            self.client.execute("grant role %s to group `%s`" %
                                (unique_role, group_name))

            self._stop_sentry_service()
            sentry_stopped = True
            # Calling INVALIDATE METADATA when Sentry is unavailable should return an error.
            result = self.execute_query_expect_failure(self.client,
                                                       "invalidate metadata")
            result_str = str(result)
            assert "MESSAGE: CatalogException: Error refreshing authorization policy:" \
                   in result_str
            assert "CAUSED BY: ImpalaRuntimeException: Error refreshing authorization policy." \
                   " Sentry is unavailable. Ensure Sentry is up:" in result_str

            self._start_sentry_service(SENTRY_CONFIG_FILE)
            sentry_stopped = False
            # Calling INVALIDATE METADATA after Sentry is up should not return an error.
            self.execute_query_expect_success(self.client,
                                              "invalidate metadata")
        finally:
            # Bring Sentry back before dropping roles if the test bailed out while
            # the service was still down.
            if sentry_stopped:
                self._start_sentry_service(SENTRY_CONFIG_FILE)
            self.role_cleanup(unique_role)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="--server_name=server1 --sentry_config=%s" % SENTRY_CONFIG_FILE,
        catalogd_args=(
            "--sentry_config=%s --sentry_catalog_polling_frequency_s=3600 " %
            SENTRY_CONFIG_FILE),
        impala_log_dir=tempfile.mkdtemp(
            prefix="test_refresh_authorization_", dir=os.getenv("LOG_DIR")))
    def test_refresh_authorization(self, unique_role):
        """Tests the refresh authorization statement by adding and removing roles and
        privileges externally. A long Sentry polling interval is configured so that
        authorization metadata updated externally is not polled by Impala, which
        lets the test exercise an explicit call to refresh authorization.

        The scenario runs twice: first with sync_ddl=1, then with sync_ddl=0."""
        group_name = grp.getgrnam(getuser()).gr_name
        self.role_cleanup(unique_role)
        for sync_ddl in (1, 0):
            query_options = dict(sync_ddl=sync_ddl)
            if sync_ddl:
                # When sync_ddl is True, we want to ensure the changes are
                # propagated to all coordinators, so verify through every impalad.
                clients = [impalad.service.create_beeswax_client()
                           for impalad in self.cluster.impalads]
            else:
                clients = [self.client]
            setup_statements = (
                "create role %s" % unique_role,
                "grant role %s to group `%s`" % (unique_role, group_name),
                "grant refresh on server to %s" % unique_role,
            )
            try:
                for statement in setup_statements:
                    self.client.execute(statement)

                self.validate_refresh_authorization_roles(
                    unique_role, query_options, clients)
                self.validate_refresh_authorization_privileges(
                    unique_role, query_options, clients)
            finally:
                self.role_cleanup(unique_role)

    def validate_refresh_authorization_roles(self, unique_role, query_options,
                                             clients):
        """Tests the refresh authorization statement by adding and removing roles
        externally (through sentryShell).

        :param unique_role: base role name; suffixed roles are derived from it
        :param query_options: query options passed to "refresh authorization"
        :param clients: clients on which the post-refresh state is verified
        """
        def sentry_shell(action_flag, role_name):
            # Runs sentryShell through bash with the given role action flag.
            command = "%s/bin/sentryShell --conf %s/sentry-site.xml %s -r %s" % (
                os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"),
                action_flag, role_name)
            subprocess.check_call(["/bin/bash", "-c", command],
                                  stdout=sys.stdout,
                                  stderr=sys.stderr)

        def role_listed(client, role_name):
            # True if any "show roles" row seen by `client` contains role_name.
            rows = self.execute_query_expect_success(client, "show roles").data
            return any(role_name in row for row in rows)

        def refresh_authorization():
            self.execute_query_expect_success(self.client,
                                              "refresh authorization",
                                              query_options=query_options)

        try:
            # Create two roles inside Impala.
            for suffix in ("internal1", "internal2"):
                self.client.execute("create role %s_%s" % (unique_role, suffix))

            # Drop an existing role (_internal1) outside Impala.
            dropped_role = "%s_internal1" % unique_role
            sentry_shell("-dr", dropped_role)
            # Impala still lists the role until authorization is refreshed.
            assert role_listed(self.client, dropped_role)
            refresh_authorization()
            for client in clients:
                assert not role_listed(client, dropped_role)

            # Add a new role outside Impala.
            added_role = "%s_external" % unique_role
            sentry_shell("-cr", added_role)
            # The new role is not visible until authorization is refreshed.
            assert not role_listed(self.client, added_role)
            refresh_authorization()
            for client in clients:
                assert role_listed(client, added_role)
        finally:
            for suffix in ("internal1", "internal2", "external"):
                self.role_cleanup("%s_%s" % (unique_role, suffix))

    def validate_refresh_authorization_privileges(self, unique_role,
                                                  query_options, clients):
        """Tests the refresh authorization statement by adding a privilege
        externally (through sentryShell).

        :param unique_role: role that receives the externally granted privilege
        :param query_options: query options passed to "refresh authorization"
        :param clients: clients on which the pre/post-refresh state is verified
        """
        select_stmt = "select * from functional.alltypes limit 1"
        show_grant_stmt = "show grant role %s" % unique_role

        # Grant select privilege outside Impala.
        grant_command = (
            "%s/bin/sentryShell --conf %s/sentry-site.xml -gpr -p "
            "'server=server1->db=functional->table=alltypes->action=select' -r %s"
            % (os.getenv("SENTRY_HOME"), os.getenv("SENTRY_CONF_DIR"),
               unique_role))
        subprocess.check_call(["/bin/bash", "-c", grant_command],
                              stdout=sys.stdout,
                              stderr=sys.stderr)

        # Before refresh authorization, there should only be one refresh privilege.
        grants = self.execute_query_expect_success(self.client,
                                                   show_grant_stmt).data
        assert len(grants) == 1
        assert any("refresh" in row for row in grants)

        # The select must still be rejected everywhere before the refresh.
        for client in clients:
            self.execute_query_expect_failure(client, select_stmt)

        self.execute_query_expect_success(self.client,
                                          "refresh authorization",
                                          query_options=query_options)

        for client in clients:
            # Ensure select privilege was granted after refresh authorization.
            grants = self.execute_query_expect_success(client,
                                                       show_grant_stmt).data
            assert len(grants) == 2
            assert any("select" in row for row in grants)
            assert any("refresh" in row for row in grants)
            self.execute_query_expect_success(client, select_stmt)
Exemplo n.º 53
0
class TestAuthorizedProxy(CustomClusterTestSuite):
    """Custom-cluster tests for authorized proxy users and groups (impersonation)
    over HS2, with either Sentry or Ranger providing authorization."""

    def setup(self):
        """Opens a buffered, binary-protocol HS2 connection to the first impalad."""
        host, port = (self.cluster.impalads[0].service.hostname,
                      self.cluster.impalads[0].service.hs2_port)
        self.socket = TSocket(host, port)
        self.transport = TBufferedTransport(self.socket)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Closes the HS2 socket and removes the audit log directory."""
        if self.socket:
            self.socket.close()
        shutil.rmtree(AUDIT_LOG_DIR, ignore_errors=True)

    def _execute_hs2_stmt(self, statement, verify=True):
        """
        Executes an hs2 statement in the session stored in self.session_handle.

        :param statement: the statement to execute
        :param verify: If set to true, will throw an exception on a failed hs2 execution
        :return: the result of execution
        """
        from tests.hs2.test_hs2 import TestHS2
        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = statement
        result = self.hs2_client.ExecuteStatement(execute_statement_req)
        if verify:
            TestHS2.check_response(result)
        return result

    def _open_hs2(self, user, configuration, verify=True):
        """
        Open a session with hs2

        :param user: the user to open the session
        :param configuration: the configuration for the session
        :param verify: If set to true, will throw an exception on failed session open
        :return: the result of opening the session
        """
        from tests.hs2.test_hs2 import TestHS2
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = user
        open_session_req.configuration = configuration
        resp = self.hs2_client.OpenSession(open_session_req)
        if verify:
            TestHS2.check_response(resp)
        return resp

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar;hue={1} ".
        format(SENTRY_IMPALAD_ARGS, getuser()),
        catalogd_args=SENTRY_CATALOGD_ARGS)
    def test_authorized_proxy_user_with_sentry(self, unique_role):
        """Tests authorized proxy user with Sentry using HS2."""
        self._test_authorized_proxy_with_sentry(unique_role,
                                                self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar;hue={1} ".
        format(RANGER_IMPALAD_ARGS, getuser()),
        catalogd_args=RANGER_CATALOGD_ARGS)
    def test_authorized_proxy_user_with_ranger(self):
        """Tests authorized proxy user with Ranger using HS2."""
        # This test fails due to bumping up the Ranger to a newer version.
        # TODO(fangyu.rao): Fix in a follow up commit.
        pytest.xfail("failed due to bumping up the Ranger to a newer version")
        self._test_authorized_proxy_with_ranger(self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=hue=bar "
        "--authorized_proxy_group_config=foo=bar;hue={1}".format(
            SENTRY_IMPALAD_ARGS,
            grp.getgrgid(os.getgid()).gr_name),
        catalogd_args=SENTRY_CATALOGD_ARGS)
    def test_authorized_proxy_group_with_sentry(self, unique_role):
        """Tests authorized proxy group with Sentry using HS2."""
        self._test_authorized_proxy_with_sentry(unique_role,
                                                self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=hue=bar "
        "--authorized_proxy_group_config=foo=bar;hue={1}".format(
            RANGER_IMPALAD_ARGS,
            grp.getgrgid(os.getgid()).gr_name),
        catalogd_args=RANGER_CATALOGD_ARGS)
    def test_authorized_proxy_group_with_ranger(self):
        """Tests authorized proxy group with Ranger using HS2."""
        # This test fails due to bumping up the Ranger to a newer version.
        # TODO(fangyu.rao): Fix in a follow up commit.
        pytest.xfail("failed due to bumping up the Ranger to a newer version")
        self._test_authorized_proxy_with_ranger(self._test_authorized_proxy)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar "
        "--authorized_proxy_group_config=foo=bar".format(SENTRY_IMPALAD_ARGS),
        catalogd_args=SENTRY_CATALOGD_ARGS)
    def test_no_matching_user_and_group_authorized_proxy_with_sentry(self):
        """Delegation must be rejected under Sentry when neither the proxy user
        config nor the proxy group config matches."""
        self._test_no_matching_user_and_group_authorized_proxy()

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        impalad_args="{0} --authorized_proxy_user_config=foo=bar "
        "--authorized_proxy_group_config=foo=bar".format(RANGER_IMPALAD_ARGS),
        catalogd_args=RANGER_CATALOGD_ARGS)
    def test_no_matching_user_and_group_authorized_proxy_with_ranger(self):
        """Delegation must be rejected under Ranger when neither the proxy user
        config nor the proxy group config matches."""
        self._test_no_matching_user_and_group_authorized_proxy()

    def _test_no_matching_user_and_group_authorized_proxy(self):
        """Opens a session as 'hue' delegating to 'abc' and expects the
        not-authorized-to-delegate error in the response."""
        open_session_req = TCLIService.TOpenSessionReq()
        # The user names below are the ones the asserted error message refers to.
        open_session_req.username = "hue"
        open_session_req.configuration = dict()
        open_session_req.configuration["impala.doas.user"] = "abc"
        resp = self.hs2_client.OpenSession(open_session_req)
        assert "User 'hue' is not authorized to delegate to 'abc'" in str(resp)

    def _test_authorized_proxy_with_sentry(self, role, test_func):
        """Runs test_func with `role` granted ALL on tpch.lineitem through Sentry.

        :param role: name of the role to create for the test and drop afterwards
        :param test_func: zero-argument callable holding the actual test
        """
        try:
            self.session_handle = self._open_hs2(getuser(),
                                                 dict()).sessionHandle
            self._execute_hs2_stmt("create role {0}".format(role))
            self._execute_hs2_stmt(
                "grant all on table tpch.lineitem to role {0}".format(role))
            self._execute_hs2_stmt("grant role {0} to group {1}".format(
                role,
                grp.getgrnam(getuser()).gr_name))
            self._execute_hs2_stmt("grant role {0} to group {1}".format(
                role,
                grp.getgrgid(os.getgid()).gr_name))
            test_func()
        finally:
            # test_func replaces self.session_handle, so open a fresh session as
            # the current user before cleaning up.
            self.session_handle = self._open_hs2(getuser(),
                                                 dict()).sessionHandle
            self._execute_hs2_stmt(
                "grant all on server to role {0}".format(role))
            self._execute_hs2_stmt("grant role {0} to group {1}".format(
                role,
                grp.getgrnam(getuser()).gr_name))
            self._execute_hs2_stmt("drop role {0}".format(role))

    def _test_authorized_proxy_with_ranger(self, test_func):
        """Runs test_func with the current user granted ALL on tpch.lineitem
        through Ranger, and revokes the grant afterwards.

        :param test_func: zero-argument callable holding the actual test
        """
        try:
            self.session_handle = self._open_hs2(RANGER_ADMIN_USER,
                                                 dict()).sessionHandle
            self._execute_hs2_stmt(
                "grant all on table tpch.lineitem to user {0}".format(
                    getuser()))
            test_func()
        finally:
            self.session_handle = self._open_hs2(RANGER_ADMIN_USER,
                                                 dict()).sessionHandle
            self._execute_hs2_stmt(
                "revoke all on table tpch.lineitem from user {0}".format(
                    getuser()))

    def _test_authorized_proxy(self):
        """End-to-end impersonation + authorization test. Expects authorization to be
        configured before running this test"""
        # TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
        # the module within this test function, rather than as a top-level import. This way
        # the tests in that module will not get pulled when executing this test suite. The fix
        # is to split the utility code out of the TestHS2 class and support HS2 as a first
        # class citizen in our test framework.
        from tests.hs2.test_hs2 import TestHS2

        # Try to query a table we are not authorized to access.
        self.session_handle = self._open_hs2("hue", {
            "impala.doas.user": getuser()
        }).sessionHandle
        bad_resp = self._execute_hs2_stmt("describe tpch_seq.lineitem", False)
        assert "User '%s' does not have privileges to access" % getuser() in \
               str(bad_resp)

        assert self._wait_for_audit_record(user=getuser(), impersonator="hue"), \
               "No matching audit event recorded in time window"

        # Now try the same operation on a table we are authorized to access.
        good_resp = self._execute_hs2_stmt("describe tpch.lineitem")
        TestHS2.check_response(good_resp)

        # Verify the correct user information is in the runtime profile. The
        # session was opened by "hue" on behalf of the current user.
        query_id = operation_id_to_query_id(
            good_resp.operationHandle.operationId)
        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self._verify_profile_user_fields(profile_page,
                                         effective_user=getuser(),
                                         delegated_user=getuser(),
                                         connected_user="hue")

        # Try to delegate a user we are not authorized to delegate to.
        resp = self._open_hs2("hue", {"impala.doas.user": "some_user"}, False)
        assert "User 'hue' is not authorized to delegate to 'some_user'" in str(
            resp)

        # Create a new session which does not have a do_as_user and run a simple query.
        self.session_handle = self._open_hs2("hue", dict()).sessionHandle
        resp = self._execute_hs2_stmt("select 1")

        # Verify the correct user information is in the runtime profile. Since there is
        # no do_as_user the Delegated User field should be empty.
        query_id = operation_id_to_query_id(resp.operationHandle.operationId)

        profile_page = self.cluster.impalads[
            0].service.read_query_profile_page(query_id)
        self._verify_profile_user_fields(profile_page,
                                         effective_user="hue",
                                         delegated_user="",
                                         connected_user="hue")

    def _verify_profile_user_fields(self, profile_str, effective_user,
                                    connected_user, delegated_user):
        """Verifies the given runtime profile string contains the specified values for
        User, Connected User, and Delegated User"""
        assert "\n    User: {0}\n".format(effective_user) in profile_str
        assert "\n    Connected User: {0}\n".format(
            connected_user) in profile_str
        assert "\n    Delegated User: {0}\n".format(
            delegated_user) in profile_str

    def _wait_for_audit_record(self, user, impersonator, timeout_secs=30):
        """Waits until an audit log record is found that contains the given user and
        impersonator, or until the timeout is reached.

        :return: True if a matching record was found, False on timeout
        """
        # The audit event might not show up immediately (the audit logs are flushed to disk
        # on regular intervals), so poll the audit event logs until a matching record is
        # found.
        start_time = time.time()
        while time.time() - start_time < timeout_secs:
            for audit_file_name in os.listdir(AUDIT_LOG_DIR):
                if self._find_matching_audit_record(audit_file_name, user,
                                                    impersonator):
                    return True
            time.sleep(1)
        return False

    def _find_matching_audit_record(self, audit_file_name, user, impersonator):
        """Returns True if the given file under AUDIT_LOG_DIR has a record for
        `user` impersonated by `impersonator`.

        Every line of the file is parsed as one JSON object; the entry stored
        under its smallest key is the one inspected.
        """
        audit_path = os.path.join(AUDIT_LOG_DIR, audit_file_name)
        with open(audit_path) as audit_log_file:
            # Iterate the file lazily instead of loading every line up front.
            for line in audit_log_file:
                json_dict = json.loads(line)
                if not json_dict:
                    continue
                record = json_dict[min(json_dict)]
                if record["user"] == user and \
                        record["impersonator"] == impersonator:
                    return True
        return False
Exemplo n.º 54
0
class HBaseClient(object):
    """Small convenience wrapper around the HBase Thrift client (Hbase.Client)."""

    def __init__(self, ip, port=9090):
        """
        Establish the connection to the Thrift server.

        :param ip: address of the Thrift server
        :param port: port of the Thrift server
        """
        # Server address and port.
        self.__transport = TBufferedTransport(TSocket.TSocket(ip, port))
        # Wire protocol.
        protocol = TBinaryProtocol.TBinaryProtocol(self.__transport)
        # Client stub.
        self.__client = Hbase.Client(protocol)
        # Open the connection.
        self.__transport.open()

    def __del__(self):
        # The transport attribute is missing if __init__ failed before assigning
        # it (or the instance was created without __init__); nothing to close then.
        transport = getattr(self, "_HBaseClient__transport", None)
        if transport is not None:
            transport.close()

    def get_tables(self):
        """
        Get all tables.

        :return: list of table names
        """
        return self.__client.getTableNames()

    def create_table(self, table, *columns):
        """
        Create a table.

        :param table: table name
        :param columns: column family names
        """
        # A concrete list, so the value is also a real sequence on Python 3
        # (where map() would hand over a lazy iterator).
        column_families = [ColumnDescriptor(col) for col in columns]
        self.__client.createTable(table, column_families)

    def put(self, table, row, columns):
        """
        Add a record.

        :param table: table name
        :param row: row key
        :param columns: dict mapping column name to the value to store
        """
        # Unpack each (column, value) pair explicitly; mapping a two-argument
        # lambda over .items() passes a single tuple and raises TypeError.
        mutations = [Mutation(column=name, value=value)
                     for name, value in columns.items()]
        self.__client.mutateRow(table, row, mutations)

    def delete(self, table, row, column):
        """
        Delete a record.

        :param table: table name
        :param row: row key
        :param column: column to delete
        """
        self.__client.deleteAll(table, row, column)

    def scan(self, table, start_row="", columns=None):
        """
        Iterate over records, yielding one {column name: value} dict per row.

        :param table: table name
        :param start_row: row to start from
        :param columns: column families
        """
        scanner = self.__client.scannerOpen(table, start_row, columns)
        try:
            while True:
                rows = self.__client.scannerGet(scanner)
                if not rows:
                    break
                yield {name: cell.value
                       for name, cell in rows[0].columns.items()}
        finally:
            # Release the server-side scanner whether the scan ran to the end,
            # failed, or the generator was closed early.
            self.__client.scannerClose(scanner)
Exemplo n.º 55
0
def connect(server='localhost', port=9090):
    """Open a buffered Thrift transport to server:port and return an Hbase client
    that talks the binary protocol over it.

    `port` is converted with int() before use.
    """
    sock = TSocket(server, int(port))
    transport = TBufferedTransport(sock)
    transport.open()
    return Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
Exemplo n.º 56
0
class HS2TestSuite(ImpalaTestSuite):
    """Test-suite helpers for issuing requests through a HiveServer2 Thrift
    client (``self.hs2_client``) and checking the responses."""

    # Database name passed to cleanup_db() in setup() and teardown().
    TEST_DB = 'hs2_db'

    # Attribute names probed, in this order, on a column object to find the
    # member that is set (see get_num_rows() and column_results_to_string()).
    HS2_V6_COLUMN_TYPES = [
        'boolVal', 'stringVal', 'byteVal', 'i16Val', 'i32Val', 'i64Val',
        'doubleVal', 'binaryVal'
    ]

    def setup(self):
        """Run cleanup_db on TEST_DB and open an HS2 client connection.

        The address comes from IMPALAD_HS2_HOST_PORT ("host:port"). The
        socket, transport, protocol and client are stored on the instance as
        ``socket``, ``transport``, ``protocol`` and ``hs2_client``.
        """
        self.cleanup_db(self.TEST_DB)
        hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
        self.socket = TSocket(hs2_host, hs2_port)
        buffered = TBufferedTransport(self.socket)
        self.transport = buffered
        buffered.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(buffered)
        self.hs2_client = ImpalaHiveServer2Service.Client(self.protocol)

    def teardown(self):
        """Run cleanup_db on TEST_DB, then close the HS2 socket if there is one.

        The socket is closed in a ``finally`` clause so that a failing cleanup
        does not leak the connection; the cleanup error still propagates.
        """
        try:
            self.cleanup_db(self.TEST_DB)
        finally:
            # setup() may have failed before self.socket was assigned.
            sock = getattr(self, 'socket', None)
            if sock:
                sock.close()

    @staticmethod
    def check_response(
            response,
            expected_status_code=TCLIService.TStatusCode.SUCCESS_STATUS,
            expected_error_prefix=None):
        """Assert that response carries the expected status code.

        When a non-success status is expected and expected_error_prefix is
        given, additionally assert that the error message starts with it.
        """
        status = response.status
        assert status.statusCode == expected_status_code
        if expected_status_code == TCLIService.TStatusCode.SUCCESS_STATUS:
            return
        if expected_error_prefix is None:
            return
        assert status.errorMessage.startswith(expected_error_prefix)

    def close(self, op_handle):
        """Close the operation identified by op_handle and assert that the
        CloseOperation call reported success."""
        request = TCLIService.TCloseOperationReq()
        request.operationHandle = op_handle
        response = self.hs2_client.CloseOperation(request)
        status_code = response.status.statusCode
        assert status_code == TCLIService.TStatusCode.SUCCESS_STATUS

    def get_num_rows(self, result_set):
        """Return the number of rows held by result_set.

        If no columns are present the length of ``result_set.rows`` is
        returned. Otherwise the count is the number of values in the first
        column's typed member, i.e. the first attribute listed in
        HS2_V6_COLUMN_TYPES that is not None.

        Raises AssertionError if the first column has no such member set.
        """
        # rows will always be set, so the only way to tell if we should use it
        # is to see if any columns are set
        columns = result_set.columns
        if not columns:
            return len(result_set.rows)

        first_column = columns[0]
        for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
            typed_col = getattr(first_column, col_type)
            if typed_col is not None:
                return len(typed_col.values)

        # Raised explicitly (rather than ``assert False``) so the failure is
        # not stripped when Python runs with -O.
        raise AssertionError(
            "first column has none of the HS2_V6_COLUMN_TYPES members set")

    def fetch_at_most(self, handle, orientation, size, expected_num_rows=None):
        """Fetches at most size number of rows from the query identified by the
        given operation handle. Uses the given fetch orientation. Asserts that
        the fetch returns a success status, and that the number of rows
        returned is equal to given expected_num_rows (if given). It is only
        safe for expected_num_rows to be 0 or 1: Impala does not guarantee that
        a larger result set will be returned in one go. Use fetch_until() for
        repeated fetches.

        Returns the fetch response.
        """
        assert expected_num_rows is None or expected_num_rows in (0, 1)
        fetch_results_req = TCLIService.TFetchResultsReq()
        fetch_results_req.operationHandle = handle
        fetch_results_req.orientation = orientation
        fetch_results_req.maxRows = size
        fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
        HS2TestSuite.check_response(fetch_results_resp)
        if expected_num_rows is not None:
            actual_num_rows = self.get_num_rows(fetch_results_resp.results)
            assert actual_num_rows == expected_num_rows, \
                'Expected %s rows, fetched %s' % (expected_num_rows,
                                                  actual_num_rows)
        return fetch_results_resp

    def fetch_until(self, handle, orientation, size, expected_num_rows=None):
        """Fetch repeatedly until the expected number of rows has arrived.

        Issues one fetch of up to 'size' rows with the given orientation, then
        keeps issuing FETCH_NEXT fetches for the remaining
        (size - rows fetched so far) rows while fewer than 'expected_num_rows'
        rows have been received. If 'expected_num_rows' is None it defaults to
        'size', so the effect is to both ask for and expect the same number of
        rows.

        Asserts that every response is successful, that each follow-up fetch
        returns at least one row, and that the final total equals
        'expected_num_rows'. Returns nothing.
        """
        assert expected_num_rows is None or (size >= expected_num_rows)
        if expected_num_rows is None:
            target = size
        else:
            target = expected_num_rows

        request = TCLIService.TFetchResultsReq()
        request.operationHandle = handle
        request.orientation = orientation
        request.maxRows = size
        response = self.hs2_client.FetchResults(request)
        HS2TestSuite.check_response(response)
        total_fetched = self.get_num_rows(response.results)

        while total_fetched < target:
            # Never ask for more than 'size' rows overall.
            request.maxRows = size - total_fetched
            request.orientation = TCLIService.TFetchOrientation.FETCH_NEXT
            response = self.hs2_client.FetchResults(request)
            HS2TestSuite.check_response(response)
            batch_size = self.get_num_rows(response.results)
            assert batch_size > 0
            total_fetched += batch_size

        assert total_fetched == target

    def fetch_fail(self, handle, orientation, expected_error_prefix):
        """Issue a fetch of up to 100 rows that is expected to fail.

        Asserts that the response has an error status and that its error
        message starts with expected_error_prefix. Returns the response.
        """
        request = TCLIService.TFetchResultsReq()
        request.operationHandle = handle
        request.orientation = orientation
        request.maxRows = 100
        response = self.hs2_client.FetchResults(request)
        HS2TestSuite.check_response(
            response,
            expected_status_code=TCLIService.TStatusCode.ERROR_STATUS,
            expected_error_prefix=expected_error_prefix)
        return response

    def result_metadata(self, handle):
        """Request the result-set metadata for the operation identified by
        handle, assert the call succeeded, and return the response."""
        metadata_req = TCLIService.TGetResultSetMetadataReq()
        metadata_req.operationHandle = handle
        metadata_resp = self.hs2_client.GetResultSetMetadata(metadata_req)
        HS2TestSuite.check_response(metadata_resp)
        return metadata_resp

    def column_results_to_string(self, columns):
        """Quick-and-dirty way to get a readable string to compare the output
        of a columnar-oriented query to its expected output.

        The row count is taken from the first column. Each row is rendered as
        the comma-separated string values of every column followed by a
        newline; a value whose bit is set in the column's ``nulls`` bitmap is
        rendered as "NULL". Columns with no typed member set are skipped.

        Returns a (num_rows, formatted_string) tuple.
        """
        def typed_member(column):
            # First attribute from HS2_V6_COLUMN_TYPES that is set, or None.
            for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES:
                typed_col = getattr(column, col_type)
                if typed_col is not None:
                    return typed_col
            return None

        # Determine the number of rows from the typed member of the first
        # column.
        first_typed = typed_member(columns[0])
        num_rows = 0 if first_typed is None else len(first_typed.values)

        # Resolve each column's typed member once instead of once per row.
        typed_cols = [tc for tc in (typed_member(c) for c in columns)
                      if tc is not None]

        lines = []
        for i in range(num_rows):
            row = []
            for typed_col in typed_cols:
                # Slice rather than index so ord() receives a length-1
                # str (Python 2) or bytes (Python 3); '//' keeps the byte
                # offset an integer on both.
                byte_idx = i // 8
                indicator = ord(typed_col.nulls[byte_idx:byte_idx + 1])
                if indicator & (1 << (i % 8)):
                    row.append("NULL")
                else:
                    row.append(str(typed_col.values[i]))
            lines.append(", ".join(row) + "\n")
        return (num_rows, "".join(lines))

    def get_operation_status(self, operation_handle):
        """Call GetOperationStatus for the given operation handle and return
        the response unchanged (no status check is performed here)."""
        request = TCLIService.TGetOperationStatusReq()
        request.operationHandle = operation_handle
        return self.hs2_client.GetOperationStatus(request)

    def wait_for_operation_state(self, operation_handle, expected_state,
                                 timeout=10, interval=1):
        """Waits for the operation to reach expected_state by polling
        GetOperationStatus every interval seconds.

        Returns the status response in which operationState equals
        expected_state. Every polled response is asserted to be successful.
        Raises AssertionError if the state is not reached within timeout
        seconds.
        """
        # Tracked separately so the failure message is well-defined even when
        # the loop body never runs (e.g. timeout <= 0).
        actual_state = None
        start_time = time()
        while time() - start_time < timeout:
            status_resp = self.get_operation_status(operation_handle)
            HS2TestSuite.check_response(status_resp)
            actual_state = status_resp.operationState
            # Compare by value: identity ('is') is not a reliable equality
            # test for integer state codes.
            if actual_state == expected_state:
                return status_resp
            sleep(interval)
        # Raised explicitly so the timeout is still reported under python -O.
        raise AssertionError(
            'Did not reach expected operation state %s in time, actual state was '
            '%s' % (expected_state, actual_state))