def setup_class(cls):
  """Per-suite setup: connect to the Hive Metastore, Impala, and the
  cluster filesystem before any test in this suite runs.

  Populates (on the class): hive_client/hive_transport, client (Impala),
  default_query_options, impalad_test_service, hdfs_client, and
  filesystem_client (backend-specific).
  """
  cls.hive_client = None
  cls.client = None
  # Open a Thrift connection to the Hive Metastore; it is used for
  # executing some test SETUP steps.
  hms_host, hms_port = pytest.config.option.metastore_server.split(':')
  transport_kind = 'kerberos' if pytest.config.option.use_kerberos else 'buffered'
  cls.hive_transport = create_transport(
      host=hms_host,
      port=hms_port,
      service=pytest.config.option.hive_service_name,
      transport_type=transport_kind)
  cls.hive_client = ThriftHiveMetastore.Client(
      TBinaryProtocol.TBinaryProtocol(cls.hive_transport))
  cls.hive_transport.open()
  # Create a connection to Impala.
  cls.client = cls.create_impala_client(IMPALAD)
  # Default query options are populated on demand.
  cls.default_query_options = {}
  cls.impalad_test_service = cls.create_impala_service()
  cls.hdfs_client = cls.create_hdfs_client()
  # Choose the filesystem client that matches the target storage backend.
  if IS_S3:
    cls.filesystem_client = S3Client(S3_BUCKET_NAME)
  elif IS_ADLS:
    cls.filesystem_client = ADLSClient(ADLS_STORE_NAME)
  else:
    cls.filesystem_client = cls.hdfs_client
def setup_class(cls):
  """Per-suite setup: connect to the Hive Metastore, Impala, and HDFS
  before any test in this suite runs.

  Populates (on the class): hive_client/hive_transport, client (Impala),
  impalad_test_service, and hdfs_client.
  """
  cls.hive_client, cls.client = [None, None]
  # Create a Hive Metastore Client (used for executing some test SETUP steps).
  metastore_host, metastore_port = pytest.config.option.metastore_server.split(':')
  trans_type = 'buffered'
  if pytest.config.option.use_kerberos:
    trans_type = 'kerberos'
  cls.hive_transport = create_transport(
      host=metastore_host,
      port=metastore_port,
      service=pytest.config.option.hive_service_name,
      transport_type=trans_type)
  protocol = TBinaryProtocol.TBinaryProtocol(cls.hive_transport)
  cls.hive_client = ThriftHiveMetastore.Client(protocol)
  cls.hive_transport.open()
  # Create a connection to Impala.
  cls.client = cls.create_impala_client(IMPALAD)
  cls.impalad_test_service = ImpaladService(IMPALAD.split(':')[0])
  if pytest.config.option.namenode_http_address is None:
    cls.hdfs_client = get_hdfs_client_from_conf(HDFS_CONF)
  else:
    host, port = pytest.config.option.namenode_http_address.split(":")
    # BUG FIX: host and port were parsed from --namenode_http_address but
    # never passed to get_hdfs_client(), so the flag was silently ignored.
    cls.hdfs_client = get_hdfs_client(host, port)
def setup_class(cls):
  """Per-suite setup: connect to the Hive Metastore and open both a Beeswax
  and (best-effort) an HS2 connection to Impala.

  Populates (on the class): hive_client/hive_transport, client (Beeswax),
  and hs2_client (may stay None if HS2 setup fails).
  """
  cls.hive_client, cls.client, cls.hs2_client = [None, None, None]
  # Create a Hive Metastore Client (used for executing some test SETUP steps).
  metastore_host, metastore_port = pytest.config.option.metastore_server.split(':')
  trans_type = 'buffered'
  if pytest.config.option.use_kerberos:
    trans_type = 'kerberos'
  cls.hive_transport = create_transport(
      host=metastore_host,
      port=metastore_port,
      service=pytest.config.option.hive_service_name,
      transport_type=trans_type)
  protocol = TBinaryProtocol.TBinaryProtocol(cls.hive_transport)
  cls.hive_client = ThriftHiveMetastore.Client(protocol)
  cls.hive_transport.open()
  # Create a connection to Impala, self.client is Beeswax so that existing tests that
  # assume beeswax do not need modification (yet).
  cls.client = cls.create_impala_client(protocol='beeswax')
  try:
    cls.hs2_client = cls.create_impala_client(protocol='hs2')
  except Exception as e:
    # HS2 connection can fail for benign reasons, e.g. running with unsupported auth.
    # BUG FIX: stdlib logging interpolates %-style lazy args, so the original
    # "{0}" placeholder was never substituted; also "except Exception, e" is
    # Python-2-only syntax ("as" works on 2.6+ and 3).
    LOG.info("HS2 connection setup failed, continuing...: %s", e)
def get_schema(dbs, host_friendlyname, host, port=10001): try: transport = TSocket.TSocket(host, port) transport = TTransport.TBufferedTransport(transport) protocol = TBinaryProtocol.TBinaryProtocol(transport) global metastore_client metastore_client = ThriftHiveMetastore.Client(protocol) transport.open() data_dict = {} for db in dbs: data_dict[db] = {} tables = metastore_client.get_all_tables(db) for table in tables: data_dict[db][table] = [] print "HOST: {2} DB: {0} TABLE: {1}".format( db, table, host_friendlyname) for field in metastore_client.get_fields(db, table): data_dict[db][table].append(field) print field, field.name f = open("{0}_schema.out".format(host_friendlyname), 'w') f.write(str(data_dict)) finally: pass
def __init__(self, hms_client=None):
  """Wrap an existing Hive Metastore client or build and open a new one.

  Args:
    hms_client: an already-connected ThriftHiveMetastore client to reuse.
      When falsy, a fresh Thrift connection is created.
      NOTE(review): metastore_host/metastore_port/service/trans_type are
      assumed to come from the enclosing scope — confirm against the module.
  """
  if not hms_client:
    # No client supplied: open our own transport to the metastore.
    transport = create_transport(host=metastore_host, port=metastore_port,
                                 service=service, transport_type=trans_type)
    self.hms_client = ThriftHiveMetastore.Client(
        TBinaryProtocol.TBinaryProtocol(transport))
    transport.open()
  else:
    self.hms_client = hms_client
def __enter__(self):
  """Open a Thrift connection to the Hive metastore (host/port taken from
  luigi's 'hive' config section) and return a ThriftHiveMetastore client.

  The open transport is stored on self.transport so __exit__ can close it.
  Raises Exception when the Hive thrift bindings are not importable.
  """
  try:
    from thrift import Thrift
    from thrift.transport import TSocket
    from thrift.transport import TTransport
    from thrift.protocol import TBinaryProtocol
    from hive_metastore import ThriftHiveMetastore
    config = luigi.configuration.get_config()
    host = config.get('hive', 'metastore_host')
    port = config.getint('hive', 'metastore_port')
    transport = TSocket.TSocket(host, port)
    transport = TTransport.TBufferedTransport(transport)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    transport.open()
    self.transport = transport
    return ThriftHiveMetastore.Client(protocol)
  except ImportError as e:
    # BUG FIX: "except ImportError, e" is Python-2-only syntax; "as" works
    # on Python 2.6+ and 3 and matches the sibling __enter__ implementation
    # in this file.
    raise Exception('Could not import Hive thrift library:' + str(e))
def __enter__(self):
  """Connect to the Hive metastore configured in luigi's 'hive' section and
  return a ThriftHiveMetastore client.

  The open transport is kept on self.transport so __exit__ can close it.
  Raises Exception when the Hive thrift bindings cannot be imported.
  """
  try:
    from thrift.protocol import TBinaryProtocol
    from thrift.transport import TSocket
    from thrift.transport import TTransport
    # Note that this will only work with a CDH release.
    # This uses the thrift bindings generated by the ThriftHiveMetastore service in Beeswax.
    # If using the Apache release of Hive this import will fail.
    from hive_metastore import ThriftHiveMetastore

    conf = luigi.configuration.get_config()
    sock = TSocket.TSocket(conf.get('hive', 'metastore_host'),
                           conf.getint('hive', 'metastore_port'))
    buffered = TTransport.TBufferedTransport(sock)
    proto = TBinaryProtocol.TBinaryProtocol(buffered)
    buffered.open()
    self.transport = buffered
    return ThriftHiveMetastore.Client(proto)
  except ImportError as e:
    raise Exception('Could not import Hive thrift library:' + str(e))
def __init__(self, host, port):
  """Build (but do not open) a Thrift client for the Hive Metastore.

  Host falls back to $HMS_HOST, then 'localhost'; port falls back to
  $HMS_PORT, then DEFAULT_PORT. A "host:port" value in host overrides
  any explicitly supplied port.
  """
  self.logger = logging.getLogger(__name__)
  # Resolve the host: argument, then environment, then localhost.
  host = host or environ.get("HMS_HOST") or 'localhost'
  if ':' in host:
    # "host:port" form: the embedded port wins over the port argument.
    pieces = host.split(':')
    host = pieces[0]
    port = int(pieces[1])
  # Resolve the port: argument, then environment, then the default.
  port = port or environ.get("HMS_PORT") or DEFAULT_PORT
  self.__transport = TTransport.TBufferedTransport(
      TSocket.TSocket(host, int(port)))
  self.__client = ThriftHiveMetastore.Client(
      TBinaryProtocol.TBinaryProtocol(self.__transport))
dest="metastore_hostport", default="localhost:9083", help="Metastore hostport to wait for.") parser.add_option( "--transport", dest="transport", default="buffered", help="Transport to use for connecting to HiveServer2. Valid values: " "'buffered', 'kerberos', 'plain_sasl'.") options, args = parser.parse_args() metastore_host, metastore_port = options.metastore_hostport.split(':') hive_transport = create_transport(metastore_host, metastore_port, "hive", options.transport) protocol = TBinaryProtocol.TBinaryProtocol(hive_transport) hive_client = ThriftHiveMetastore.Client(protocol) # Try to connect to the Hive metastore now = time.time() TIMEOUT_SECONDS = 30.0 while time.time() - now < TIMEOUT_SECONDS: try: hive_transport.open() resp = hive_client.get_database("default") if resp is not None: print "Metastore service is up at %s." % options.metastore_hostport exit(0) except Exception as e: if "SASL" in e.message: # Bail out on SASL failures print "SASL failure when attempting connection:" raise