class JobServerApi(object):
    """REST client for a job server's ``healthz``, ``jobs``, ``contexts`` and ``jars`` endpoints.

    All requests go through a ``Resource`` wrapping an ``HttpClient`` rooted at
    the URL given to the constructor. The acting user is kept in thread-local
    storage, so each thread has to call ``setuser`` before reading ``user``.
    """

    def __init__(self, oozie_url):
        """Create the HTTP client and resource rooted at ``oozie_url``."""
        self._url = posixpath.join(oozie_url)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = False
        # To store user info
        self._thread_local = threading.local()

    def __str__(self):
        return "JobServerApi at %s" % (self._url,)

    @property
    def url(self):
        """Base URL the client talks to."""
        return self._url

    @property
    def security_enabled(self):
        """Security flag; always False for this client."""
        return self._security_enabled

    @property
    def user(self):
        """User stored by ``setuser`` for the current thread.

        Raises AttributeError if ``setuser`` has not been called in this thread.
        """
        return self._thread_local.user

    def setuser(self, user):
        """Remember ``user`` for the current thread.

        Accepts either a plain name or an object exposing ``username``.
        """
        if hasattr(user, "username"):
            self._thread_local.user = user.username
        else:
            self._thread_local.user = user

    def get_status(self, **kwargs):
        """GET ``healthz``; ``kwargs`` are sent as query parameters."""
        return self._root.get("healthz", params=kwargs, headers={"Accept": _TEXT_CONTENT_TYPE})

    def submit_job(self, appName, classPath, data, context=None, sync=False):
        """POST ``data`` to ``jobs``.

        ``appName``, ``classPath`` and ``sync`` are always sent as query
        parameters; ``context`` is added only when truthy.
        """
        params = {"appName": appName, "classPath": classPath, "sync": sync}
        if context:
            params["context"] = context
        # The path is a constant: the former '"jobs" % params' formatted a
        # string without any conversion specifier and was a no-op.
        return self._root.post("jobs", params=params, data=data, contenttype=_BINARY_CONTENT_TYPE)

    def job(self, job_id):
        """GET ``jobs/<job_id>``."""
        return self._root.get("jobs/%s" % job_id, headers={"Accept": _JSON_CONTENT_TYPE})

    def jobs(self, **kwargs):
        """GET ``jobs``; ``kwargs`` are sent as query parameters."""
        return self._root.get("jobs", params=kwargs, headers={"Accept": _JSON_CONTENT_TYPE})

    def create_context(self, name, **kwargs):
        """POST ``contexts/<name>``; ``kwargs`` are sent as query parameters."""
        return self._root.post("contexts/%s" % name, params=kwargs, contenttype=_BINARY_CONTENT_TYPE)

    def contexts(self, **kwargs):
        """GET ``contexts``; ``kwargs`` are sent as query parameters."""
        return self._root.get("contexts", params=kwargs, headers={"Accept": _JSON_CONTENT_TYPE})

    def delete_context(self, name, **kwargs):
        """DELETE ``contexts/<name>``. ``kwargs`` are accepted but not forwarded."""
        return self._root.delete("contexts/%s" % name)

    def upload_jar(self, app_name, data):
        """POST ``data`` to ``jars/<app_name>``."""
        return self._root.post("jars/%s" % app_name, data=data, contenttype=_BINARY_CONTENT_TYPE)

    def jars(self, **kwargs):
        """GET ``jars``; ``kwargs`` are sent as query parameters."""
        return self._root.get("jars", params=kwargs, headers={"Accept": _JSON_CONTENT_TYPE})
class JobServerApi(object):
    """REST client for the ``sessions`` endpoints of a job server.

    Requests are issued through a ``Resource`` built on an ``HttpClient``
    rooted at the constructor URL; the acting user lives in thread-local
    storage.
    """

    def __init__(self, oozie_url):
        base_url = posixpath.join(oozie_url)
        http_client = HttpClient(base_url, logger=LOG)
        self._url = base_url
        self._client = http_client
        self._root = Resource(http_client)
        self._security_enabled = False
        self._thread_local = threading.local()

    def __str__(self):
        return "JobServerApi at %s" % (self._url, )

    @property
    def url(self):
        """Base URL of the server."""
        return self._url

    @property
    def security_enabled(self):
        """Security flag set at construction (always False here)."""
        return self._security_enabled

    @property
    def user(self):
        """User recorded for the current thread by ``setuser``."""
        return self._thread_local.user

    def setuser(self, user):
        """Store ``user.username`` when present, otherwise ``user`` itself."""
        self._thread_local.user = user.username if hasattr(user, 'username') else user

    def get_status(self):
        """GET ``sessions``."""
        return self._root.get('sessions')

    def create_session(self, **kwargs):
        """POST the keyword arguments, JSON-encoded, to ``sessions``."""
        body = json.dumps(kwargs)
        return self._root.post('sessions', data=body, contenttype='application/json')

    def get_session(self, uuid):
        """GET ``sessions/<uuid>``."""
        relpath = 'sessions/%s' % uuid
        return self._root.get(relpath)

    def submit_statement(self, uuid, statement):
        """POST ``{'code': statement}`` to ``sessions/<uuid>/statements``."""
        body = json.dumps({'code': statement})
        return self._root.post('sessions/%s/statements' % uuid, data=body, contenttype=_JSON_CONTENT_TYPE)

    def inspect(self, uuid, statement):
        """POST ``{'code': statement}`` to ``sessions/<uuid>/inspect``."""
        body = json.dumps({'code': statement})
        return self._root.post('sessions/%s/inspect' % uuid, data=body, contenttype=_JSON_CONTENT_TYPE)

    def fetch_data(self, session, statement):
        """GET ``sessions/<session>/statements/<statement>``."""
        relpath = 'sessions/%s/statements/%s' % (session, statement)
        return self._root.get(relpath)

    def cancel(self, session):
        """POST to ``sessions/<session>/interrupt``."""
        relpath = 'sessions/%s/interrupt' % session
        return self._root.post(relpath)
class JobServerApi(object):
    """Client wrapper around the ``sessions`` REST endpoints of a job server.

    The per-thread user is held in a ``threading.local``; HTTP traffic goes
    through ``Resource``/``HttpClient`` rooted at the constructor URL.
    """

    def __init__(self, oozie_url):
        self._url = posixpath.join(oozie_url)
        self._security_enabled = False
        self._thread_local = threading.local()
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)

    def __str__(self):
        return "JobServerApi at %s" % (self._url,)

    @property
    def url(self):
        """URL this client was created with."""
        return self._url

    @property
    def security_enabled(self):
        """Whether security is enabled (False for this client)."""
        return self._security_enabled

    @property
    def user(self):
        """User previously stored for this thread."""
        return self._thread_local.user

    def setuser(self, user):
        """Record the user for this thread, unwrapping ``.username`` if available."""
        name = user.username if hasattr(user, 'username') else user
        self._thread_local.user = name

    def get_status(self):
        """List sessions (GET ``sessions``)."""
        return self._root.get('sessions')

    def create_session(self, **kwargs):
        """Create a session from the JSON-encoded keyword arguments."""
        return self._root.post(
            'sessions',
            data=json.dumps(kwargs),
            contenttype='application/json',
        )

    def get_session(self, uuid):
        """Fetch ``sessions/<uuid>``."""
        return self._root.get('sessions/%s' % uuid)

    def _post_code(self, relpath, statement):
        """POST ``{'code': statement}`` as JSON to ``relpath``."""
        payload = {'code': statement}
        return self._root.post(relpath, data=json.dumps(payload), contenttype=_JSON_CONTENT_TYPE)

    def submit_statement(self, uuid, statement):
        """Send ``statement`` to ``sessions/<uuid>/statements``."""
        return self._post_code('sessions/%s/statements' % uuid, statement)

    def inspect(self, uuid, statement):
        """Send ``statement`` to ``sessions/<uuid>/inspect``."""
        return self._post_code('sessions/%s/inspect' % uuid, statement)

    def fetch_data(self, session, statement):
        """Fetch ``sessions/<session>/statements/<statement>``."""
        return self._root.get('sessions/%s/statements/%s' % (session, statement))

    def cancel(self, session):
        """POST to ``sessions/<session>/interrupt``."""
        return self._root.post('sessions/%s/interrupt' % session)
class THttpClient(TTransportBase):
    """
    HTTP transport mode for Thrift.

    HTTPS and Kerberos support with Request.

    e.g.
    mode = THttpClient('http://hbase-thrift-v1.com:9090')
    mode = THttpClient('http://hive-localhost:10001/cliservice')

    Writes are buffered; ``flush`` POSTs the buffered bytes and keeps the
    response so that ``read`` can hand it back.
    """

    def __init__(self, base_url):
        self._base_url = base_url
        self._client = HttpClient(self._base_url, logger=LOG)
        self._wbuf = buffer_writer()
        self._headers = None
        self._data = None

    # -- authentication / TLS settings, delegated to the HTTP client ---------

    def set_kerberos_auth(self, service="HTTP"):
        self._client.set_kerberos_auth(service=service)

    def set_basic_auth(self, username, password):
        self._client.set_basic_auth(username, password)

    def set_bearer_auth(self, token):
        self._client.set_bearer_auth(token)

    def set_verify(self, verify=True):
        self._client.set_verify(verify)

    # -- transport interface ---------------------------------------------------

    def open(self):
        """Nothing to open."""
        pass

    def close(self):
        """Drop the custom headers; the HTTP client itself is left untouched."""
        self._headers = None
        # Close session too?

    def isOpen(self):
        return self._client is not None

    def setTimeout(self, ms):
        """Store the timeout, converted from milliseconds to whole seconds, as a ``timeout`` header."""
        if not self._headers:
            self._headers = {}
        seconds = str(int(ms / 1000))
        self._headers.update(timeout=seconds)

    def setCustomHeaders(self, headers):
        """Replace the headers sent with each flush."""
        self._headers = headers

    def read(self, sz):
        """Return the whole response of the last flush; ``sz`` is ignored."""
        return self._data

    def write(self, buf):
        """Append ``buf`` to the pending request body."""
        self._wbuf.write(buf)

    def flush(self):
        """POST the buffered bytes to the base URL and keep the response for ``read``."""
        payload = self._wbuf.getvalue()
        self._wbuf = buffer_writer()  # start a fresh buffer for the next message

        # POST
        self._root = Resource(self._client)
        self._data = self._root.post('', data=payload, headers=self._headers)
class THttpClient(TTransportBase):
    """
    HTTP transport mode for Thrift.

    HTTPS and Kerberos support with Request.

    e.g.
    mode = THttpClient('http://hbase-thrift-v1.com:9090')
    mode = THttpClient('http://hive-localhost:10001/cliservice')

    Outgoing bytes are accumulated by ``write`` and sent as one POST by
    ``flush``; ``read`` returns that POST's response.
    """

    def __init__(self, base_url, cert_validate=True):
        self._base_url = base_url
        self._client = HttpClient(self._base_url, logger=LOG, cert_validate=cert_validate)
        self._wbuf = StringIO()
        self._headers = None
        self._data = None

    # -- authentication, delegated to the HTTP client --------------------------

    def set_basic_auth(self, username, password):
        self._client.set_basic_auth(username, password)

    def set_kerberos_auth(self):
        self._client.set_kerberos_auth()

    # -- transport interface ---------------------------------------------------

    def open(self):
        """Nothing to open."""
        pass

    def close(self):
        """Forget the custom headers; the HTTP client is kept."""
        self._headers = None
        # Close session too?

    def isOpen(self):
        return self._client is not None

    def setTimeout(self, ms):
        """Accepted for interface compatibility; the value is not used."""
        pass

    def setCustomHeaders(self, headers):
        """Remember ``headers``; note that ``flush`` resets them via ``close``."""
        self._headers = headers

    def read(self, sz):
        """Return the whole response of the last flush; ``sz`` is ignored."""
        return self._data

    def write(self, buf):
        """Append ``buf`` to the pending request body."""
        self._wbuf.write(buf)

    def flush(self):
        """Cycle close/open, then POST the buffered bytes and keep the response."""
        if self.isOpen():
            self.close()
        self.open()

        payload = self._wbuf.getvalue()
        self._wbuf = StringIO()  # fresh buffer for the next message

        # POST
        self._root = Resource(self._client)
        self._data = self._root.post('', data=payload)
class OptimizerRestClient(OptimizerClient):
    """Optimizer client that sends its calls as JSON POSTs to a REST endpoint."""

    def __init__(self, user, api_url=None, auth_key=None, auth_key_secret=None, tenant_id='hue'):
        """Set up the HTTP resource.

        ``api_url`` falls back to ``get_optimizer_url()`` and has leading and
        trailing slashes stripped. ``auth_key`` and ``auth_key_secret`` are
        accepted but not used by this constructor.
        """
        self.user = user
        self._tenant_id = tenant_id

        self._api_url = (api_url or get_optimizer_url()).strip('/')
        self._client = HttpClient(self._api_url, logger=LOG)
        self._root = Resource(self._client)

        self._api = MockApiLib()

    def _call(self, path, data):
        """POST ``data`` as JSON to ``path`` and return the response.

        This is best effort: any failure is logged and an empty dict is
        returned instead of raising.
        """
        try:
            return self._root.post(path, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
        except Exception:
            # Not a bare 'except:', so KeyboardInterrupt and SystemExit still
            # propagate instead of being silently turned into an empty result.
            LOG.exception('Error calling Optimize service')
            return {}
def test_avoid_concat_unicode_with_ascii():
    """
    Resource must cope with non-ASCII paths and payloads without logging an exception.

    Without smart_unicode() we get:
    UnicodeDecodeError: 'ascii' codec can't decode byte 0xd0 in position 39: ordinal not in range(128)
    """
    with patch('desktop.lib.rest.http_client.HttpClient') as HttpClient, \
            patch('desktop.lib.rest.resource.LOG.exception') as exception:
        client = HttpClient()

        # Plain ASCII round trip first.
        ascii_response = Mock(headers={}, content='Good')
        client.execute = Mock(return_value=ascii_response)

        resource = Resource(client)
        resp = resource.get('/user/domain/')

        assert_false(exception.called)
        assert_equal('Good', resp)

        # Now a response body and a request path that contain non-ASCII characters.
        unicode_response = Mock(
            headers={},
            content='{"FileStatus":{"pathSuffix":"/user/hue/Джейкоб","type":"DIRECTORY","length":0,"owner":"admin","group":"admin","permission":"755","accessTime":0,"modificationTime":1578458822492,"blockSize":0,"replication":0,"childrenNum":0,"fileId":149137,"storagePolicy":0}}'
        )
        client.execute = Mock(return_value=unicode_response)

        resp = resource.get('/user/domain/Джейкоб')

        assert_true(client.execute.called)
        assert_false(exception.called)  # Should not fail anymore now

        # Same check for a POST whose JSON payload has non-ASCII keys and values.
        payload = json.dumps({'€': '€'})
        resp = resource.post('/user/domain/Джейкоб', data=payload)

        assert_true(client.execute.called)
        assert_false(exception.called)
class DataWarehouse2Api(object):
    """Client for the ``dw`` API below the configured ``K8S.API_URL``.

    Every operation is a JSON POST. Certificate verification is switched off
    on the underlying HTTP client.
    """

    def __init__(self, user=None):
        self.user = user
        self._api_url = '%s/dw' % K8S.API_URL.get().rstrip('/')

        self._client = HttpClient(self._api_url, logger=LOG)
        self._client.set_verify(False)
        self._root = Resource(self._client)

    def list_k8_clusters(self):
        """List clusters, adding the display keys derived from each entry.

        ``instanceType`` is built from the worker CPU and memory figures and
        ``creationDate`` is the time of this call.
        """
        response = self._root.post('listClusters', contenttype="application/json")

        for entry in response['clusters']:
            entry['clusterName'] = entry['name']
            entry['workersGroupSize'] = entry['workerReplicas']
            entry['instanceType'] = '%(workerCpuCores)s CPU %(workerMemoryInGib)s Memory' % entry
            entry['progress'] = '%(workerReplicasOnline)s / %(workerReplicas)s' % entry
            entry['creationDate'] = str(datetime.now())

        return response

    def create_cluster(self, cloud_provider, cluster_name, cdh_version, public_key, instance_type, environment_name,
                       workers_group_size=3, namespace_name=None, cloudera_manager_username='******',
                       cloudera_manager_password='******'):
        """POST ``createCluster``.

        Only ``cluster_name``, ``cdh_version`` (default ``'CDH6.3'``) and
        ``workers_group_size`` reach the request; the other arguments are
        accepted but unused, and CPU/memory are fixed at 1.
        """
        version = cdh_version or 'CDH6.3'
        data = {
            'clusterName': cluster_name,
            'cdhVersion': version,
            'workerCpuCores': 1,
            'workerMemoryInGib': 1,
            'workerReplicas': workers_group_size,
            'workerAutoResize': False
        }
        body = json.dumps(data)

        return self._root.post('createCluster', data=body, contenttype="application/json")

    def list_clusters(self):
        """List clusters like ``list_k8_clusters`` but with a fixed ``instanceType`` label."""
        response = self._root.post('listClusters', contenttype="application/json")

        for entry in response['clusters']:
            entry['clusterName'] = entry['name']
            entry['workersGroupSize'] = entry['workerReplicas']
            entry['instanceType'] = 'Data Warehouse'
            entry['progress'] = '%(workerReplicasOnline)s / %(workerReplicas)s' % entry
            entry['creationDate'] = str(datetime.now())

        return response

    def delete_cluster(self, cluster_id):
        """POST ``deleteCluster`` and wrap the response under ``'result'``."""
        body = json.dumps({'clusterName': cluster_id})
        outcome = self._root.post('deleteCluster', data=body, contenttype="application/json")
        return {'result': outcome}

    def describe_cluster(self, cluster_id):
        """POST ``describeCluster`` and add ``clusterName``/``cdhVersion`` to the returned cluster."""
        body = json.dumps({'clusterName': cluster_id})
        description = self._root.post('describeCluster', data=body, contenttype="application/json")

        details = description['cluster']
        details['clusterName'] = details['name']
        details['cdhVersion'] = 'Data Warehouse'

        return description

    def update_cluster(self, **params):
        """POST the keyword arguments, JSON-encoded, to ``updateCluster``."""
        body = json.dumps(params)
        return self._root.post('updateCluster', data=body, contenttype="application/json")
class JobServerApi(object):
    """REST client for the ``sessions`` and ``batches`` endpoints of a job server.

    Kerberos authentication, the ``X-Requested-By`` header and certificate
    verification are configured on the HTTP client from ``SECURITY_ENABLED``,
    ``CSRF_ENABLED`` and ``SSL_CERT_CA_VERIFY``. The acting user is stored
    per thread and sent as ``proxyUser`` when creating sessions and batches.
    """

    def __init__(self, livy_url):
        self._url = posixpath.join(livy_url)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)

        self._security_enabled = SECURITY_ENABLED.get()
        self._csrf_enabled = CSRF_ENABLED.get()
        self._thread_local = threading.local()

        if self.security_enabled:
            self._client.set_kerberos_auth()

        if self.csrf_enabled:
            self._client.set_headers({'X-Requested-By' : 'hue'})

        self._client.set_verify(SSL_CERT_CA_VERIFY.get())

    def __str__(self):
        return "JobServerApi at %s" % (self._url,)

    @property
    def url(self):
        """Base URL of the server."""
        return self._url

    @property
    def security_enabled(self):
        """Value of ``SECURITY_ENABLED`` read at construction."""
        return self._security_enabled

    @property
    def csrf_enabled(self):
        """Value of ``CSRF_ENABLED`` read at construction."""
        return self._csrf_enabled

    @property
    def user(self):
        """User recorded for the current thread by ``setuser``."""
        return self._thread_local.user

    def setuser(self, user):
        """Store ``user.username`` when present, otherwise ``user`` itself."""
        self._thread_local.user = user.username if hasattr(user, 'username') else user

    def _fetch_log(self, relpath, startFrom, size):
        """GET ``relpath`` with optional ``from``/``size`` parameters and join the ``log`` lines."""
        candidates = (('from', startFrom), ('size', size))
        params = {name: value for name, value in candidates if value is not None}
        response = self._root.get(relpath, params=params)
        return '\n'.join(response['log'])

    # -- sessions --------------------------------------------------------------

    def get_status(self):
        """GET ``sessions``."""
        return self._root.get('sessions')

    def get_log(self, uuid, startFrom=None, size=None):
        """Return the log of session ``uuid`` as one newline-joined string."""
        return self._fetch_log('sessions/%s/log' % uuid, startFrom, size)

    def create_session(self, **properties):
        """POST the properties plus ``proxyUser`` (the current user) to ``sessions``."""
        payload = dict(properties, proxyUser=self.user)
        return self._root.post('sessions', data=json.dumps(payload), contenttype=_JSON_CONTENT_TYPE)

    def get_sessions(self):
        """GET ``sessions``."""
        return self._root.get('sessions')

    def get_session(self, uuid):
        """GET ``sessions/<uuid>``."""
        return self._root.get('sessions/%s' % uuid)

    def get_statements(self, uuid):
        """GET ``sessions/<uuid>/statements``."""
        return self._root.get('sessions/%s/statements' % uuid)

    def submit_statement(self, uuid, statement):
        """POST ``{'code': statement}`` to ``sessions/<uuid>/statements``."""
        body = json.dumps({'code': statement})
        return self._root.post('sessions/%s/statements' % uuid, data=body, contenttype=_JSON_CONTENT_TYPE)

    def inspect(self, uuid, statement):
        """POST ``{'code': statement}`` to ``sessions/<uuid>/inspect``."""
        body = json.dumps({'code': statement})
        return self._root.post('sessions/%s/inspect' % uuid, data=body, contenttype=_JSON_CONTENT_TYPE)

    def fetch_data(self, session, statement):
        """GET ``sessions/<session>/statements/<statement>``."""
        return self._root.get('sessions/%s/statements/%s' % (session, statement))

    def cancel(self, session):
        """POST to ``sessions/<session>/interrupt``."""
        return self._root.post('sessions/%s/interrupt' % session)

    def close(self, uuid):
        """DELETE ``sessions/<uuid>``."""
        return self._root.delete('sessions/%s' % uuid)

    # -- batches ---------------------------------------------------------------

    def get_batches(self):
        """GET ``batches``."""
        return self._root.get('batches')

    def submit_batch(self, properties):
        """POST ``properties`` to ``batches``; ``proxyUser`` is written into the passed dict."""
        properties['proxyUser'] = self.user
        body = json.dumps(properties)
        return self._root.post('batches', data=body, contenttype=_JSON_CONTENT_TYPE)

    def get_batch(self, uuid):
        """GET ``batches/<uuid>``."""
        return self._root.get('batches/%s' % uuid)

    def get_batch_status(self, uuid):
        """Return the ``state`` field of ``batches/<uuid>/state``."""
        return self._root.get('batches/%s/state' % uuid)['state']

    def get_batch_log(self, uuid, startFrom=None, size=None):
        """Return the log of batch ``uuid`` as one newline-joined string."""
        return self._fetch_log('batches/%s/log' % uuid, startFrom, size)

    def close_batch(self, uuid):
        """DELETE ``batches/<uuid>``."""
        return self._root.delete('batches/%s' % uuid)
class JobServerApi(object):
    """REST client for the ``sessions`` and ``batches`` endpoints of a job server.

    Requests go through ``Resource``/``HttpClient`` rooted at the constructor
    URL; the acting user is stored per thread.
    """

    def __init__(self, oozie_url):
        self._url = posixpath.join(oozie_url)
        self._security_enabled = False
        self._thread_local = threading.local()
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)

    def __str__(self):
        return "JobServerApi at %s" % (self._url,)

    @property
    def url(self):
        """Base URL of the server."""
        return self._url

    @property
    def security_enabled(self):
        """Security flag (False for this client)."""
        return self._security_enabled

    @property
    def user(self):
        """User recorded for the current thread by ``setuser``."""
        return self._thread_local.user

    def setuser(self, user):
        """Store ``user.username`` when present, otherwise ``user`` itself."""
        self._thread_local.user = user.username if hasattr(user, 'username') else user

    # -- sessions --------------------------------------------------------------

    def get_status(self):
        """GET ``sessions``."""
        return self._root.get('sessions')

    def create_session(self, **kwargs):
        """POST the JSON-encoded keyword arguments to ``sessions``."""
        body = json.dumps(kwargs)
        return self._root.post('sessions', data=body, contenttype='application/json')

    def get_session(self, uuid):
        """GET ``sessions/<uuid>``."""
        return self._root.get('sessions/%s' % uuid)

    def submit_statement(self, uuid, statement):
        """POST ``{'code': statement}`` to ``sessions/<uuid>/statements``."""
        body = json.dumps({'code': statement})
        return self._root.post('sessions/%s/statements' % uuid, data=body, contenttype=_JSON_CONTENT_TYPE)

    def inspect(self, uuid, statement):
        """POST ``{'code': statement}`` to ``sessions/<uuid>/inspect``."""
        body = json.dumps({'code': statement})
        return self._root.post('sessions/%s/inspect' % uuid, data=body, contenttype=_JSON_CONTENT_TYPE)

    def fetch_data(self, session, statement):
        """GET ``sessions/<session>/statements/<statement>``."""
        return self._root.get('sessions/%s/statements/%s' % (session, statement))

    def cancel(self, session):
        """POST to ``sessions/<session>/interrupt``."""
        return self._root.post('sessions/%s/interrupt' % session)

    # -- batches ---------------------------------------------------------------

    def get_batches(self):
        """GET ``batches``."""
        return self._root.get('batches')

    def submit_batch(self, properties):
        """POST the JSON-encoded ``properties`` to ``batches``."""
        body = json.dumps(properties)
        return self._root.post('batches', data=body, contenttype=_JSON_CONTENT_TYPE)

    def get_batch(self, uuid):
        """GET ``batches/<uuid>``."""
        return self._root.get('batches/%s' % uuid)

    def get_batch_status(self, uuid):
        """Return the ``state`` field of ``batches/<uuid>/state``."""
        return self._root.get('batches/%s/state' % uuid)['state']

    def get_batch_log(self, uuid, startFrom=None, size=None):
        """Return the batch log as one newline-joined string.

        ``startFrom`` and ``size`` are sent as ``from`` and ``size`` only when
        they are not None.
        """
        candidates = (('from', startFrom), ('size', size))
        params = {name: value for name, value in candidates if value is not None}

        response = self._root.get('batches/%s/log' % uuid, params=params)
        return '\n'.join(response['log'])

    def delete_batch(self, uuid):
        """DELETE ``batches/<uuid>``."""
        return self._root.delete('batches/%s' % uuid)
class JobServerApi(object):
    """REST client for a job server's ``healthz``, ``jobs``, ``contexts`` and ``jars`` endpoints.

    Requests are made through a ``Resource`` wrapping an ``HttpClient`` rooted
    at the constructor URL. The acting user is stored in thread-local storage,
    so ``setuser`` must be called in a thread before ``user`` is read there.
    """

    def __init__(self, oozie_url):
        """Create the HTTP client and resource rooted at ``oozie_url``."""
        self._url = posixpath.join(oozie_url)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = False
        # To store user info
        self._thread_local = threading.local()

    def __str__(self):
        return "JobServerApi at %s" % (self._url, )

    @property
    def url(self):
        """Base URL the client talks to."""
        return self._url

    @property
    def security_enabled(self):
        """Security flag; always False for this client."""
        return self._security_enabled

    @property
    def user(self):
        """User stored by ``setuser`` for the current thread.

        Raises AttributeError if ``setuser`` has not been called in this thread.
        """
        return self._thread_local.user

    def setuser(self, user):
        """Remember ``user`` (or its ``username`` attribute) for the current thread."""
        if hasattr(user, 'username'):
            self._thread_local.user = user.username
        else:
            self._thread_local.user = user

    def get_status(self, **kwargs):
        """GET ``healthz``; ``kwargs`` are sent as query parameters."""
        return self._root.get('healthz', params=kwargs, headers={'Accept': _TEXT_CONTENT_TYPE})

    def submit_job(self, appName, classPath, data, context=None, sync=False):
        """POST ``data`` to ``jobs``.

        ``appName``, ``classPath`` and ``sync`` are always sent as query
        parameters; ``context`` is added only when truthy.
        """
        params = {'appName': appName, 'classPath': classPath, 'sync': sync}
        if context:
            params['context'] = context
        # The path is a constant: the former "'jobs' % params" formatted a
        # string without any conversion specifier and was a no-op.
        return self._root.post('jobs', params=params, data=data, contenttype=_BINARY_CONTENT_TYPE)

    def job(self, job_id):
        """GET ``jobs/<job_id>``."""
        return self._root.get('jobs/%s' % job_id, headers={'Accept': _JSON_CONTENT_TYPE})

    def jobs(self, **kwargs):
        """GET ``jobs``; ``kwargs`` are sent as query parameters."""
        return self._root.get('jobs', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

    def create_context(self, name, **kwargs):
        """POST ``contexts/<name>``; ``kwargs`` are sent as query parameters."""
        return self._root.post('contexts/%s' % name, params=kwargs, contenttype=_BINARY_CONTENT_TYPE)

    def contexts(self, **kwargs):
        """GET ``contexts``; ``kwargs`` are sent as query parameters."""
        return self._root.get('contexts', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

    def delete_context(self, name, **kwargs):
        """DELETE ``contexts/<name>``. ``kwargs`` are accepted but not forwarded."""
        return self._root.delete('contexts/%s' % name)

    def upload_jar(self, app_name, data):
        """POST ``data`` to ``jars/<app_name>``."""
        return self._root.post('jars/%s' % app_name, data=data, contenttype=_BINARY_CONTENT_TYPE)

    def jars(self, **kwargs):
        """GET ``jars``; ``kwargs`` are sent as query parameters."""
        return self._root.get('jars', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})
class OozieApi(object):
    """REST client for an Oozie server (``jobs``, ``job/<id>`` and ``admin/*`` endpoints).

    The acting user is stored per thread and falls back to ``DEFAULT_USER``.
    Every request carries the parameters built by ``_get_params``.
    """

    VALID_JOB_FILTERS = ('name', 'user', 'group', 'status')

    def __init__(self, oozie_url, security_enabled=False):
        """Root the client at ``oozie_url``/``API_VERSION``; use Kerberos when ``security_enabled``."""
        self._url = posixpath.join(oozie_url, API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        if security_enabled:
            self._client.set_kerberos_auth()
        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        # To store user info
        self._thread_local = threading.local()

    def __str__(self):
        return "OozieApi at %s" % (self._url,)

    @property
    def url(self):
        """Base URL including the API version."""
        return self._url

    @property
    def security_enabled(self):
        """Whether the client was created with security enabled."""
        return self._security_enabled

    @property
    def user(self):
        """User set for the current thread, or ``DEFAULT_USER`` when none was set."""
        try:
            return self._thread_local.user
        except AttributeError:
            return DEFAULT_USER

    def setuser(self, user):
        """Return the previous user"""
        prev = self.user
        self._thread_local.user = user
        return prev

    def _get_params(self):
        """Build the query parameters sent with every request.

        With security enabled only ``doAs`` and ``timezone`` are sent;
        otherwise ``user.name`` (``DEFAULT_USER``) is sent as well.
        """
        if self.security_enabled:
            return {'doAs': self.user, 'timezone': TIME_ZONE.get()}
        return {'user.name': DEFAULT_USER, 'doAs': self.user, 'timezone': TIME_ZONE.get()}

    def _get_oozie_properties(self, properties=None):
        """Return ``properties`` layered over the default ``user.name`` entry."""
        defaults = {
            'user.name': self.user,
        }

        if properties is not None:
            defaults.update(properties)

        return defaults

    def get_jobs(self, jobtype, offset=None, cnt=None, **kwargs):
        """
        Get a list of Oozie jobs.

        jobtype is 'wf' or 'coord'; any other value yields a BundleList.
        Note that offset is 1-based.
        kwargs is used for filtering and may be one of VALID_JOB_FILTERS:
        name, user, group, status.

        Raise ValueError for any other filter name.
        """
        params = self._get_params()
        if offset is not None:
            params['offset'] = str(offset)
        if cnt is not None:
            params['len'] = str(cnt)
        params['jobtype'] = jobtype

        filter_list = []
        # items() instead of iteritems(): the latter only exists on Python 2 dicts.
        for key, val in kwargs.items():
            if key not in OozieApi.VALID_JOB_FILTERS:
                raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
            filter_list.append('%s=%s' % (key, val))
        params['filter'] = ';'.join(filter_list)

        # Send the request
        resp = self._root.get('jobs', params)
        if jobtype == 'wf':
            wf_list = WorkflowList(self, resp, filters=kwargs)
        elif jobtype == 'coord':
            wf_list = CoordinatorList(self, resp, filters=kwargs)
        else:
            wf_list = BundleList(self, resp, filters=kwargs)
        return wf_list

    def get_workflows(self, offset=None, cnt=None, **kwargs):
        """``get_jobs`` for job type ``'wf'``."""
        return self.get_jobs('wf', offset, cnt, **kwargs)

    def get_coordinators(self, offset=None, cnt=None, **kwargs):
        """``get_jobs`` for job type ``'coord'``."""
        return self.get_jobs('coord', offset, cnt, **kwargs)

    def get_bundles(self, offset=None, cnt=None, **kwargs):
        """``get_jobs`` for job type ``'bundle'``."""
        return self.get_jobs('bundle', offset, cnt, **kwargs)

    # TODO: make get_job accept any jobid
    def get_job(self, jobid):
        """
        get_job(jobid) -> Workflow
        """
        params = self._get_params()
        resp = self._root.get('job/%s' % (jobid,), params)
        wf = Workflow(self, resp)
        return wf

    def get_coordinator(self, jobid):
        """get_coordinator(jobid) -> Coordinator; the request is sent with ``len=-1``."""
        params = self._get_params()
        params.update({'len': -1})
        resp = self._root.get('job/%s' % (jobid,), params)
        return Coordinator(self, resp)

    def get_bundle(self, jobid):
        """get_bundle(jobid) -> Bundle"""
        params = self._get_params()
        resp = self._root.get('job/%s' % (jobid,), params)
        return Bundle(self, resp)

    def get_job_definition(self, jobid):
        """
        get_job_definition(jobid) -> Definition (xml string)
        """
        params = self._get_params()
        params['show'] = 'definition'
        xml = self._root.get('job/%s' % (jobid,), params)
        return xml

    def get_job_log(self, jobid):
        """
        get_job_log(jobid) -> Log (xml string)
        """
        params = self._get_params()
        params['show'] = 'log'
        xml = self._root.get('job/%s' % (jobid,), params)
        return xml

    def get_action(self, action_id):
        """Fetch an action; the wrapper class is picked from the id ('C@' coordinator, 'B@' bundle, else workflow)."""
        if 'C@' in action_id:
            Klass = CoordinatorAction
        elif 'B@' in action_id:
            Klass = BundleAction
        else:
            Klass = WorkflowAction
        params = self._get_params()
        resp = self._root.get('job/%s' % (action_id,), params)
        return Klass(resp)

    def job_control(self, jobid, action, properties=None, parameters=None):
        """
        job_control(jobid, action) -> response of the PUT request

        action must be one of start, suspend, resume, kill, rerun, coord-rerun,
        bundle-rerun; anything else raises ValueError. ``parameters`` are merged
        into the query parameters and ``properties`` are sent as the XML body.
        Raise RestException on error.
        """
        if action not in ('start', 'suspend', 'resume', 'kill', 'rerun', 'coord-rerun', 'bundle-rerun'):
            msg = 'Invalid oozie job action: %s' % (action,)
            LOG.error(msg)
            raise ValueError(msg)
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        params['action'] = action
        if parameters is not None:
            params.update(parameters)

        return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

    def submit_workflow(self, application_path, properties=None):
        """
        submit_workflow(application_path, properties=None) -> jobid

        Raise RestException on error.
        """
        defaults = {
            'oozie.wf.application.path': application_path,
            'user.name': self.user,
        }

        if properties is not None:
            defaults.update(properties)
        properties = defaults

        return self.submit_job(properties)

    # Is name actually submit_coord?
    def submit_job(self, properties=None):
        """
        submit_job(properties=None) -> jobid

        Raise RestException on error.
        """
        defaults = {
            'user.name': self.user,
        }

        if properties is not None:
            defaults.update(properties)

        properties = defaults

        params = self._get_params()
        resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
        return resp['id']

    def rerun(self, jobid, properties=None, params=None):
        """PUT a ``rerun`` action for ``jobid``.

        Caller-supplied ``params`` are layered over the standard request
        parameters (and win on conflicts); the caller's dict is not modified.
        """
        properties = self._get_oozie_properties(properties)
        # Always start from the standard parameters: the previous code merged
        # into a temporary dict and threw it away, so user-supplied params were
        # sent without doAs/user.name/timezone.
        request_params = self._get_params()
        if params is not None:
            request_params.update(params)

        request_params['action'] = 'rerun'

        return self._root.put('job/%s' % jobid, request_params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

    def get_build_version(self):
        """
        get_build_version() -> Build version (dictionary)
        """
        params = self._get_params()
        resp = self._root.get('admin/build-version', params)
        return resp

    def get_instrumentation(self):
        """get_instrumentation() -> response of ``admin/instrumentation``"""
        params = self._get_params()
        resp = self._root.get('admin/instrumentation', params)
        return resp

    def get_configuration(self):
        """
        get_configuration() -> Oozie config (dictionary)
        """
        params = self._get_params()
        resp = self._root.get('admin/configuration', params)
        return resp

    def get_oozie_status(self):
        """
        get_oozie_status() -> Oozie status (dictionary)
        """
        params = self._get_params()
        resp = self._root.get('admin/status', params)
        return resp
class FlinkSqlClient():
    """
    Implements https://github.com/ververica/flink-sql-gateway

    Could be a pip module or sqlalchemy dialect in the future.
    """

    def __init__(self, user, api_url):
        self.user = user
        self._url = posixpath.join(api_url + '/' + _API_VERSION + '/')
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)

    def __str__(self):
        return "FlinkClient at %s" % (self._url, )

    def info(self):
        """GET ``info``."""
        return self._root.get('info')

    def create_session(self, **properties):
        """POST a session definition; ``properties`` override the defaults below."""
        data = {
            "session_name": "test",  # optional
            "planner": "blink",  # required, "old"/"blink"
            "execution_type": "streaming",  # required, "batch"/"streaming"
            "properties": {  # optional
                "key": "value"
            }
        }
        data.update(properties)
        body = json.dumps(data)

        return self._root.post('sessions', data=body, contenttype=_JSON_CONTENT_TYPE)

    def session_heartbeat(self, session_id):
        """POST to the heartbeat endpoint of ``session_id``."""
        relpath = 'sessions/%(session_id)s/heartbeat' % {'session_id': session_id}
        return self._root.post(relpath)

    def execute_statement(self, session_id, statement):
        """POST ``statement`` to the session's ``statements`` endpoint."""
        data = {
            "statement": statement,  # required
            "execution_timeout": ""  # execution time limit in milliseconds, optional, but required for stream SELECT ?
        }
        relpath = 'sessions/%(session_id)s/statements' % {'session_id': session_id}
        body = json.dumps(data)

        return self._root.post(relpath, data=body, contenttype=_JSON_CONTENT_TYPE)

    def fetch_status(self, session_id, job_id):
        """GET the status of ``job_id`` within ``session_id``."""
        ids = {'session_id': session_id, 'job_id': job_id}
        relpath = 'sessions/%(session_id)s/jobs/%(job_id)s/status' % ids
        return self._root.get(relpath)

    def fetch_results(self, session_id, job_id, token=0):
        """GET the result page ``token`` of ``job_id`` within ``session_id``."""
        ids = {'session_id': session_id, 'job_id': job_id, 'token': token}
        relpath = 'sessions/%(session_id)s/jobs/%(job_id)s/result/%(token)s' % ids
        return self._root.get(relpath)

    def close_statement(self, session_id, job_id):
        """DELETE ``job_id`` within ``session_id``."""
        ids = {'session_id': session_id, 'job_id': job_id}
        relpath = 'sessions/%(session_id)s/jobs/%(job_id)s' % ids
        return self._root.delete(relpath)

    def close_session(self, session_id):
        """DELETE ``session_id``."""
        relpath = 'sessions/%(session_id)s' % {'session_id': session_id}
        return self._root.delete(relpath)
class OozieApi(object):
    """REST client for an Oozie server (``jobs``, ``job/<id>`` and ``admin/*`` endpoints).

    The acting user is stored per thread and falls back to ``DEFAULT_USER``.
    Every request carries the parameters built by ``_get_params``.
    """

    VALID_JOB_FILTERS = ("name", "user", "group", "status")

    def __init__(self, oozie_url, security_enabled=False):
        """Root the client at ``oozie_url``/``API_VERSION``; use Kerberos when ``security_enabled``."""
        self._url = posixpath.join(oozie_url, API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        if security_enabled:
            self._client.set_kerberos_auth()
        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        # To store user info
        self._thread_local = threading.local()

    def __str__(self):
        return "OozieApi at %s" % (self._url,)

    @property
    def url(self):
        """Base URL including the API version."""
        return self._url

    @property
    def security_enabled(self):
        """Whether the client was created with security enabled."""
        return self._security_enabled

    @property
    def user(self):
        """User set for the current thread, or ``DEFAULT_USER`` when none was set."""
        try:
            return self._thread_local.user
        except AttributeError:
            return DEFAULT_USER

    def setuser(self, user):
        """Return the previous user"""
        prev = self.user
        self._thread_local.user = user
        return prev

    def _get_params(self):
        """Build the query parameters sent with every request.

        With security enabled only ``doAs`` is sent; otherwise ``user.name``
        (``DEFAULT_USER``) is sent as well.
        """
        if self.security_enabled:
            return {"doAs": self.user}
        return {"user.name": DEFAULT_USER, "doAs": self.user}

    def get_jobs(self, offset=None, cnt=None, **kwargs):
        """
        get_jobs(offset=None, cnt=None, **kwargs) -> WorkflowList

        Note that offset is 1-based.
        kwargs is used for filtering and may be one of VALID_JOB_FILTERS:
        name, user, group, status.

        Raise ValueError for any other filter name.
        """
        params = self._get_params()
        if offset is not None:
            params["offset"] = str(offset)
        if cnt is not None:
            params["len"] = str(cnt)

        filter_list = []
        # Iterate over (key, value) pairs: looping over the dict itself yields
        # only the keys, which made the tuple unpacking fail for every filter.
        for key, val in kwargs.items():
            if key not in OozieApi.VALID_JOB_FILTERS:
                raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
            filter_list.append("%s=%s" % (key, val))
        params["filter"] = ";".join(filter_list)

        # Send the request
        resp = self._root.get("jobs", params)
        wf_list = WorkflowList(self, resp, filters=kwargs)
        return wf_list

    def get_job(self, jobid):
        """
        get_job(jobid) -> Workflow
        """
        params = self._get_params()
        resp = self._root.get("job/%s" % (jobid,), params)
        wf = Workflow(self, resp)
        return wf

    def get_job_definition(self, jobid):
        """
        get_job_definition(jobid) -> Definition (xml string)
        """
        params = self._get_params()
        params["show"] = "definition"
        xml = self._root.get("job/%s" % (jobid,), params)
        return xml

    def get_job_log(self, jobid):
        """
        get_job_log(jobid) -> Log (xml string)
        """
        params = self._get_params()
        params["show"] = "log"
        xml = self._root.get("job/%s" % (jobid,), params)
        return xml

    def job_control(self, jobid, action):
        """
        job_control(jobid, action) -> None

        action must be one of start, suspend, resume, kill; anything else
        raises ValueError.
        Raise RestException on error.
        """
        if action not in ("start", "suspend", "resume", "kill"):
            msg = "Invalid oozie job action: %s" % (action,)
            LOG.error(msg)
            raise ValueError(msg)
        params = self._get_params()
        params["action"] = action
        self._root.put("job/%s" % (jobid,), params)

    def submit_workflow(self, application_path, properties=None):
        """
        submit_workflow(application_path, properties=None) -> jobid

        Submit a job to Oozie. ``properties`` override the default
        ``oozie.wf.application.path`` and ``user.name`` entries.
        May raise PopupException.
        """
        defaults = {"oozie.wf.application.path": application_path, "user.name": self.user}
        if properties is not None:
            defaults.update(properties)
        properties = defaults

        params = self._get_params()
        resp = self._root.post("jobs", params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
        return resp["id"]

    def get_build_version(self):
        """
        get_build_version() -> Build version (dictionary)
        """
        params = self._get_params()
        resp = self._root.get("admin/build-version", params)
        return resp

    def get_instrumentation(self):
        """
        get_instrumentation() -> Oozie instrumentation (dictionary)
        """
        params = self._get_params()
        resp = self._root.get("admin/instrumentation", params)
        return resp

    def get_configuration(self):
        """
        get_configuration() -> Oozie config (dictionary)
        """
        params = self._get_params()
        resp = self._root.get("admin/configuration", params)
        return resp

    def get_oozie_status(self):
        """
        get_oozie_status() -> Oozie status (dictionary)
        """
        params = self._get_params()
        resp = self._root.get("admin/status", params)
        return resp
class OozieApi(object):
    """Client for the Oozie REST API that issues requests for a single user.

    Every request carries the query parameters built by ``_get_params``:
    ``doAs`` and ``timezone``, plus ``user.name`` when security is disabled.
    """

    VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime', 'text')
    VALID_LOG_FILTERS = set(('recent', 'limit', 'loglevel', 'text'))

    def __init__(self, oozie_url, user, security_enabled=False,
                 api_version=API_VERSION, ssl_cert_ca_verify=True):
        """
        :param oozie_url: base URL of the Oozie server; ``api_version`` is
            appended to it.
        :param user: a user name, or an object exposing ``username``.
        :param security_enabled: when true, Kerberos auth is enabled on the
            HTTP client.
        :param ssl_cert_ca_verify: forwarded to the client's ``set_verify``.
        """
        self._url = posixpath.join(oozie_url, api_version)
        self._client = HttpClient(self._url, logger=LOG)
        if security_enabled:
            self._client.set_kerberos_auth()
        self._client.set_verify(ssl_cert_ca_verify)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        # To store username info
        if hasattr(user, 'username'):
            self.user = user.username
        else:
            self.user = user
        self.api_version = api_version

    def __str__(self):
        return "OozieApi at %s" % (self._url, )

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    def _get_params(self):
        """Return a fresh dict of the query parameters sent with each request."""
        if self.security_enabled:
            return {'doAs': self.user, 'timezone': TIME_ZONE.get()}
        return {
            'user.name': DEFAULT_USER,
            'doAs': self.user,
            'timezone': TIME_ZONE.get()
        }

    def _get_oozie_properties(self, properties=None):
        """Return ``properties`` layered over the default ``user.name``."""
        defaults = {
            'user.name': self.user,
        }
        if properties is not None:
            defaults.update(properties)
        return defaults

    @staticmethod
    def _join_filters(filters, valid_keys, error_template):
        """Validate ``(key, value)`` pairs and join them as ``k=v;k=v``.

        Raises ValueError (built from ``error_template % (key, )``) on the
        first key that is not in ``valid_keys``.
        """
        pairs = []
        for key, val in filters:
            if key not in valid_keys:
                raise ValueError(error_template % (key, ))
            pairs.append('%s=%s' % (key, val))
        return ';'.join(pairs)

    def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
        """
        Get a list of Oozie jobs.

        Note that offset is 1-based.
        ``filters`` is an iterable of (key, value) pairs; every key must be
        one of VALID_JOB_FILTERS, otherwise ValueError is raised.

        Returns a WorkflowList for jobtype 'wf', a CoordinatorList for
        'coord' and a BundleList for anything else.
        """
        params = self._get_params()
        if offset is not None:
            params['offset'] = str(offset)
        if cnt is not None:
            params['len'] = str(cnt)
        if filters is None:
            filters = []
        params['jobtype'] = jobtype
        params['filter'] = self._join_filters(
            filters, OozieApi.VALID_JOB_FILTERS,
            '"%s" is not a valid filter for selecting jobs')

        # Send the request
        resp = self._root.get('jobs', params)
        if jobtype == 'wf':
            wf_list = WorkflowList(self, resp, filters=filters)
        elif jobtype == 'coord':
            wf_list = CoordinatorList(self, resp, filters=filters)
        else:
            wf_list = BundleList(self, resp, filters=filters)
        return wf_list

    def get_workflows(self, offset=None, cnt=None, filters=None):
        return self.get_jobs('wf', offset, cnt, filters)

    def get_coordinators(self, offset=None, cnt=None, filters=None):
        return self.get_jobs('coord', offset, cnt, filters)

    def get_bundles(self, offset=None, cnt=None, filters=None):
        return self.get_jobs('bundle', offset, cnt, filters)

    # TODO: make get_job accept any jobid
    def get_job(self, jobid):
        """
        get_job(jobid) -> Workflow
        """
        params = self._get_params()
        resp = self._root.get('job/%s' % (jobid, ), params)
        wf = Workflow(self, resp)
        return wf

    def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
        """
        get_coordinator(jobid) -> Coordinator

        Results are requested in descending order. ``filters`` follows the
        same (key, value) convention and validation as ``get_jobs``.
        """
        params = self._get_params()
        if offset is not None:
            params['offset'] = str(offset)
        if cnt is not None:
            params['len'] = str(cnt)
        if filters is None:
            # A sequence of pairs, like get_jobs; a dict would not unpack.
            filters = []
        params.update({'order': 'desc'})
        params['filter'] = self._join_filters(
            filters, OozieApi.VALID_JOB_FILTERS,
            '"%s" is not a valid filter for selecting jobs')
        resp = self._root.get('job/%s' % (jobid, ), params)
        return Coordinator(self, resp)

    def get_bundle(self, jobid):
        params = self._get_params()
        resp = self._root.get('job/%s' % (jobid, ), params)
        return Bundle(self, resp)

    def get_job_definition(self, jobid):
        """
        get_job_definition(jobid) -> Definition (xml string)
        """
        params = self._get_params()
        params['show'] = 'definition'
        return self._root.get('job/%s' % (jobid, ), params)

    def get_job_log(self, jobid, logfilter=None):
        """
        get_job_log(jobid) -> Log (xml string)

        ``logfilter`` is an iterable of (key, value) pairs whose keys must be
        in VALID_LOG_FILTERS; ValueError is raised otherwise.
        """
        params = self._get_params()
        params['show'] = 'log'
        if logfilter is None:
            logfilter = []
        params['logfilter'] = self._join_filters(
            logfilter, OozieApi.VALID_LOG_FILTERS,
            '"%s" is not a valid filter for job logs')
        return self._root.get('job/%s' % (jobid, ), params)

    def get_job_status(self, jobid):
        params = self._get_params()
        params['show'] = 'status'
        xml = self._root.get('job/%s' % (jobid, ), params)
        return xml

    def get_action(self, action_id):
        """Fetch an action, picking the wrapper class from the id marker."""
        if 'C@' in action_id:
            Klass = CoordinatorAction
        elif 'B@' in action_id:
            Klass = BundleAction
        else:
            Klass = WorkflowAction
        params = self._get_params()
        resp = self._root.get('job/%s' % (action_id, ), params)
        return Klass(resp)

    def job_control(self, jobid, action, properties=None, parameters=None):
        """
        job_control(jobid, action) -> None
        Raise RestException on error.
        Raise ValueError when ``action`` is not a supported job action.
        """
        if action not in ('start', 'suspend', 'resume', 'kill', 'rerun',
                          'coord-rerun', 'bundle-rerun', 'change', 'ignore',
                          'update'):
            msg = 'Invalid oozie job action: %s' % (action, )
            LOG.error(msg)
            raise ValueError(msg)
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        params['action'] = action
        if parameters is not None:
            params.update(parameters)
        return self._root.put('job/%s' % jobid, params,
                              data=config_gen(properties),
                              contenttype=_XML_CONTENT_TYPE)

    def submit_workflow(self, application_path, properties=None):
        """
        submit_workflow(application_path, properties=None) -> jobid
        Raise RestException on error.
        """
        job_properties = {'oozie.wf.application.path': application_path}
        if properties is not None:
            job_properties.update(properties)
        # submit_job supplies the 'user.name' default.
        return self.submit_job(job_properties)

    # Is name actually submit_coord?
    def submit_job(self, properties=None):
        """
        submit_job(properties=None, id=None) -> jobid
        Raise RestException on error.
        """
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        resp = self._root.post('jobs', params, data=config_gen(properties),
                               contenttype=_XML_CONTENT_TYPE)
        return resp['id']

    def dryrun(self, properties=None):
        """POST the job properties to 'jobs' with action=dryrun."""
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        params['action'] = 'dryrun'
        return self._root.post('jobs', params, data=config_gen(properties),
                               contenttype=_XML_CONTENT_TYPE)

    def rerun(self, jobid, properties=None, params=None):
        """
        PUT a rerun request for ``jobid``.

        ``params`` (optional) is layered over the standard request parameters
        and may override them; 'action' is always forced to 'rerun'. The
        caller's dict is not modified.
        """
        properties = self._get_oozie_properties(properties)
        # Start from the standard params so doAs/timezone are always sent;
        # previously the merged dict was built and then thrown away.
        request_params = self._get_params()
        if params is not None:
            request_params.update(params)
        request_params['action'] = 'rerun'
        return self._root.put('job/%s' % jobid, request_params,
                              data=config_gen(properties),
                              contenttype=_XML_CONTENT_TYPE)

    def get_build_version(self):
        """
        get_build_version() -> Build version (dictionary)
        """
        params = self._get_params()
        resp = self._root.get('admin/build-version', params)
        return resp

    def get_instrumentation(self):
        params = self._get_params()
        resp = self._root.get('admin/instrumentation', params)
        return resp

    def get_metrics(self):
        params = self._get_params()
        resp = self._root.get('admin/metrics', params)
        return resp

    def get_configuration(self):
        """
        get_configuration() -> Oozie config (dictionary)
        """
        params = self._get_params()
        resp = self._root.get('admin/configuration', params)
        return resp

    def get_oozie_status(self):
        """
        get_oozie_status() -> Oozie status (dictionary)
        """
        params = self._get_params()
        resp = self._root.get('admin/status', params)
        return resp

    def get_oozie_slas(self, **kwargs):
        """
        filter=
          app_name=my-sla-app
          id=0000002-131206135002457-oozie-oozi-W
          nominal_start=2013-06-18T00:01Z
          nominal_end=2013-06-23T00:01Z
        """
        params = self._get_params()
        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        params['filter'] = ';'.join(
            ['%s=%s' % (key, val) for key, val in kwargs.items()])
        resp = self._root.get('sla', params)
        return resp['slaSummaryList']
class JobServerApi(object):
    """REST client for the job server's ``sessions`` and ``batches`` resources.

    The acting user is kept per thread: call ``setuser`` before any method
    that reads ``user`` (``create_session``, ``submit_batch``).
    """

    def __init__(self, livy_url):
        self._url = posixpath.join(livy_url)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = SECURITY_ENABLED.get()
        self._thread_local = threading.local()

        if self.security_enabled:
            self._client.set_kerberos_auth()

    def __str__(self):
        return "JobServerApi at %s" % (self._url, )

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    @property
    def user(self):
        return self._thread_local.user

    def setuser(self, user):
        """Remember ``user`` (its ``username`` when it has one) for this thread."""
        self._thread_local.user = getattr(user, 'username', user)

    def _read_log(self, path, startFrom, size):
        """GET a log resource and return its 'log' lines joined by newlines."""
        window = {}
        for name, value in (('from', startFrom), ('size', size)):
            if value is not None:
                window[name] = value
        payload = self._root.get(path, params=window)
        return '\n'.join(payload['log'])

    def _post_json(self, path, body):
        """POST ``body`` serialized as JSON to ``path``."""
        return self._root.post(path, data=json.dumps(body),
                               contenttype=_JSON_CONTENT_TYPE)

    def get_status(self):
        return self._root.get('sessions')

    def get_log(self, uuid, startFrom=None, size=None):
        return self._read_log('sessions/%s/log' % uuid, startFrom, size)

    def create_session(self, **properties):
        """Create a session; ``proxyUser`` is set to the current user."""
        properties['proxyUser'] = self.user
        return self._post_json('sessions', properties)

    def get_sessions(self):
        return self._root.get('sessions')

    def get_session(self, uuid):
        return self._root.get('sessions/%s' % uuid)

    def get_statements(self, uuid):
        return self._root.get('sessions/%s/statements' % uuid)

    def submit_statement(self, uuid, statement):
        return self._post_json('sessions/%s/statements' % uuid,
                               {'code': statement})

    def inspect(self, uuid, statement):
        return self._post_json('sessions/%s/inspect' % uuid,
                               {'code': statement})

    def fetch_data(self, session, statement):
        return self._root.get(
            'sessions/%s/statements/%s' % (session, statement))

    def cancel(self, session):
        return self._root.post('sessions/%s/interrupt' % session)

    def close(self, uuid):
        return self._root.delete('sessions/%s' % uuid)

    def get_batches(self):
        return self._root.get('batches')

    def submit_batch(self, properties):
        """Submit a batch; ``proxyUser`` is written into ``properties``."""
        properties['proxyUser'] = self.user
        return self._post_json('batches', properties)

    def get_batch(self, uuid):
        return self._root.get('batches/%s' % uuid)

    def get_batch_status(self, uuid):
        return self._root.get('batches/%s/state' % uuid)['state']

    def get_batch_log(self, uuid, startFrom=None, size=None):
        return self._read_log('batches/%s/log' % uuid, startFrom, size)

    def close_batch(self, uuid):
        return self._root.delete('batches/%s' % uuid)
class JobServerApi(object):
    """REST client for the job server's ``sessions`` and ``batches`` resources.

    Security is always reported as disabled by this variant. The acting user
    is kept per thread and must be set with ``setuser`` before it is read.
    """

    def __init__(self, oozie_url):
        self._url = posixpath.join(oozie_url)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = False
        self._thread_local = threading.local()

    def __str__(self):
        return "JobServerApi at %s" % (self._url,)

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    @property
    def user(self):
        return self._thread_local.user

    def setuser(self, user):
        """Remember ``user`` (its ``username`` when it has one) for this thread."""
        self._thread_local.user = getattr(user, "username", user)

    def _read_log(self, path, startFrom, size):
        """GET a log resource and return its 'log' lines joined by newlines."""
        window = {}
        for name, value in (("from", startFrom), ("size", size)):
            if value is not None:
                window[name] = value
        payload = self._root.get(path, params=window)
        return "\n".join(payload["log"])

    def get_status(self):
        return self._root.get("sessions")

    def get_log(self, uuid, startFrom=None, size=None):
        return self._read_log("sessions/%s/log" % uuid, startFrom, size)

    def create_session(self, **properties):
        """Create a session; ``proxyUser`` is set to the current user."""
        properties["proxyUser"] = self.user
        body = json.dumps(properties)
        return self._root.post("sessions", data=body,
                               contenttype="application/json")

    def get_session(self, uuid):
        return self._root.get("sessions/%s" % uuid)

    def submit_statement(self, uuid, statement):
        body = json.dumps({"code": statement})
        return self._root.post("sessions/%s/statements" % uuid, data=body,
                               contenttype=_JSON_CONTENT_TYPE)

    def inspect(self, uuid, statement):
        body = json.dumps({"code": statement})
        return self._root.post("sessions/%s/inspect" % uuid, data=body,
                               contenttype=_JSON_CONTENT_TYPE)

    def fetch_data(self, session, statement):
        return self._root.get(
            "sessions/%s/statements/%s" % (session, statement))

    def cancel(self, session):
        return self._root.post("sessions/%s/interrupt" % session)

    def close(self, uuid):
        return self._root.delete("sessions/%s" % uuid)

    def get_batches(self):
        return self._root.get("batches")

    def submit_batch(self, properties):
        """Submit a batch; ``proxyUser`` is written into ``properties``."""
        properties["proxyUser"] = self.user
        body = json.dumps(properties)
        return self._root.post("batches", data=body,
                               contenttype=_JSON_CONTENT_TYPE)

    def get_batch(self, uuid):
        return self._root.get("batches/%s" % uuid)

    def get_batch_status(self, uuid):
        return self._root.get("batches/%s/state" % uuid)["state"]

    def get_batch_log(self, uuid, startFrom=None, size=None):
        return self._read_log("batches/%s/log" % uuid, startFrom, size)

    def close_batch(self, uuid):
        return self._root.delete("batches/%s" % uuid)
class ManagerApi(object):
    """
    https://cloudera.github.io/cm_api/

    REST client for the cluster manager API: looks up clusters, services,
    roles and hosts, reads role configuration and triggers refresh/restart
    commands.
    """

    def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
        self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
        self._username = get_navigator_auth_username()
        self._password = get_navigator_auth_password()

        self.user = user
        self._client = HttpClient(self._api_url, logger=LOG)

        # Kerberos when security is on, otherwise basic auth.
        if security_enabled:
            self._client.set_kerberos_auth()
        else:
            self._client.set_basic_auth(self._username, self._password)

        self._client.set_verify(ssl_cert_ca_verify)
        self._root = Resource(self._client)

    def has_service(self, service_name, cluster_name=None):
        """Return whether ``service_name`` is among the cluster's service types."""
        cluster = self._get_cluster(cluster_name)
        try:
            path = 'clusters/%(cluster_name)s/serviceTypes' % {
                'cluster_name': cluster['name']
            }
            return service_name in self._root.get(path)['items']
        except RestException as e:
            raise ManagerApiException(e)

    def get_spark_history_server_configs(self, cluster_name=None):
        """Return ``(hostId, configs)`` of the Spark History Server role.

        Returns ``(None, None)`` when the service or role is not found, or
        when any error occurs (the error is logged as a warning).
        """
        service_name = "SPARK_ON_YARN"
        shs_role_type = "SPARK_YARN_HISTORY_SERVER"

        try:
            cluster = self._get_cluster(cluster_name)
            services = self._root.get('clusters/%(cluster_name)s/services' % {
                'cluster_name': cluster['name']
            })['items']
            display_names = [
                svc['displayName'] for svc in services
                if svc['type'] == service_name
            ]

            if display_names:
                spark_display_name = display_names[0]
                roles = self._root.get(
                    'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles' % {
                        'cluster_name': cluster['name'],
                        'spark_service_display_name': spark_display_name
                    })['items']

                role_names = [
                    role['name'] for role in roles
                    if role['type'] == shs_role_type
                ]
                host_refs = [
                    role['hostRef'] for role in roles
                    if role['type'] == shs_role_type
                ]
                role_name = role_names[0] if role_names else None
                host_id = host_refs[0]['hostId'] if host_refs else None

                if role_name and host_id:
                    configs = self._root.get(
                        'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % {
                            'cluster_name': cluster['name'],
                            'spark_service_display_name': spark_display_name,
                            'shs_server_name': role_name
                        },
                        params={'view': 'full'})['items']
                    return host_id, configs
        except Exception as e:
            LOG.warning("Check Spark History Server via ManagerApi: %s" % e)

        return None, None

    def get_spark_history_server_url(self, cluster_name=None):
        """Return the Spark History Server URL, or None when it is not found."""
        host_id, configs = self.get_spark_history_server_configs(
            cluster_name=cluster_name)

        if not (host_id and configs):
            return None

        # Default value of each setting of interest, keyed by relatedName.
        found = {
            'spark.history.ui.port': None,
            'spark.ssl.historyServer.port': None,
            'spark.ssl.historyServer.enabled': None,
        }
        for config in configs:
            if 'relatedName' in config and 'default' in config:
                related = config['relatedName']
                if related in found:
                    found[related] = config['default']

        host = self._root.get('hosts/%(hostId)s' % {'hostId': host_id})
        hostname = host['hostname'] if host else None

        return self.assemble_shs_url(
            hostname,
            found['spark.history.ui.port'],
            found['spark.ssl.historyServer.port'],
            found['spark.ssl.historyServer.enabled'])

    def get_spark_history_server_security_enabled(self, cluster_name=None):
        """Evaluate the first 'history_server_spnego_enabled' config default."""
        _, configs = self.get_spark_history_server_configs(
            cluster_name=cluster_name)

        for config in (configs or ()):
            if ('relatedName' in config and 'default' in config
                    and config['relatedName'] == 'history_server_spnego_enabled'):
                spnego = config['default']
                return spnego and spnego == 'true'

        return False

    def assemble_shs_url(self, shs_ui_hostname, shs_ui_port=None, shs_ssl_port=None, shs_ssl_enabled=None):
        """Build ``protocol://host:port``; None when any argument is missing.

        The SSL port and ``https`` are used when ``shs_ssl_enabled`` equals
        'true' (case-insensitive); otherwise the UI port and ``http``.
        """
        if not (shs_ui_hostname and shs_ui_port and shs_ssl_port and shs_ssl_enabled):
            LOG.warning("Spark conf not found!")
            return None

        use_ssl = shs_ssl_enabled.lower() == 'true'
        return '%(protocol)s://%(hostname)s:%(port)s' % {
            'protocol': 'https' if use_ssl else 'http',
            'hostname': shs_ui_hostname,
            'port': shs_ssl_port if use_ssl else shs_ui_port,
        }

    def tools_echo(self):
        try:
            params = (('message', 'hello'), )
            LOG.info(params)
            return self._root.get('tools/echo', params=params)
        except RestException as e:
            raise ManagerApiException(e)

    def get_kafka_brokers(self, cluster_name=None):
        """Return the broker hosts as a comma-separated ``host:9092`` list."""
        try:
            broker_hosts = self._get_hosts('KAFKA', 'KAFKA_BROKER',
                                           cluster_name=cluster_name)
            return ','.join([h['hostname'] + ':9092' for h in broker_hosts])
        except RestException as e:
            raise ManagerApiException(e)

    def get_kudu_master(self, cluster_name=None):
        """Return the hostname of the first KUDU_MASTER role's host."""
        try:
            cluster = self._get_cluster(cluster_name)
            services = self._root.get('clusters/%(name)s/services' % cluster)['items']
            kudu = [svc for svc in services if svc['type'] == 'KUDU'][0]
            master = self._get_roles(cluster['name'], kudu['name'], 'KUDU_MASTER')[0]
            master_host = self._root.get('hosts/%(hostId)s' % master['hostRef'])
            return master_host['hostname']
        except RestException as e:
            raise ManagerApiException(e)

    def get_kafka_topics(self, broker_host):
        try:
            topics_root = Resource(
                HttpClient('http://%s:24042' % broker_host, logger=LOG))
            return topics_root.get('/api/topics')
        except RestException as e:
            raise ManagerApiException(e)

    def update_flume_config(self, cluster_name, config_name, config_value):
        """Send a batch request that PUTs one config value on the first
        AGENT role config group of the 'FLUME-1' service."""
        service = 'FLUME-1'
        cluster = self._get_cluster(cluster_name)
        group_names = [
            role['roleConfigGroupRef']['roleConfigGroupName']
            for role in self._get_roles(cluster['name'], service, 'AGENT')
        ]

        # Placeholders are substituted with replace() because the template
        # contains literal '%20' sequences.
        url = u'/api/v8/clusters/%(cluster_name)s/services/%(service)s/roleConfigGroups/%(roleConfigGroups)s/config?message=Updated%20service%20and%20role%20type%20configurations.'
        url = url.replace('%(cluster_name)s', urllib_quote(cluster['name']))
        url = url.replace('%(service)s', service)
        url = url.replace('%(roleConfigGroups)s', group_names[0])

        data = {
            u'items': [{
                u'url': url,
                u'body': {
                    u'items': [{
                        u'name': config_name,
                        u'value': config_value
                    }]
                },
                u'contentType': u'application/json',
                u'method': u'PUT'
            }]
        }
        return self.batch(items=data)

    def get_flume_agents(self, cluster_name=None):
        agent_hosts = self._get_hosts('FLUME', 'AGENT', cluster_name=cluster_name)
        return [host['hostname'] for host in agent_hosts]

    def _get_hosts(self, service_name, role_name, cluster_name=None):
        """Return the host entries running ``role_name`` of ``service_name``."""
        try:
            cluster = self._get_cluster(cluster_name)
            services = self._root.get('clusters/%(name)s/services' % cluster)['items']
            service = [svc for svc in services if svc['type'] == service_name][0]
            roles = self._get_roles(cluster['name'], service['name'], role_name)
            wanted_ids = [role['hostRef']['hostId'] for role in roles]

            all_hosts = self._root.get('hosts')['items']
            return [host for host in all_hosts if host['hostId'] in wanted_ids]
        except RestException as e:
            raise ManagerApiException(e)

    def refresh_flume(self, cluster_name, restart=False):
        """Refresh (or restart, when ``restart``) the 'FLUME-1' AGENT roles."""
        service = 'FLUME-1'
        cluster = self._get_cluster(cluster_name)
        roles = [
            role['name']
            for role in self._get_roles(cluster['name'], service, 'AGENT')
        ]

        command = self.restart_services if restart else self.refresh_configs
        return command(cluster['name'], service, roles)

    def refresh_configs(self, cluster_name, service=None, roles=None):
        """POST a refresh command for the cluster, a service, or given roles."""
        try:
            if service is None:
                return self._root.post(
                    'clusters/%(cluster_name)s/commands/refresh' % {'cluster_name': cluster_name},
                    contenttype="application/json")

            path = 'clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh' % {
                'cluster_name': cluster_name,
                'service': service
            }
            if roles is None:
                return self._root.post(path, contenttype="application/json")
            return self._root.post(path, data=json.dumps({"items": roles}),
                                   contenttype="application/json")
        except RestException as e:
            raise ManagerApiException(e)

    def restart_services(self, cluster_name, service=None, roles=None):
        """POST a restart command for the cluster, a service, or given roles."""
        try:
            if service is None:
                return self._root.post(
                    'clusters/%(cluster_name)s/commands/restart' % {'cluster_name': cluster_name},
                    contenttype="application/json")

            path = 'clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart' % {
                'cluster_name': cluster_name,
                'service': service
            }
            if roles is None:
                return self._root.post(path, contenttype="application/json")
            return self._root.post(path, data=json.dumps({"items": roles}),
                                   contenttype="application/json")
        except RestException as e:
            raise ManagerApiException(e)

    def batch(self, items):
        try:
            return self._root.post('batch', data=json.dumps(items),
                                   contenttype='application/json')
        except RestException as e:
            raise ManagerApiException(e)

    def _get_cluster(self, cluster_name=None):
        """Return the named cluster, or the first listed one when unnamed."""
        clusters = self._root.get('clusters/')['items']

        if cluster_name is None:
            return clusters[0]
        return [c for c in clusters if c['name'] == cluster_name][0]

    def _get_roles(self, cluster_name, service_name, role_type):
        path = 'clusters/%(cluster_name)s/services/%(service_name)s/roles' % {
            'cluster_name': cluster_name,
            'service_name': service_name
        }
        return [
            role for role in self._root.get(path)['items']
            if role['type'] == role_type
        ]

    def get_impalad_config(self, key=None, impalad_host=None, cluster_name=None):
        """Return the value of config ``key`` for the IMPALAD role on
        ``impalad_host``.

        Returns None when an argument is missing, nothing matches, or an
        error occurs (the error is logged as a warning).
        """
        if not key or not impalad_host:
            return None

        service_name = "IMPALA"
        role_type = 'IMPALAD'

        try:
            cluster = self._get_cluster(cluster_name)
            services = self._root.get('clusters/%(cluster_name)s/services' % {
                'cluster_name': cluster['name']
            })['items']
            display_names = [
                svc['displayName'] for svc in services
                if svc['type'] == service_name
            ]

            all_hosts = self._root.get('hosts')['items']
            host_ids = [
                host['hostId'] for host in all_hosts
                if host['hostname'] == impalad_host
            ]

            if host_ids and display_names:
                host_id = host_ids[0]
                impala_display_name = display_names[0]

                roles = self._root.get(
                    'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles' % {
                        'cluster_name': cluster['name'],
                        'spark_service_display_name': impala_display_name
                    })['items']

                role_names = [
                    role['name'] for role in roles
                    if role['type'] == role_type and role['hostRef']['hostId'] == host_id
                ]
                role_name = role_names[0] if role_names else None

                if role_name:
                    role_configs = self._root.get(
                        'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % {
                            'cluster_name': cluster['name'],
                            'spark_service_display_name': impala_display_name,
                            'shs_server_name': role_name
                        },
                        params={'view': 'full'})['items']

                    for config in role_configs:
                        if 'relatedName' in config and 'value' in config:
                            if config['relatedName'] == key:
                                return config['value']
        except Exception as e:
            LOG.warning(
                "Get Impala Daemon API configurations via ManangerAPI: %s" % e)

        return None
class OozieApi(object):
    """Client for the Oozie REST API that issues requests for a single user.

    Every request carries the query parameters built by ``_get_params``:
    ``doAs`` and ``timezone``, plus ``user.name`` when security is disabled.
    """

    VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime')
    VALID_LOG_FILTERS = {'recent', 'limit', 'loglevel', 'text'}

    def __init__(self, oozie_url, user, security_enabled=False,
                 api_version=API_VERSION, ssl_cert_ca_verify=True):
        """
        :param oozie_url: base URL of the Oozie server; ``api_version`` is
            appended to it.
        :param user: a user name, or an object exposing ``username``.
        :param security_enabled: when true, Kerberos auth is enabled on the
            HTTP client.
        :param ssl_cert_ca_verify: forwarded to the client's ``set_verify``.
        """
        self._url = posixpath.join(oozie_url, api_version)
        self._client = HttpClient(self._url, logger=LOG)
        if security_enabled:
            self._client.set_kerberos_auth()
        self._client.set_verify(ssl_cert_ca_verify)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        # To store username info
        if hasattr(user, 'username'):
            self.user = user.username
        else:
            self.user = user
        self.api_version = api_version

    def __str__(self):
        return "OozieApi at %s" % (self._url,)

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    def _get_params(self):
        """Return a fresh dict of the query parameters sent with each request."""
        if self.security_enabled:
            return {
                'doAs': self.user,
                'timezone': TIME_ZONE.get()
            }
        return {
            'user.name': DEFAULT_USER,
            'doAs': self.user,
            'timezone': TIME_ZONE.get()
        }

    def _get_oozie_properties(self, properties=None):
        """Return ``properties`` layered over the default ``user.name``."""
        defaults = {
            'user.name': self.user,
        }
        if properties is not None:
            defaults.update(properties)
        return defaults

    @staticmethod
    def _join_filters(filters, valid_keys, error_template):
        """Validate ``(key, value)`` pairs and join them as ``k=v;k=v``.

        Raises ValueError (built from ``error_template % (key,)``) on the
        first key that is not in ``valid_keys``.
        """
        pairs = []
        for key, val in filters:
            if key not in valid_keys:
                raise ValueError(error_template % (key,))
            pairs.append('%s=%s' % (key, val))
        return ';'.join(pairs)

    def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
        """
        Get a list of Oozie jobs.

        Note that offset is 1-based.
        ``filters`` is an iterable of (key, value) pairs; every key must be
        one of VALID_JOB_FILTERS, otherwise ValueError is raised.

        Returns a WorkflowList for jobtype 'wf', a CoordinatorList for
        'coord' and a BundleList for anything else.
        """
        params = self._get_params()
        if offset is not None:
            params['offset'] = str(offset)
        if cnt is not None:
            params['len'] = str(cnt)
        if filters is None:
            filters = []
        params['jobtype'] = jobtype
        params['filter'] = self._join_filters(
            filters, OozieApi.VALID_JOB_FILTERS,
            '"%s" is not a valid filter for selecting jobs')

        # Send the request
        resp = self._root.get('jobs', params)
        if jobtype == 'wf':
            wf_list = WorkflowList(self, resp, filters=filters)
        elif jobtype == 'coord':
            wf_list = CoordinatorList(self, resp, filters=filters)
        else:
            wf_list = BundleList(self, resp, filters=filters)
        return wf_list

    def get_workflows(self, offset=None, cnt=None, filters=None):
        return self.get_jobs('wf', offset, cnt, filters)

    def get_coordinators(self, offset=None, cnt=None, filters=None):
        return self.get_jobs('coord', offset, cnt, filters)

    def get_bundles(self, offset=None, cnt=None, filters=None):
        return self.get_jobs('bundle', offset, cnt, filters)

    # TODO: make get_job accept any jobid
    def get_job(self, jobid):
        """
        get_job(jobid) -> Workflow
        """
        params = self._get_params()
        resp = self._root.get('job/%s' % (jobid,), params)
        wf = Workflow(self, resp)
        return wf

    def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
        """
        get_coordinator(jobid) -> Coordinator

        Results are requested in descending order. ``filters`` follows the
        same (key, value) convention and validation as ``get_jobs``.
        """
        params = self._get_params()
        if offset is not None:
            params['offset'] = str(offset)
        if cnt is not None:
            params['len'] = str(cnt)
        if filters is None:
            # A sequence of pairs, like get_jobs; a dict would not unpack.
            filters = []
        params.update({'order': 'desc'})
        params['filter'] = self._join_filters(
            filters, OozieApi.VALID_JOB_FILTERS,
            '"%s" is not a valid filter for selecting jobs')
        resp = self._root.get('job/%s' % (jobid,), params)
        return Coordinator(self, resp)

    def get_bundle(self, jobid):
        params = self._get_params()
        resp = self._root.get('job/%s' % (jobid,), params)
        return Bundle(self, resp)

    def get_job_definition(self, jobid):
        """
        get_job_definition(jobid) -> Definition (xml string)
        """
        params = self._get_params()
        params['show'] = 'definition'
        return self._root.get('job/%s' % (jobid,), params)

    def get_job_log(self, jobid, logfilter=None):
        """
        get_job_log(jobid) -> Log (xml string)

        ``logfilter`` is an iterable of (key, value) pairs whose keys must be
        in VALID_LOG_FILTERS; ValueError is raised otherwise.
        """
        params = self._get_params()
        params['show'] = 'log'
        if logfilter is None:
            logfilter = []
        params['logfilter'] = self._join_filters(
            logfilter, OozieApi.VALID_LOG_FILTERS,
            '"%s" is not a valid filter for job logs')
        return self._root.get('job/%s' % (jobid,), params)

    def get_job_status(self, jobid):
        params = self._get_params()
        params['show'] = 'status'
        xml = self._root.get('job/%s' % (jobid,), params)
        return xml

    def get_action(self, action_id):
        """Fetch an action, picking the wrapper class from the id marker."""
        if 'C@' in action_id:
            Klass = CoordinatorAction
        elif 'B@' in action_id:
            Klass = BundleAction
        else:
            Klass = WorkflowAction
        params = self._get_params()
        resp = self._root.get('job/%s' % (action_id,), params)
        return Klass(resp)

    def job_control(self, jobid, action, properties=None, parameters=None):
        """
        job_control(jobid, action) -> None
        Raise RestException on error.
        Raise ValueError when ``action`` is not a supported job action.
        """
        if action not in ('start', 'suspend', 'resume', 'kill', 'rerun',
                          'coord-rerun', 'bundle-rerun', 'change', 'ignore'):
            msg = 'Invalid oozie job action: %s' % (action,)
            LOG.error(msg)
            raise ValueError(msg)
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        params['action'] = action
        if parameters is not None:
            params.update(parameters)
        return self._root.put('job/%s' % jobid, params,
                              data=config_gen(properties),
                              contenttype=_XML_CONTENT_TYPE)

    def submit_workflow(self, application_path, properties=None):
        """
        submit_workflow(application_path, properties=None) -> jobid
        Raise RestException on error.
        """
        job_properties = {'oozie.wf.application.path': application_path}
        if properties is not None:
            job_properties.update(properties)
        # submit_job supplies the 'user.name' default.
        return self.submit_job(job_properties)

    # Is name actually submit_coord?
    def submit_job(self, properties=None):
        """
        submit_job(properties=None, id=None) -> jobid
        Raise RestException on error.
        """
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        resp = self._root.post('jobs', params, data=config_gen(properties),
                               contenttype=_XML_CONTENT_TYPE)
        return resp['id']

    def dryrun(self, properties=None):
        """POST the job properties to 'jobs' with action=dryrun."""
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        params['action'] = 'dryrun'
        return self._root.post('jobs', params, data=config_gen(properties),
                               contenttype=_XML_CONTENT_TYPE)

    def rerun(self, jobid, properties=None, params=None):
        """
        PUT a rerun request for ``jobid``.

        ``params`` (optional) is layered over the standard request parameters
        and may override them; 'action' is always forced to 'rerun'. The
        caller's dict is not modified.
        """
        properties = self._get_oozie_properties(properties)
        # Start from the standard params so doAs/timezone are always sent;
        # previously the merged dict was built and then thrown away.
        request_params = self._get_params()
        if params is not None:
            request_params.update(params)
        request_params['action'] = 'rerun'
        return self._root.put('job/%s' % jobid, request_params,
                              data=config_gen(properties),
                              contenttype=_XML_CONTENT_TYPE)

    def get_build_version(self):
        """
        get_build_version() -> Build version (dictionary)
        """
        params = self._get_params()
        resp = self._root.get('admin/build-version', params)
        return resp

    def get_instrumentation(self):
        params = self._get_params()
        resp = self._root.get('admin/instrumentation', params)
        return resp

    def get_metrics(self):
        params = self._get_params()
        resp = self._root.get('admin/metrics', params)
        return resp

    def get_configuration(self):
        """
        get_configuration() -> Oozie config (dictionary)
        """
        params = self._get_params()
        resp = self._root.get('admin/configuration', params)
        return resp

    def get_oozie_status(self):
        """
        get_oozie_status() -> Oozie status (dictionary)
        """
        params = self._get_params()
        resp = self._root.get('admin/status', params)
        return resp

    def get_oozie_slas(self, **kwargs):
        """
        filter=
          app_name=my-sla-app
          id=0000002-131206135002457-oozie-oozi-W
          nominal_start=2013-06-18T00:01Z
          nominal_end=2013-06-23T00:01Z
        """
        params = self._get_params()
        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        params['filter'] = ';'.join(
            ['%s=%s' % (key, val) for key, val in kwargs.items()])
        resp = self._root.get('sla', params)
        return resp['slaSummaryList']
class LivyClient(object): def __init__(self, livy_url): self._url = posixpath.join(livy_url) self._client = HttpClient(self._url, logger=LOG) self._root = Resource(self._client) self._security_enabled = SECURITY_ENABLED.get() self._csrf_enabled = CSRF_ENABLED.get() self._thread_local = threading.local() if self.security_enabled: self._client.set_kerberos_auth() if self.csrf_enabled: self._client.set_headers({'X-Requested-By': 'hue'}) self._client.set_verify(SSL_CERT_CA_VERIFY.get()) def __str__(self): return "LivyClient at %s" % (self._url, ) @property def url(self): return self._url @property def security_enabled(self): return self._security_enabled @property def csrf_enabled(self): return self._csrf_enabled @property def user(self): return self._thread_local.user def setuser(self, user): if hasattr(user, 'username'): self._thread_local.user = user.username else: self._thread_local.user = user def get_status(self): return self._root.get('sessions') def get_log(self, uuid, startFrom=None, size=None): params = {} if startFrom is not None: params['from'] = startFrom if size is not None: params['size'] = size response = self._root.get('sessions/%s/log' % uuid, params=params) return '\n'.join(response['log']) def create_session(self, **properties): properties['proxyUser'] = self.user.split('@')[0] if has_connectors(): # Only SQL supported via connectors currently properties['kind'] = 'sql' return self._root.post('sessions', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE) def get_sessions(self): return self._root.get('sessions') def get_session(self, uuid): return self._root.get('sessions/%s' % uuid) def get_statements(self, uuid): return self._root.get('sessions/%s/statements' % uuid) def submit_statement(self, uuid, statement): data = {'code': statement} return self._root.post('sessions/%s/statements' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE) def inspect(self, uuid, statement): data = {'code': statement} return 
self._root.post('sessions/%s/inspect' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE) def fetch_data(self, session, statement): return self._root.get('sessions/%s/statements/%s' % (session, statement)) def cancel(self, session): return self._root.post('sessions/%s/interrupt' % session) def close(self, uuid): return self._root.delete('sessions/%s' % uuid) def get_batches(self): return self._root.get('batches') def submit_batch(self, properties): properties['proxyUser'] = self.user return self._root.post('batches', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE) def get_batch(self, uuid): return self._root.get('batches/%s' % uuid) def get_batch_status(self, uuid): response = self._root.get('batches/%s/state' % uuid) return response['state'] def get_batch_log(self, uuid, startFrom=None, size=None): params = {} if startFrom is not None: params['from'] = startFrom if size is not None: params['size'] = size response = self._root.get('batches/%s/log' % uuid, params=params) return '\n'.join(response['log']) def close_batch(self, uuid): return self._root.delete('batches/%s' % uuid)
class OozieApi(object):
    """REST client for an Oozie server, rooted at ``<oozie_url>/<api_version>``.

    The acting user is kept in a ``threading.local`` and must be set with
    ``setuser`` on the current thread before issuing requests, because the
    query parameters built by ``_get_params`` read ``self.user``.
    """

    def __init__(self, oozie_url, security_enabled=False, api_version=API_VERSION):
        """Build the HTTP client; enable Kerberos auth when ``security_enabled`` is true."""
        self._url = posixpath.join(oozie_url, api_version)
        self._client = HttpClient(self._url, logger=LOG)
        if security_enabled:
            self._client.set_kerberos_auth()
        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        # To store username info
        self._thread_local = threading.local()
        self.api_version = api_version

    def __str__(self):
        return "OozieApi at %s" % (self._url,)

    @property
    def url(self):
        """Base URL including the API version segment."""
        return self._url

    @property
    def security_enabled(self):
        """The ``security_enabled`` flag passed to the constructor."""
        return self._security_enabled

    @property
    def user(self):
        """User name stored for the current thread by ``setuser``."""
        return self._thread_local.user

    def setuser(self, user):
        """Store the acting user for the current thread.

        ``user`` may be an object exposing ``username`` (that attribute is
        stored) or any other value, which is stored as-is.
        """
        self._thread_local.user = user.username if hasattr(user, "username") else user

    def _get_params(self):
        """Return a fresh dict of query parameters common to every request.

        Always contains ``doAs`` (current user) and ``timezone``. When
        security is disabled, ``user.name`` is additionally set to
        ``DEFAULT_USER``.
        """
        params = {"doAs": self.user, "timezone": TIME_ZONE.get()}
        if not self.security_enabled:
            params["user.name"] = DEFAULT_USER
        return params

    def _get_oozie_properties(self, properties=None):
        """Return ``properties`` merged over a ``user.name`` default, as a new dict."""
        merged = {"user.name": self.user}
        if properties is not None:
            merged.update(properties)
        return merged

    VALID_JOB_FILTERS = ("name", "user", "group", "status")

    def get_jobs(self, jobtype, offset=None, cnt=None, **kwargs):
        """
        Get a list of Oozie jobs.

        jobtype selects the wrapper for the response: 'wf' gives a
        WorkflowList, 'coord' a CoordinatorList, and any other value a
        BundleList.
        Note that offset is 1-based (per the original author's note).
        kwargs is used for filtering and each key must be one of
        VALID_JOB_FILTERS: name, user, group, status.

        Raise ValueError when a filter key is not in VALID_JOB_FILTERS.
        """
        params = self._get_params()
        if offset is not None:
            params["offset"] = str(offset)
        if cnt is not None:
            params["len"] = str(cnt)
        params["jobtype"] = jobtype

        filter_list = []
        # .items() instead of .iteritems() so this works on Python 2 and 3.
        for key, val in kwargs.items():
            if key not in OozieApi.VALID_JOB_FILTERS:
                raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
            filter_list.append("%s=%s" % (key, val))
        # The filter parameter is always sent, even when empty.
        params["filter"] = ";".join(filter_list)

        # Send the request
        resp = self._root.get("jobs", params)
        if jobtype == "wf":
            return WorkflowList(self, resp, filters=kwargs)
        if jobtype == "coord":
            return CoordinatorList(self, resp, filters=kwargs)
        return BundleList(self, resp, filters=kwargs)

    def get_workflows(self, offset=None, cnt=None, **kwargs):
        """Shortcut for ``get_jobs("wf", ...)``."""
        return self.get_jobs("wf", offset, cnt, **kwargs)

    def get_coordinators(self, offset=None, cnt=None, **kwargs):
        """Shortcut for ``get_jobs("coord", ...)``."""
        return self.get_jobs("coord", offset, cnt, **kwargs)

    def get_bundles(self, offset=None, cnt=None, **kwargs):
        """Shortcut for ``get_jobs("bundle", ...)``."""
        return self.get_jobs("bundle", offset, cnt, **kwargs)

    # TODO: make get_job accept any jobid
    def get_job(self, jobid):
        """
        get_job(jobid) -> Workflow
        """
        params = self._get_params()
        resp = self._root.get("job/%s" % (jobid,), params)
        return Workflow(self, resp)

    def get_coordinator(self, jobid):
        """
        get_coordinator(jobid) -> Coordinator

        The request is sent with ``len=-1``.
        """
        params = self._get_params()
        params["len"] = -1
        resp = self._root.get("job/%s" % (jobid,), params)
        return Coordinator(self, resp)

    def get_bundle(self, jobid):
        """
        get_bundle(jobid) -> Bundle
        """
        params = self._get_params()
        resp = self._root.get("job/%s" % (jobid,), params)
        return Bundle(self, resp)

    def get_job_definition(self, jobid):
        """
        get_job_definition(jobid) -> Definition (xml string)
        """
        params = self._get_params()
        params["show"] = "definition"
        return self._root.get("job/%s" % (jobid,), params)

    def get_job_log(self, jobid):
        """
        get_job_log(jobid) -> Log

        Returns the raw response of the ``show=log`` request.
        """
        params = self._get_params()
        params["show"] = "log"
        return self._root.get("job/%s" % (jobid,), params)

    def get_action(self, action_id):
        """Fetch one action and wrap it according to its id.

        Ids containing ``C@`` yield a CoordinatorAction, ids containing
        ``B@`` a BundleAction, and anything else a WorkflowAction.
        """
        if "C@" in action_id:
            Klass = CoordinatorAction
        elif "B@" in action_id:
            Klass = BundleAction
        else:
            Klass = WorkflowAction
        params = self._get_params()
        resp = self._root.get("job/%s" % (action_id,), params)
        return Klass(resp)

    def job_control(self, jobid, action, properties=None, parameters=None):
        """
        job_control(jobid, action) -> response of the PUT request

        ``action`` must be one of start, suspend, resume, kill, rerun,
        coord-rerun, bundle-rerun. ``properties`` are merged over the
        ``user.name`` default and sent as the request body; ``parameters``
        are merged into the query parameters last, so they can override
        the defaults (including ``action``).

        Raise ValueError on an unknown action.
        Raise RestException on error.
        """
        if action not in ("start", "suspend", "resume", "kill", "rerun", "coord-rerun", "bundle-rerun"):
            msg = "Invalid oozie job action: %s" % (action,)
            LOG.error(msg)
            raise ValueError(msg)
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        params["action"] = action
        if parameters is not None:
            params.update(parameters)

        return self._root.put("job/%s" % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

    def submit_workflow(self, application_path, properties=None):
        """
        submit_workflow(application_path, properties=None) -> jobid

        ``properties`` are merged over defaults for
        ``oozie.wf.application.path`` and ``user.name``.

        Raise RestException on error.
        """
        merged = {"oozie.wf.application.path": application_path, "user.name": self.user}
        if properties is not None:
            merged.update(properties)
        return self.submit_job(merged)

    # Is name actually submit_coord?
    def submit_job(self, properties=None):
        """
        submit_job(properties=None) -> jobid

        ``properties`` are merged over the ``user.name`` default and posted
        to ``jobs``; the ``id`` field of the response is returned.

        Raise RestException on error.
        """
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        resp = self._root.post("jobs", params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
        return resp["id"]

    def rerun(self, jobid, properties=None, params=None):
        """
        rerun(jobid, properties=None, params=None) -> response of the PUT request

        Caller-supplied ``params`` are merged over the default request
        parameters from ``_get_params`` in a fresh dict, so the defaults
        (``doAs``, ``timezone``, ...) are always sent and the caller's dict
        is not modified. ``action`` is always forced to ``rerun``.
        """
        properties = self._get_oozie_properties(properties)
        request_params = self._get_params()
        if params is not None:
            request_params.update(params)
        request_params["action"] = "rerun"

        return self._root.put(
            "job/%s" % jobid, request_params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE
        )

    def get_build_version(self):
        """
        get_build_version() -> Build version (dictionary)
        """
        return self._root.get("admin/build-version", self._get_params())

    def get_instrumentation(self):
        """Return the response of ``GET admin/instrumentation``."""
        return self._root.get("admin/instrumentation", self._get_params())

    def get_configuration(self):
        """
        get_configuration() -> Oozie config (dictionary)
        """
        return self._root.get("admin/configuration", self._get_params())

    def get_oozie_status(self):
        """
        get_oozie_status() -> Oozie status (dictionary)
        """
        return self._root.get("admin/status", self._get_params())

    def get_oozie_slas(self, **kwargs):
        """
        Return the ``slaSummaryList`` field of the ``sla`` response.

        kwargs are joined into the ``filter`` parameter as ``key=value``
        pairs separated by ``;``. Example keys from the original notes:

        filter=
          app_name=my-sla-app
          id=0000002-131206135002457-oozie-oozi-W
          nominal_start=2013-06-18T00:01Z
          nominal_end=2013-06-23T00:01Z
        """
        params = self._get_params()
        # .items() instead of .iteritems() so this works on Python 2 and 3.
        params["filter"] = ";".join(["%s=%s" % (key, val) for key, val in kwargs.items()])
        resp = self._root.get("sla", params)
        return resp["slaSummaryList"]