Exemplo n.º 1
0
class ResourceManagerApi(object):
  """REST client for the YARN ResourceManager web service ('ws/<version>')."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    """
    :param oozie_url: base URL of the ResourceManager web service
    :param security_enabled: when True, authenticate with Kerberos
    :param ssl_cert_ca_verify: when True, verify the server's SSL certificate
    """
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._ssl_cert_ca_verify = ssl_cert_ca_verify

    if self._security_enabled:
      self._client.set_kerberos_auth()

    # Apply the verification flag unconditionally. Previously set_verify(True)
    # was only called when security was enabled AND the flag was true, so
    # ssl_cert_ca_verify was silently ignored in every other combination
    # (the sibling ResourceManagerApi variants set it unconditionally).
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    """GET cluster-level info; kwargs become query parameters."""
    return self._root.get('cluster', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    """GET the application list; kwargs become query parameters."""
    return self._root.get('cluster/apps', params=kwargs, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    """GET a single application by id."""
    return self._root.get('cluster/apps/%(app_id)s' % {'app_id': app_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    """PUT the KILLED state onto an application."""
    return self._root.put('cluster/apps/%(app_id)s/state' % {'app_id': app_id}, data=json.dumps({'state': 'KILLED'}), contenttype=_JSON_CONTENT_TYPE)
Exemplo n.º 2
0
class ManagerApi(object):
    """
    Client for the Cloudera Manager REST API.

    https://cloudera.github.io/cm_api/
    """
    def __init__(self,
                 user=None,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
        self._username = get_navigator_auth_username()
        self._password = get_navigator_auth_password()

        self.user = user
        self._client = HttpClient(self._api_url, logger=LOG)

        # Kerberos and basic auth are mutually exclusive here.
        if security_enabled:
            self._client.set_kerberos_auth()
        else:
            self._client.set_basic_auth(self._username, self._password)

        self._client.set_verify(ssl_cert_ca_verify)
        self._root = Resource(self._client)

    def tools_echo(self):
        """Ping the API ('tools/echo') to verify connectivity and credentials.

        :raises ManagerApiException: wrapping any REST failure.
        """
        try:
            params = (('message', 'hello'), )

            LOG.info(params)
            return self._root.get('tools/echo', params=params)
        # Python 3 compatible handler (was the py2-only 'except RestException, e').
        except RestException as e:
            raise ManagerApiException(e)
Exemplo n.º 3
0
class ResourceManagerApi(object):
    """Client for the ResourceManager REST API, with standby-RM detection."""

    def __init__(self,
                 oozie_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        if security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "ResourceManagerApi at %s" % (self._url,)

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    def _json_headers(self):
        # Always request JSON explicitly.
        return {'Accept': _JSON_CONTENT_TYPE}

    def cluster(self, **params):
        return self._execute(self._root.get, 'cluster', params=params,
                             headers=self._json_headers())

    def apps(self, **params):
        return self._execute(self._root.get, 'cluster/apps', params=params,
                             headers=self._json_headers())

    def app(self, app_id):
        return self._execute(self._root.get, 'cluster/apps/%s' % app_id,
                             headers=self._json_headers())

    def kill(self, app_id):
        return self._execute(self._root.put,
                             'cluster/apps/%s/state' % app_id,
                             data=json.dumps({'state': 'KILLED'}),
                             contenttype=_JSON_CONTENT_TYPE)

    def _execute(self, function, *args, **kwargs):
        """Invoke 'function' and surface a standby-RM answer as an exception.

        YARN-2605: Yarn does not use proper HTTP redirects when the standby RM
        has failed back to the master RM; it answers with a plain-text notice.
        """
        result = function(*args, **kwargs)

        standby_marker = 'This is standby RM. Redirecting to the current active RM'
        if isinstance(result, str) and result.startswith(standby_marker):
            raise YarnFailoverOccurred(result)

        return result
Exemplo n.º 4
0
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url, user):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = SECURITY_ENABLED.get()
    if self.security_enabled:
      self._client.set_kerberos_auth()
    self._root = resource.Resource(self._client)

  def _get_params(self):
    # With Kerberos the authenticated principal impersonates via doAs only;
    # otherwise the default user name is sent alongside the doAs target.
    if self.security_enabled:
      return (('doAs', self._user ),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)

  @classmethod
  def _get_json(cls, response):
    """Coerce a Solr response to a dict, tolerating text payloads with NULs."""
    if type(response) != dict:
      # Got 'plain/text' mimetype instead of 'application/json'
      try:
        response = json.loads(response)
      except ValueError as e:  # Python 3 compatible (was py2-only 'except ValueError, e')
        # Got some null bytes in the response.
        # Note: was 'unicode(e)', a builtin removed in Python 3; %s is enough.
        LOG.error('%s: %s' % (e, repr(response)))
        response = json.loads(response.replace('\x00', ''))
    return response
Exemplo n.º 5
0
class ManagerApi(object):
    """
    Client for the Cloudera Manager REST API.

    https://cloudera.github.io/cm_api/
    """
    def __init__(self,
                 user=None,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
        self._username = get_navigator_auth_username()
        self._password = get_navigator_auth_password()

        self.user = user
        self._client = HttpClient(self._api_url, logger=LOG)

        # Kerberos and basic auth are mutually exclusive here.
        if security_enabled:
            self._client.set_kerberos_auth()
        else:
            self._client.set_basic_auth(self._username, self._password)

        self._client.set_verify(ssl_cert_ca_verify)
        self._root = Resource(self._client)

    def has_service(self, service_name, cluster_name=None):
        """Return True when ``service_name`` is among the cluster's service types.

        :raises ManagerApiException: wrapping any REST failure.
        """
        cluster = self._get_cluster(cluster_name)
        try:
            # Only cluster_name is interpolated into the URL; the service is
            # matched client-side (the old dict carried an unused key).
            services = self._root.get(
                'clusters/%(cluster_name)s/serviceTypes' % {
                    'cluster_name': cluster['name'],
                })['items']

            return service_name in services
        # Python 3 compatible handler (was py2-only 'except RestException, e').
        except RestException as e:
            raise ManagerApiException(e)
Exemplo n.º 6
0
class ResourceManagerApi(object):
  """Thin wrapper around the ResourceManager 'cluster' REST endpoints."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if security_enabled:
      self._client.set_kerberos_auth()
    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def _json_headers(self):
    # Request JSON explicitly; some RM builds default to XML.
    return {'Accept': _JSON_CONTENT_TYPE}

  def cluster(self, **params):
    return self._root.get('cluster', params=params, headers=self._json_headers())

  def apps(self, **params):
    return self._root.get('cluster/apps', params=params, headers=self._json_headers())

  def app(self, app_id):
    return self._root.get('cluster/apps/%s' % app_id, headers=self._json_headers())

  def kill(self, app_id):
    payload = json.dumps({'state': 'KILLED'})
    return self._root.put('cluster/apps/%s/state' % app_id, data=payload, contenttype=_JSON_CONTENT_TYPE)
Exemplo n.º 7
0
def get_log_client(log_link):
  """Return a (possibly cached) HttpClient for the host serving ``log_link``.

  Clients are kept in a small LRU heap keyed by (last-used time, client) so
  repeated fetches against the same node reuse one client and its auth state.

  Fixes: the old body declared ``global _log_client_queue`` (never used) and
  ``global MAX_HEAP_SIZE`` (only read, so the declaration was unnecessary),
  and released the lock by hand instead of using a context manager.
  """
  with _log_client_lock:
    components = urlparse.urlsplit(log_link)
    base_url = '%(scheme)s://%(netloc)s' % {
      'scheme': components[0],
      'netloc': components[1]
    }

    # Heap entries take the form (epoch time, client object):
    # Least Recently Used algorithm.
    client_tuple = next((tup for tup in _log_client_heap if tup[1].base_url == base_url), None)
    if client_tuple is None:
      client = HttpClient(base_url, LOG)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      if yarn_cluster.SECURITY_ENABLED.get():
        client.set_kerberos_auth()
    else:
      _log_client_heap.remove(client_tuple)
      client = client_tuple[1]

    new_client_tuple = (time.time(), client)
    if len(_log_client_heap) >= MAX_HEAP_SIZE:
      heapq.heapreplace(_log_client_heap, new_client_tuple)
    else:
      heapq.heappush(_log_client_heap, new_client_tuple)

    return client
Exemplo n.º 8
0
class ResourceManagerApi(object):
  """REST client exposing the NodeManager 'node/containers' endpoints."""

  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if security_enabled:
      self._client.set_kerberos_auth()

  def __str__(self):
    # The message names NodeManagerApi, matching the node/* endpoints below.
    return "NodeManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def containers(self):
    headers = {'Accept': _JSON_CONTENT_TYPE}
    return self._root.get('node/containers', headers=headers)

  def container(self, container_id):
    headers = {'Accept': _JSON_CONTENT_TYPE}
    return self._root.get('node/containers/%s' % container_id, headers=headers)
Exemplo n.º 9
0
def _query_store_proxy(request, path=None):
    """Proxy ``request`` to the query store service and return a dict response.

    On a REST failure the dict carries 'code'/'message'/'content' taken from
    the upstream HTTP response, or a "not reachable" message when there was
    no HTTP response at all (e.g. connection refused).
    """
    # Default payload in case invoke() raises before assigning.
    response = {'status': -1}

    headers = {
        'x-do-as': request.user.username,
        'X-Requested-By': 'das',
        'Content-Type': 'application/json; charset=UTF-8'
    }

    client = HttpClient(QUERY_STORE.SERVER_URL.get())
    resource = Resource(client)

    if USE_SASL.get():
        client.set_kerberos_auth()

    try:
        response = resource.invoke(request.method, path, request.GET.dict(),
                                   request.body, headers)
    except RestException as e:
        ex_response = e.get_parent_ex().response

        if ex_response is not None:
            response['code'] = ex_response.status_code
            response['message'] = ex_response.reason
            response['content'] = ex_response.text
        else:
            response['message'] = 'Query store not reachable!'
            # NOTE(review): 'e.message' is a Python 2-era attribute; confirm
            # RestException still defines .message under Python 3.
            response['content'] = e.message

    return response
Exemplo n.º 10
0
class ResourceManagerApi(object):
  """Read-only client for the ResourceManager 'cluster/apps' endpoints."""

  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    # Kerberos only when the cluster requires it.
    if security_enabled:
      self._client.set_kerberos_auth()

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def apps(self, **params):
    # Request JSON explicitly; some builds default to XML.
    return self._root.get('cluster/apps', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    return self._root.get('cluster/apps/%s' % app_id, headers={'Accept': _JSON_CONTENT_TYPE})
Exemplo n.º 11
0
def get_log_client(log_link):
    """Return a (possibly cached) HttpClient for the host serving ``log_link``.

    Clients are kept in a small LRU heap keyed by (last-used time, client) so
    repeated fetches against the same node reuse one client and its auth state.

    Fixes: the old body declared ``global _log_client_queue`` (never used) and
    ``global MAX_HEAP_SIZE`` (only read, so the declaration was unnecessary),
    and released the lock by hand instead of using a context manager.
    """
    with _log_client_lock:
        components = urlparse.urlsplit(log_link)
        base_url = '%(scheme)s://%(netloc)s' % {
            'scheme': components[0],
            'netloc': components[1]
        }

        # Heap entries take the form (epoch time, client object):
        # Least Recently Used algorithm.
        client_tuple = next(
            (tup for tup in _log_client_heap if tup[1].base_url == base_url),
            None)
        if client_tuple is None:
            client = HttpClient(base_url, LOG)
            yarn_cluster = cluster.get_cluster_conf_for_job_submission()
            if yarn_cluster.SECURITY_ENABLED.get():
                client.set_kerberos_auth()
        else:
            _log_client_heap.remove(client_tuple)
            client = client_tuple[1]

        new_client_tuple = (time.time(), client)
        if len(_log_client_heap) >= MAX_HEAP_SIZE:
            heapq.heapreplace(_log_client_heap, new_client_tuple)
        else:
            heapq.heappush(_log_client_heap, new_client_tuple)

        return client
Exemplo n.º 12
0
class NodeManagerApi(object):
    """Client for the NodeManager container REST endpoints."""

    def __init__(self,
                 oozie_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=True):
        self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        if security_enabled:
            self._client.set_kerberos_auth()
        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "NodeManagerApi at %s" % (self._url,)

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    def _json_headers(self):
        # Ask for JSON explicitly; some builds default to XML.
        return {'Accept': _JSON_CONTENT_TYPE}

    def containers(self):
        return self._root.get('node/containers', headers=self._json_headers())

    def container(self, container_id):
        return self._root.get('node/containers/%s' % container_id,
                              headers=self._json_headers())
Exemplo n.º 13
0
class ResourceManagerApi(object):
    """REST client exposing the NodeManager 'node/containers' endpoints."""

    def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=True):
        self._url = posixpath.join(oozie_url, "ws", _API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        if security_enabled:
            self._client.set_kerberos_auth()
        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        # The message names NodeManagerApi, matching the node/* endpoints below.
        return "NodeManagerApi at %s" % (self._url,)

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    def containers(self):
        headers = {"Accept": _JSON_CONTENT_TYPE}
        return self._root.get("node/containers", headers=headers)

    def container(self, container_id):
        headers = {"Accept": _JSON_CONTENT_TYPE}
        return self._root.get("node/containers/%s" % container_id, headers=headers)
Exemplo n.º 14
0
Arquivo: api.py Projeto: ycaihua/hue
class SolrApi(object):
    """
    http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
    """
    def __init__(self, solr_url, user):
        self._url = solr_url
        self._user = user
        self._client = HttpClient(self._url, logger=LOG)
        self.security_enabled = SECURITY_ENABLED.get()
        if self.security_enabled:
            self._client.set_kerberos_auth()
        self._root = resource.Resource(self._client)

    def _get_params(self):
        # With Kerberos the authenticated principal impersonates via doAs only;
        # otherwise the default user name is sent alongside the doAs target.
        if self.security_enabled:
            return (('doAs', self._user), )
        return (
            ('user.name', DEFAULT_USER),
            ('doAs', self._user),
        )

    @classmethod
    def _get_json(cls, response):
        """Coerce a Solr response to a dict, tolerating text payloads with NULs."""
        if type(response) != dict:
            # Got 'plain/text' mimetype instead of 'application/json'
            try:
                response = json.loads(response)
            except ValueError as e:  # Python 3 compatible (was py2-only 'except ValueError, e')
                # Got some null bytes in the response.
                # Note: was 'unicode(e)', a builtin removed in Python 3.
                LOG.error('%s: %s' % (e, repr(response)))
                response = json.loads(response.replace('\x00', ''))
        return response
Exemplo n.º 15
0
Arquivo: api.py Projeto: Roxasora/hue
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url):
    self._url = solr_url
    self._client = HttpClient(self._url, logger=LOG)
    if SECURITY_ENABLED.get():
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)

  def query(self, solr_query, hue_core):
    """Run a select against the query's collection and return a dict.

    :raises PopupException: on any REST error from Solr.
    """
    try:
      params = (
          ('q', solr_query['q'] or EMPTY_QUERY.get()),
          ('wt', 'json'),
          ('rows', solr_query['rows']),
          ('start', solr_query['start']),
      )

      params += hue_core.get_query(solr_query)

      # 'fq' arrives as a '|'-separated list of filter queries.
      fqs = solr_query['fq'].split('|')
      for fq in fqs:
        if fq:
          params += (('fq', fq),)

      response = self._root.get('%(collection)s/select' % solr_query, params)

      if type(response) != dict:
        # Got 'plain/text' mimetype instead of 'application/json'
        response = json.loads(response)
      return response
    # Python 3 compatible handler (was py2-only 'except RestException, e').
    except RestException as e:
      raise PopupException('Error while accessing Solr: %s' % e)
Exemplo n.º 16
0
def query_store_api(request, path=None):
    """Dispatch a query-store API call, proxying when USE_PROXY is enabled.

    Returns a JsonResponse; on upstream REST failure the payload carries
    'code'/'message'/'content', or a "not reachable" message when the parent
    exception has no HTTP response at all.
    """
    response = {'status': -1}

    if USE_PROXY.get():
        content_type = 'application/json; charset=UTF-8'
        headers = {'X-Requested-By': 'das', 'Content-Type': content_type}

        client = HttpClient(QUERY_STORE.SERVER_URL.get())
        resource = Resource(client)
        if USE_SASL.get():
            client.set_kerberos_auth()

        try:
            response = resource.invoke(request.method, path,
                                       request.GET.dict(), request.body,
                                       headers)
        except RestException as e:
            ex_response = e.get_parent_ex().response
            # The parent exception may carry no HTTP response (e.g. connection
            # refused); previously this path raised AttributeError on None.
            # Guard it, matching the sibling _query_store_proxy handler.
            if ex_response is not None:
                response['code'] = ex_response.status_code
                response['message'] = ex_response.reason
                response['content'] = ex_response.text
            else:
                response['message'] = 'Query store not reachable!'
    else:
        if path == 'api/query/search':
            filters = json.loads(request.body)
            resp = get_api(request.user,
                           interface='queries-hive').apps(filters['search'])
            response = resp['apps']

    return JsonResponse(response)
Exemplo n.º 17
0
class THttpClient(TTransportBase):
    """
    HTTP transport mode for Thrift over Requests, with HTTPS and Kerberos
    support.

    e.g.
    mode = THttpClient('http://hbase-thrift-v1.com:9090')
    mode = THttpClient('http://hive-localhost:10001/cliservice')
    """

    def __init__(self, base_url):
        self._base_url = base_url
        self._client = HttpClient(self._base_url, logger=LOG)
        self._data = None
        self._headers = None
        self._wbuf = buffer_writer()

    def open(self):
        # The underlying HttpClient manages its own session.
        pass

    def close(self):
        # Only the per-call headers are dropped; the session stays open.
        self._headers = None

    def isOpen(self):
        return self._client is not None

    def set_kerberos_auth(self, service="HTTP"):
        self._client.set_kerberos_auth(service=service)

    def set_basic_auth(self, username, password):
        self._client.set_basic_auth(username, password)

    def set_bearer_auth(self, token):
        self._client.set_bearer_auth(token)

    def set_verify(self, verify=True):
        self._client.set_verify(verify)

    def setTimeout(self, ms):
        # Carry the timeout (in whole seconds) as a request header.
        headers = self._headers or {}
        headers['timeout'] = str(int(ms / 1000))
        self._headers = headers

    def setCustomHeaders(self, headers):
        self._headers = headers

    def read(self, sz):
        # Body of the most recent POST; sz is ignored.
        return self._data

    def write(self, buf):
        self._wbuf.write(buf)

    def flush(self):
        payload = self._wbuf.getvalue()
        self._wbuf = buffer_writer()

        # Send the buffered request as a single POST; keep the reply for read().
        self._root = Resource(self._client)
        self._data = self._root.post('', data=payload, headers=self._headers)
Exemplo n.º 18
0
class SparkHistoryServerApi(object):
    """Client for the Spark History Server REST API ('api/<version>/')."""

    def __init__(self,
                 spark_hs_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        self._ui_url = spark_hs_url
        self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        if security_enabled:
            self._client.set_kerberos_auth()
        self._client.set_verify(ssl_cert_ca_verify)

    def __str__(self):
        return "Spark History Server API at %s" % (self._url,)

    @property
    def url(self):
        return self._url

    @property
    def ui_url(self):
        return self._ui_url

    @property
    def headers(self):
        # JSON is requested explicitly on every call.
        return {'Accept': _JSON_CONTENT_TYPE}

    def _attempt_path(self, app_id, attempt_id, what):
        # Shared 'applications/<app>/<attempt>/<what>' layout.
        return 'applications/%s/%s/%s' % (app_id, attempt_id, what)

    def applications(self):
        return self._root.get('applications', headers=self.headers)

    def application(self, app_id):
        return self._root.get('applications/%s' % app_id, headers=self.headers)

    def jobs(self, app_id, attempt_id):
        return self._root.get(self._attempt_path(app_id, attempt_id, 'jobs'),
                              headers=self.headers)

    def stages(self, app_id, attempt_id):
        return self._root.get(self._attempt_path(app_id, attempt_id, 'stages'),
                              headers=self.headers)

    def executors(self, app_id, attempt_id):
        return self._root.get(self._attempt_path(app_id, attempt_id, 'executors'),
                              headers=self.headers)
Exemplo n.º 19
0
class THttpClient(TTransportBase):
    """
    HTTP transport mode for Thrift over Requests, with HTTPS and Kerberos
    support.

    e.g.
    mode = THttpClient('http://hbase-thrift-v1.com:9090')
    mode = THttpClient('http://hive-localhost:10001/cliservice')
    """

    def __init__(self, base_url, cert_validate=True):
        self._base_url = base_url
        self._client = HttpClient(self._base_url, logger=LOG, cert_validate=cert_validate)
        self._data = None
        self._headers = None
        self._wbuf = StringIO()

    def open(self):
        # The underlying HttpClient manages its own session.
        pass

    def close(self):
        # Only the per-call headers are dropped; the session stays open.
        self._headers = None

    def isOpen(self):
        return self._client is not None

    def set_basic_auth(self, username, password):
        self._client.set_basic_auth(username, password)

    def set_kerberos_auth(self):
        self._client.set_kerberos_auth()

    def setTimeout(self, ms):
        # Timeouts are not supported by this transport.
        pass

    def setCustomHeaders(self, headers):
        self._headers = headers

    def read(self, sz):
        # Body of the most recent POST; sz is ignored.
        return self._data

    def write(self, buf):
        self._wbuf.write(buf)

    def flush(self):
        # Cycle the transport per Thrift's contract, then POST the buffer
        # in one shot and keep the reply available for read().
        if self.isOpen():
            self.close()
        self.open()

        payload = self._wbuf.getvalue()
        self._wbuf = StringIO()

        self._root = Resource(self._client)
        self._data = self._root.post('', data=payload)
Exemplo n.º 20
0
class THttpClient(TTransportBase):
  """
  Thrift transport over HTTP(S) using Requests, with optional Kerberos.

  e.g.
  mode = THttpClient('http://hbase-thrift-v1.com:9090')
  mode = THttpClient('http://hive-localhost:10001/cliservice')
  """

  def __init__(self, base_url, cert_validate=True):
    self._base_url = base_url
    self._client = HttpClient(self._base_url, logger=LOG, cert_validate=cert_validate)
    self._data = None
    self._headers = None
    self._wbuf = StringIO()

  def open(self):
    pass  # the HttpClient manages its own session

  def close(self):
    # Drop per-call headers only; the session itself stays usable.
    self._headers = None

  def isOpen(self):
    return self._client is not None

  def set_basic_auth(self, username, password):
    self._client.set_basic_auth(username, password)

  def set_kerberos_auth(self):
    self._client.set_kerberos_auth()

  def setTimeout(self, ms):
    pass  # timeouts unsupported in this transport

  def setCustomHeaders(self, headers):
    self._headers = headers

  def read(self, sz):
    # Body of the most recent POST; sz is ignored.
    return self._data

  def write(self, buf):
    self._wbuf.write(buf)

  def flush(self):
    # Cycle the transport per Thrift's contract, then POST the buffer.
    if self.isOpen():
      self.close()
    self.open()

    body = self._wbuf.getvalue()
    self._wbuf = StringIO()

    self._root = Resource(self._client)
    self._data = self._root.post('', data=body)
Exemplo n.º 21
0
class MapreduceApi(object):
  """Client for the MapReduce application master REST API, reached through
  the ResourceManager web proxy ('<rm>/proxy/<app_id>/ws/...')."""

  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def _substitutions(self, job_id, **extra):
    # Accept either a 'job_...' or an 'application_...' id and normalize both
    # ways. Previously only task_counters/task_attempt normalized job_id
    # ('application' -> 'job'), inconsistently with the other methods; for
    # 'job_...' ids the extra replace is a no-op, so this is backward
    # compatible.
    subs = {
      'app_id': job_id.replace('job', 'application'),
      'job_id': job_id.replace('application', 'job'),
      'version': _API_VERSION,
    }
    subs.update(extra)
    return subs

  def _get(self, template, subs):
    # All endpoints want an explicit JSON Accept header.
    return self._root.get(template % subs, headers={'Accept': _JSON_CONTENT_TYPE})

  def job(self, user, job_id):
    # 'user' is unused but kept for interface compatibility.
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s', self._substitutions(job_id))

  def counters(self, job_id):
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters', self._substitutions(job_id))

  def tasks(self, job_id):
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks', self._substitutions(job_id))

  def job_attempts(self, job_id):
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts', self._substitutions(job_id))

  def conf(self, job_id):
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf', self._substitutions(job_id))

  def task(self, job_id, task_id):
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s', self._substitutions(job_id, task_id=task_id))

  def task_counters(self, job_id, task_id):
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters', self._substitutions(job_id, task_id=task_id))

  def task_attempts(self, job_id, task_id):
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts', self._substitutions(job_id, task_id=task_id))

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s', self._substitutions(job_id, task_id=task_id, attempt_id=attempt_id))
Exemplo n.º 22
0
class SolrApi(object):
    """
    http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
    """
    def __init__(self, solr_url, user):
        self._url = solr_url
        self._user = user
        self._client = HttpClient(self._url, logger=LOG)
        self.security_enabled = SECURITY_ENABLED.get()
        if self.security_enabled:
            self._client.set_kerberos_auth()
        self._root = resource.Resource(self._client)

    def _get_params(self):
        # With Kerberos the authenticated principal impersonates via doAs only;
        # otherwise the default user name is sent alongside the doAs target.
        if self.security_enabled:
            return (('doAs', self._user), )
        return (
            ('user.name', DEFAULT_USER),
            ('doAs', self._user),
        )

    def query(self, solr_query, hue_core):
        """Run a select against the query's collection and return a dict.

        :raises PopupException: wrapping any REST error from Solr.
        """
        try:
            params = self._get_params() + (
                ('q', solr_query['q'] or EMPTY_QUERY.get()),
                ('wt', 'json'),
                ('rows', solr_query['rows']),
                ('start', solr_query['start']),
            )

            params += hue_core.get_query(solr_query)

            # 'fq' arrives as a '|'-separated list of filter queries.
            fqs = solr_query['fq'].split('|')
            for fq in fqs:
                if fq:
                    params += (('fq', fq), )

            response = self._root.get('%(collection)s/select' % solr_query,
                                      params)

            if type(response) != dict:
                # Got 'plain/text' mimetype instead of 'application/json'
                try:
                    response = json.loads(response)
                except ValueError as e:  # Python 3 compatible (was 'except ValueError, e')
                    # Got some null bytes in the response.
                    # Note: was 'unicode(e)', a builtin removed in Python 3.
                    LOG.error('%s: %s' % (e, repr(response)))
                    response = json.loads(response.replace('\x00', ''))
            return response
        # Python 3 compatible handler (was py2-only 'except RestException, e').
        except RestException as e:
            raise PopupException(e, title=_('Error while accessing Solr'))
Exemplo n.º 23
0
def query_store_download_bundle(request, id=None):
  """Fetch query-store data bundle ``id`` and return it as a zip attachment."""
  response = {}

  client = HttpClient(QUERY_STORE.SERVER_URL.get())
  resource = Resource(client)
  if USE_SASL.get():
    client.set_kerberos_auth()

  app = resource.get('api/data-bundle/' + id)

  # NOTE(review): FileResponse is handed the tuple (app, 'rb') rather than an
  # open file-like object -- confirm this is intentional (it reads as if an
  # open(...) call was dropped).
  response = FileResponse((app, 'rb'), content_type='application/octet-stream')
  response['Content-Disposition'] = 'attachment; filename=' + id + '.zip'

  return response
Exemplo n.º 24
0
class HistoryServerApi(object):
  """Client for the MapReduce JobHistory server REST API ('ws/<v>/history')."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def job(self, user, job_id):
    # 'user' is accepted but unused; kept for interface compatibility.
    return self._root.get('mapreduce/jobs/%(job_id)s' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/counters' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/conf' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/jobattempts' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def tasks(self, job_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks' % {'job_id': job_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {'job_id': job_id, 'task_id': task_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {'job_id': job_id, 'task_id': task_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    # NOTE(review): only this method rewrites 'application_...' ids to
    # 'job_...'; the other methods use job_id as-is -- confirm whether the
    # normalization should apply everywhere.
    job_id = job_id.replace('application', 'job')
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {'job_id': job_id, 'task_id': task_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._root.get('mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters' % {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id}, headers={'Accept': _JSON_CONTENT_TYPE})
Exemplo n.º 25
0
def _create_query_store_client(request,
                               content_type='application/json; charset=UTF-8'):
    """Build an HttpClient pointed at the query-store server.

    The client impersonates the requesting user via the ``x-do-as`` header and
    turns on Kerberos authentication when SASL is configured.
    """
    client = HttpClient(QUERY_STORE.SERVER_URL.get())
    client.set_headers({
        'x-do-as': request.user.username,
        'X-Requested-By': 'das',
        'Content-Type': content_type,
    })

    if USE_SASL.get():
        client.set_kerberos_auth()

    return client
Exemplo n.º 26
0
class SolrApi(object):
  """Thin Solr search client used by the dashboard.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url, user):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = SECURITY_ENABLED.get()
    if self.security_enabled:
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)

  def _get_params(self):
    """Identification parameters added to every request.

    With Kerberos the authenticated principal is trusted, so only ``doAs``
    impersonation is needed; otherwise ``user.name`` is sent as well.
    """
    if self.security_enabled:
      return (('doAs', self._user ),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)

  def query(self, solr_query, hue_core):
    """Run a search described by *solr_query* and return the parsed response.

    Raises PopupException when Solr replies with a REST error.
    """
    try:
      params = self._get_params() + (
          ('q', solr_query['q'] or EMPTY_QUERY.get()),
          ('wt', 'json'),
          ('rows', solr_query['rows']),
          ('start', solr_query['start']),
      )

      params += hue_core.get_query(solr_query)

      # 'fq' arrives as a '|'-separated list; empty segments are skipped.
      fqs = solr_query['fq'].split('|')
      for fq in fqs:
        if fq:
          params += (('fq', fq),)

      response = self._root.get('%(collection)s/select' % solr_query, params)

      if type(response) != dict:
        # Got 'plain/text' mimetype instead of 'application/json'
        try:
          response = json.loads(response)
        # Fix: "except X as e" instead of the Python 2-only "except X, e"
        # form, consistent with the rest of the file.
        except ValueError as e:
          # Got some null bytes in the response
          LOG.error('%s: %s' % (unicode(e), repr(response)))
          response = json.loads(response.replace('\x00', ''))
      return response
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))
Exemplo n.º 27
0
class ResourceManagerApi(object):
  """Client for the YARN ResourceManager REST API (cluster/apps endpoints)."""

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  @property
  def url(self):
    """Resolved REST base URL."""
    return self._url

  @property
  def security_enabled(self):
    """Whether Kerberos authentication is in use."""
    return self._security_enabled

  def cluster(self, **kwargs):
    """Fetch cluster-level information."""
    json_headers = {'Accept': _JSON_CONTENT_TYPE}
    return self._execute(self._root.get, 'cluster', params=kwargs, headers=json_headers)

  def apps(self, **kwargs):
    """List applications, filtered by the given query parameters."""
    json_headers = {'Accept': _JSON_CONTENT_TYPE}
    return self._execute(self._root.get, 'cluster/apps', params=kwargs, headers=json_headers)

  def app(self, app_id):
    """Fetch a single application resource."""
    path = 'cluster/apps/%(app_id)s' % {'app_id': app_id}
    return self._execute(self._root.get, path, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    """Ask the ResourceManager to move the application to the KILLED state."""
    path = 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}
    payload = json.dumps({'state': 'KILLED'})
    return self._execute(self._root.put, path, data=payload, contenttype=_JSON_CONTENT_TYPE)

  def _execute(self, function, *args, **kwargs):
    """Invoke *function* and detect a standby-RM plain-text response.

    # YARN-2605: Yarn does not use proper HTTP redirects when the standby RM has
    # failed back to the master RM.
    """
    result = function(*args, **kwargs)

    if isinstance(result, str) and result.startswith('This is standby RM. Redirecting to the current active RM'):
      raise YarnFailoverOccurred(result)

    return result
Exemplo n.º 28
0
class SparkHistoryServerApi(object):
  """Client for the Spark History Server monitoring REST API."""

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    """REST API base URL."""
    return self._url

  @property
  def ui_url(self):
    """Web UI base URL (without the /api/vN suffix)."""
    return self._ui_url

  @property
  def headers(self):
    """Headers requesting JSON responses explicitly."""
    return {'Accept': _JSON_CONTENT_TYPE}

  def applications(self):
    """List all applications known to the history server."""
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    """Fetch one application."""
    path = 'applications/%(app_id)s' % {'app_id': app_id}
    return self._root.get(path, headers=self.headers)

  def jobs(self, app_id, attempt_id):
    """List the jobs of one application attempt."""
    path = 'applications/%(app_id)s/%(attempt_id)s/jobs' % {'app_id': app_id, 'attempt_id': attempt_id}
    return self._root.get(path, headers=self.headers)

  def stages(self, app_id, attempt_id):
    """List the stages of one application attempt."""
    path = 'applications/%(app_id)s/%(attempt_id)s/stages' % {'app_id': app_id, 'attempt_id': attempt_id}
    return self._root.get(path, headers=self.headers)

  def executors(self, app_id, attempt_id):
    """List the executors of one application attempt."""
    path = 'applications/%(app_id)s/%(attempt_id)s/executors' % {'app_id': app_id, 'attempt_id': attempt_id}
    return self._root.get(path, headers=self.headers)
Exemplo n.º 29
0
class SparkJob(Application):
    """YARN application wrapper that resolves the real Spark tracking URL and,
    once the app has started, pulls metrics from the history server.

    NOTE(review): this snippet is truncated — the trailing ``finally:`` has no
    body, so it is not syntactically complete as shown.
    """
    def __init__(self, job, rm_api=None, hs_api=None):
        super(SparkJob, self).__init__(job, rm_api)
        self._resolve_tracking_url()
        # Metrics only exist once the app has left the pre-running states.
        if self.status not in ('NEW', 'SUBMITTED', 'ACCEPTED') and hs_api:
            self.history_server_api = hs_api
            self._get_metrics()

    @property
    def logs_url(self):
        """URL of the driver's stdout log, or '' when unavailable."""
        log_links = self.history_server_api.get_executors_loglinks(self)
        return log_links[
            'stdout'] if log_links and 'stdout' in log_links else ''

    @property
    def attempt_id(self):
        """Last path component of the tracking URL (the attempt identifier)."""
        return self.trackingUrl.strip('/').split('/')[-1]

    def _resolve_tracking_url(self):
        """Follow the RM proxy redirect to the job's actual Spark UI URL."""
        resp = None
        try:
            self._client = HttpClient(self.trackingUrl, logger=LOG)
            self._root = Resource(self._client)
            yarn_cluster = cluster.get_cluster_conf_for_job_submission()
            self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
            if self._security_enabled:
                self._client.set_kerberos_auth()

            self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())
            actual_url = self._execute(self._root.resolve_redirect_url)

            # Normalize a ".../jobs" URL back to the application root URL.
            if actual_url.strip('/').split('/')[-1] == 'jobs':
                actual_url = actual_url.strip('/').replace('jobs', '')
            self.trackingUrl = actual_url
            LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
        except Exception, e:  # NOTE(review): Python 2-only except syntax
            LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" %
                     e)
        finally:  # NOTE(review): truncated in source — finally body missing
Exemplo n.º 30
0
class SparkJob(Application):
  """YARN application wrapper that resolves the real Spark tracking URL and,
  once the app has started, pulls metrics from the history server.

  NOTE(review): this snippet is truncated — the trailing ``finally:`` has no
  body, so it is not syntactically complete as shown.
  """

  def __init__(self, job, rm_api=None, hs_api=None):
    super(SparkJob, self).__init__(job, rm_api)
    self._resolve_tracking_url()
    # Metrics only exist once the app has left the pre-running states.
    if self.status not in ('NEW', 'SUBMITTED', 'ACCEPTED') and hs_api:
      self.history_server_api = hs_api
      self._get_metrics()

  @property
  def logs_url(self):
    """URL of the driver's stdout log, or '' when unavailable."""
    log_links = self.history_server_api.get_executors_loglinks(self)
    return log_links['stdout'] if log_links and 'stdout' in log_links else ''

  @property
  def attempt_id(self):
    """Last path component of the tracking URL (the attempt identifier)."""
    return self.trackingUrl.strip('/').split('/')[-1]

  def _resolve_tracking_url(self):
    """Follow the RM proxy redirect to the job's actual Spark UI URL."""
    resp = None
    try:
      self._client = HttpClient(self.trackingUrl, logger=LOG)
      self._root = Resource(self._client)
      yarn_cluster = cluster.get_cluster_conf_for_job_submission()
      self._security_enabled = yarn_cluster.SECURITY_ENABLED.get()
      if self._security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(yarn_cluster.SSL_CERT_CA_VERIFY.get())
      actual_url = self._execute(self._root.resolve_redirect_url)

      # Normalize a ".../jobs" URL back to the application root URL.
      if actual_url.strip('/').split('/')[-1] == 'jobs':
        actual_url = actual_url.strip('/').replace('jobs', '')
      self.trackingUrl = actual_url
      LOG.debug("SparkJob tracking URL: %s" % self.trackingUrl)
    except Exception, e:  # NOTE(review): Python 2-only except syntax
      LOG.warn("Failed to resolve Spark Job's actual tracking URL: %s" % e)
    finally:  # NOTE(review): truncated in source — finally body missing
Exemplo n.º 31
0
class SolrApi(object):
    """Minimal Solr search client.

    http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
    """
    def __init__(self, solr_url):
        self._url = solr_url
        self._client = HttpClient(self._url, logger=LOG)
        if SECURITY_ENABLED.get():
            self._client.set_kerberos_auth()
        self._root = Resource(self._client)

    def query(self, solr_query, hue_core):
        """Run a search described by *solr_query* and return the parsed response.

        Raises PopupException when Solr replies with a REST error.
        """
        try:
            params = (
                ('q', solr_query['q'] or EMPTY_QUERY.get()),
                ('wt', 'json'),
                ('rows', solr_query['rows']),
                ('start', solr_query['start']),
            )

            params += hue_core.get_query(solr_query)

            # 'fq' arrives as a '|'-separated list; empty segments are skipped.
            fqs = solr_query['fq'].split('|')
            for fq in fqs:
                if fq:
                    params += (('fq', fq), )

            response = self._root.get('%(collection)s/select' % solr_query,
                                      params)

            if type(response) != dict:
                # Got 'plain/text' mimetype instead of 'application/json'
                response = json.loads(response)
            return response
        # Fix: "except X as e" instead of the Python 2-only "except X, e"
        # form, consistent with the rest of the file.
        except RestException as e:
            raise PopupException('Error while accessing Solr: %s' % e)
Exemplo n.º 32
0
class SparkHistoryServerApi(object):
  """REST client for the Spark History Server (monitoring API).

  Also resolves a YARN job to its Spark app/attempt id and scrapes executor
  stdout/stderr logs out of the NodeManager web UI pages.
  """

  def __init__(self, spark_hs_url, security_enabled=False, ssl_cert_ca_verify=False):
    # The raw UI URL is kept separately from the derived REST endpoint URL.
    self._ui_url = spark_hs_url
    self._url = posixpath.join(spark_hs_url, 'api/%s/' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "Spark History Server API at %s" % (self._url,)

  @property
  def url(self):
    """REST API base URL."""
    return self._url

  @property
  def ui_url(self):
    """Web UI base URL (without the /api/vN suffix)."""
    return self._ui_url

  @property
  def headers(self):
    """Headers requesting JSON responses explicitly."""
    return {'Accept': _JSON_CONTENT_TYPE}

  def applications(self):
    """List all applications known to the history server."""
    return self._root.get('applications', headers=self.headers)

  def application(self, app_id):
    """Fetch one application."""
    return self._root.get('applications/%(app_id)s' % {'app_id': app_id}, headers=self.headers)

  def jobs(self, app_id):
    """List the jobs of one application (app_id may include the attempt id)."""
    return self._root.get('applications/%(app_id)s/jobs' % {'app_id': app_id}, headers=self.headers)

  def stages(self, app_id):
    """List the stages of one application."""
    return self._root.get('applications/%(app_id)s/stages' % {'app_id': app_id}, headers=self.headers)

  def executors(self, job):
    """List the executors of *job*, resolving its real app/attempt id first."""
    LOG.debug("Getting executors for Spark job %s" % job.jobId)
    app_id = self.get_real_app_id(job)
    if not app_id:
      return []

    return self._root.get('applications/%(app_id)s/executors' % {'app_id': app_id}, headers=self.headers)

  def stage_attempts(self, app_id, stage_id):
    """List the attempts of one stage."""
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s' % {'app_id': app_id, 'stage_id': stage_id}, headers=self.headers)

  def stage_attempt(self, app_id, stage_id, stage_attempt_id):
    """Fetch one stage attempt."""
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)

  def task_summary(self, app_id, stage_id, stage_attempt_id):
    """Fetch the task metric summary of one stage attempt."""
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskSummary' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)

  def task_list(self, app_id, stage_id, stage_attempt_id):
    """List the tasks of one stage attempt."""
    return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskList' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers)

  def storages(self, app_id):
    """List the cached RDDs of one application."""
    return self._root.get('applications/%(app_id)s/storage/rdd' % {'app_id': app_id}, headers=self.headers)

  def storage(self, app_id, rdd_id):
    """Fetch one cached RDD's storage status."""
    return self._root.get('applications/%(app_id)s/storage/rdd/%(rdd_id)s' % {'app_id': app_id, 'rdd_id': rdd_id}, headers=self.headers)

  def download_logs(self, app_id):
    """Download the event logs of all attempts of one application."""
    return self._root.get('applications/%(app_id)s/logs' % {'app_id': app_id}, headers=self.headers)

  def download_attempt_logs(self, app_id, attempt_id):
    """Download the event logs of one application attempt."""
    return self._root.get('applications/%(app_id)s/%(attempt_id)s/logs' % {'app_id': app_id, 'attempt_id': attempt_id}, headers=self.headers)

  def download_executors_logs(self, request, job, name, offset):
    """Fetch the driver executor's *name* log on behalf of the request user."""
    log_links = self.get_executors_loglinks(job)

    return self.retrieve_log_content(log_links, name, request.user.username, offset)

  def download_executor_logs(self, user, executor, name, offset):
    """Fetch one executor's *name* log using the links embedded in *executor*."""
    return self.retrieve_log_content(executor['logs'], name, user.username, offset)

  def retrieve_log_content(self, log_links, log_name, username, offset):
    """Scrape one log ('stdout' or 'stderr') from its NodeManager link.

    Returns '' when no matching link is available.
    """
    params = {
      'doAs': username
    }

    if offset != 0:
      params['start'] = offset

    # Anything that is not exactly 'stderr' is served as 'stdout'.
    if not log_name or not log_name == 'stderr':
      log_name = 'stdout'

    log = ''
    if log_links and log_name in log_links:
      log_link = log_links[log_name]

      # The NodeManager serves logs as an HTML page; the log text lives in
      # the second table cell of the page body.
      root = Resource(get_log_client(log_link), lib_urlsplit(log_link)[2], urlencode=False)
      response = root.get('', params=params)
      log = html.fromstring(response, parser=html.HTMLParser()).xpath('/html/body/table/tbody/tr/td[2]')[0].text_content()
    return log

  def get_executors_loglinks(self, job):
    """Return the log links of the driver executor (or the first executor)."""
    executor = None
    if job.metrics and 'executors' in job.metrics and job.metrics['executors']:
      executors = [executor for executor in job.metrics['executors'] if executor[0] == 'driver']  # look up driver executor
      if not executors:
        executor = job.metrics['executors'][0]
      else:
        executor = executors[0]

    # NOTE(review): executor rows appear to be positional sequences; index 12
    # presumably holds the log-link mapping — confirm against the metrics
    # producer.
    return None if not executor else executor[12]

  def get_real_app_id(self, job):
    """Return the history-server app id ([app-id] or [app-id]/[attempt-id])."""
    # https://spark.apache.org/docs/1.6.0/monitoring.html and https://spark.apache.org/docs/2.0.0/monitoring.html
    # When running on Yarn, each application has multiple attempts, so [app-id] is actually [app-id]/[attempt-id] in all cases.
    # When running job as cluster mode, an attempt number is part of application ID, but proxy URL can't be resolved to match
    # Spark history URL. In the applications list, each job's attampt list shows if attempt ID is used and how many attempts.

    try:
      jobs_json = self.applications()
      job_filtered_json = [x for x in jobs_json if x['id'] == job.jobId]

      if not job_filtered_json:
        return {}

      attempts = job_filtered_json[0]['attempts']

      if len(attempts) == 1:
        app_id = job.jobId if 'attemptId' not in attempts[0] else job.jobId + '/' + attempts[0]['attemptId']
      else:
        # NOTE(review): multiple attempts are mapped to "<app>/<count>" —
        # presumably the latest attempt id equals the attempt count; verify.
        app_id = job.jobId + '/%d' % len(attempts)

      LOG.debug("Getting real spark app id %s for Spark job %s" % (app_id, job.jobId))
    except Exception as e:
      LOG.error('Cannot get real app id %s: %s' % (job.jobId, e))
      app_id = None

    return app_id
Exemplo n.º 33
0
class SolrApi(object):
    """Solr dashboard search client.

    Builds classic facet and JSON-facet request parameters out of a Hue
    dashboard *collection* definition plus a *query* description.

    http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
    """

    def __init__(
        self,
        solr_url,
        user,
        security_enabled=SECURITY_ENABLED.get() if search_enabled() else SECURITY_ENABLED.default,
        ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get(),
    ):
        # NOTE(review): the security/SSL defaults are evaluated once, at
        # definition time, not per instantiation — confirm this is intended.
        self._url = solr_url
        self._user = user
        self._client = HttpClient(self._url, logger=LOG)
        self.security_enabled = security_enabled

        if self.security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

        self._root = resource.Resource(self._client)

        # The Kerberos handshake requires two requests in order to authenticate,
        # but if our first request is a PUT/POST, it might flat-out reject the
        # first request if the body is too large. So, connect here in order to get
        # a cookie so future PUT/POSTs will be pre-authenticated.
        if self.security_enabled:
            self._root.invoke("HEAD", "/")

    def query(self, collection, query):
        """Execute a dashboard search and return the parsed Solr response."""
        solr_query = {}

        solr_query["collection"] = collection["name"]

        if query.get("download"):
            solr_query["rows"] = 1000
            solr_query["start"] = 0
        else:
            solr_query["rows"] = int(collection["template"]["rows"] or 10)
            solr_query["start"] = int(query["start"])

        # Hard caps protecting Solr from runaway paging.
        solr_query["rows"] = min(solr_query["rows"], 1000)
        solr_query["start"] = min(solr_query["start"], 10000)

        params = self._get_params() + (
            ("q", self._get_q(query)),
            ("wt", "json"),
            ("rows", solr_query["rows"]),
            ("start", solr_query["start"]),
        )

        if any(collection["facets"]):
            params += (("facet", "true"), ("facet.mincount", 0), ("facet.limit", 10))
            json_facets = {}

            timeFilter = self._get_range_borders(collection, query)

            for facet in collection["facets"]:
                if facet["type"] == "query":
                    params += (("facet.query", "%s" % facet["field"]),)
                elif facet["type"] == "range" or facet["type"] == "range-up":
                    keys = {
                        "id": "%(id)s" % facet,
                        "field": facet["field"],
                        "key": "%(field)s-%(id)s" % facet,
                        "start": facet["properties"]["start"],
                        "end": facet["properties"]["end"],
                        "gap": facet["properties"]["gap"],
                        "mincount": int(facet["properties"]["mincount"]),
                    }

                    # Apply the dashboard time filter unless this histogram
                    # widget overrides it.
                    if (
                        timeFilter
                        and timeFilter["time_field"] == facet["field"]
                        and (
                            facet["id"] not in timeFilter["time_filter_overrides"]
                            or facet["widgetType"] != "histogram-widget"
                        )
                    ):
                        keys.update(self._get_time_filter_query(timeFilter, facet))

                    params += (
                        (
                            "facet.range",
                            "{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
                            % keys,
                        ),
                    )
                elif facet["type"] == "field":
                    keys = {
                        "id": "%(id)s" % facet,
                        "field": facet["field"],
                        "key": "%(field)s-%(id)s" % facet,
                        # One extra row so the widget can tell "more available".
                        "limit": int(facet["properties"].get("limit", 10))
                        + (1 if facet["widgetType"] == "facet-widget" else 0),
                        "mincount": int(facet["properties"]["mincount"]),
                    }

                    params += (
                        (
                            "facet.field",
                            "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
                            % keys,
                        ),
                    )
                elif facet["type"] == "nested":
                    _f = {
                        "field": facet["field"],
                        "limit": int(facet["properties"].get("limit", 10))
                        + (1 if facet["widgetType"] == "text-facet-widget" else 0),
                        "mincount": int(facet["properties"]["mincount"]),
                        "sort": {"count": facet["properties"]["sort"]},
                    }
                    # Fix: leftover debug "print facet" statement (Python 2-only
                    # syntax, wrote to stdout) replaced with logging.
                    LOG.debug("facet: %s", facet)

                    if facet["properties"]["domain"].get("blockParent") or facet["properties"]["domain"].get(
                        "blockChildren"
                    ):
                        _f["domain"] = {}
                        if facet["properties"]["domain"].get("blockParent"):
                            _f["domain"]["blockParent"] = " OR ".join(facet["properties"]["domain"]["blockParent"])
                        if facet["properties"]["domain"].get("blockChildren"):
                            _f["domain"]["blockChildren"] = " OR ".join(facet["properties"]["domain"]["blockChildren"])

                    if "start" in facet["properties"] and not facet["properties"].get("type") == "field":
                        _f.update(
                            {
                                "type": "range",
                                "start": facet["properties"]["start"],
                                "end": facet["properties"]["end"],
                                "gap": facet["properties"]["gap"],
                            }
                        )
                        if (
                            timeFilter
                            and timeFilter["time_field"] == facet["field"]
                            and (
                                facet["id"] not in timeFilter["time_filter_overrides"]
                                or facet["widgetType"] != "bucket-widget"
                            )
                        ):
                            _f.update(self._get_time_filter_query(timeFilter, facet))
                    else:
                        _f.update(
                            {
                                "type": "terms",
                                "field": facet["field"],
                                "excludeTags": facet["id"],
                                "offset": 0,
                                "numBuckets": True,
                                "allBuckets": True,
                                "prefix": "",
                            }
                        )
                        if facet["properties"]["canRange"] and not facet["properties"]["isDate"]:
                            del _f["mincount"]  # Numeric fields do not support

                    if facet["properties"]["facets"]:
                        self._n_facet_dimension(facet, _f, facet["properties"]["facets"], 1)
                        if facet["widgetType"] == "text-facet-widget":
                            # Fix: dict.keys()[0] is Python 2-only; list(...) is
                            # equivalent there and also works on Python 3.
                            _fname = list(_f["facet"])[0]
                            _f["sort"] = {_fname: facet["properties"]["sort"]}
                            # domain = '-d2:NaN' # Solr 6.4

                    json_facets[facet["id"]] = _f
                elif facet["type"] == "function":
                    json_facets[facet["id"]] = self._get_aggregate_function(facet)
                    json_facets["processEmpty"] = True
                elif facet["type"] == "pivot":
                    if facet["properties"]["facets"] or facet["widgetType"] == "map-widget":
                        fields = facet["field"]
                        fields_limits = []
                        for f in facet["properties"]["facets"]:
                            fields_limits.append("f.%s.facet.limit=%s" % (f["field"], f["limit"]))
                            fields_limits.append("f.%s.facet.mincount=%s" % (f["field"], f["mincount"]))
                            fields += "," + f["field"]
                        keys = {
                            "id": "%(id)s" % facet,
                            "key": "%(field)s-%(id)s" % facet,
                            "field": facet["field"],
                            "fields": fields,
                            "limit": int(facet["properties"].get("limit", 10)),
                            "mincount": int(facet["properties"]["mincount"]),
                            "fields_limits": " ".join(fields_limits),
                        }
                        params += (
                            (
                                "facet.pivot",
                                "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s"
                                % keys,
                            ),
                        )

            if json_facets:
                params += (("json.facet", json.dumps(json_facets)),)

        params += self._get_fq(collection, query)

        # Field list: restrict to the selected grid columns (plus the id and
        # any leaflet map fields), otherwise return everything.
        if collection["template"]["fieldsSelected"] and collection["template"]["isGridLayout"]:
            fields = set(
                collection["template"]["fieldsSelected"] + [collection["idField"]] if collection["idField"] else []
            )
            # Add field if needed
            if collection["template"]["leafletmap"].get("latitudeField"):
                fields.add(collection["template"]["leafletmap"]["latitudeField"])
            if collection["template"]["leafletmap"].get("longitudeField"):
                fields.add(collection["template"]["leafletmap"]["longitudeField"])
            if collection["template"]["leafletmap"].get("labelField"):
                fields.add(collection["template"]["leafletmap"]["labelField"])
            fl = urllib.unquote(utf_quoter(",".join(list(fields))))
        else:
            fl = "*"

        nested_fields = self._get_nested_fields(collection)
        if nested_fields:
            fl += urllib.unquote(utf_quoter(',[child parentFilter="%s"]' % " OR ".join(nested_fields)))

        params += (("fl", fl),)

        params += (("hl", "true"), ("hl.fl", "*"), ("hl.snippets", 5), ("hl.fragsize", 1000))

        if collection["template"]["fieldsSelected"]:
            fields = []
            for field in collection["template"]["fieldsSelected"]:
                # Fix: subscripted filter() is Python 2-only; an equivalent
                # list comprehension works on both Python 2 and 3.
                attribute_field = [
                    attribute for attribute in collection["template"]["fieldsAttributes"] if field == attribute["name"]
                ]
                if attribute_field:
                    if attribute_field[0]["sort"]["direction"]:
                        fields.append("%s %s" % (field, attribute_field[0]["sort"]["direction"]))
            if fields:
                params += (("sort", ",".join(fields)),)

        response = self._root.get("%(collection)s/select" % solr_query, params)
        return self._get_json(response)

    def _n_facet_dimension(self, widget, _f, facets, dim):
        """Recursively build the nested JSON-facet tree for *facets*.

        Count facets become "terms" buckets nested under each other; other
        aggregates become per-bucket statistic entries.
        """
        facet = facets[0]
        f_name = "dim_%02d:%s" % (dim, facet["field"])

        if facet["aggregate"]["function"] == "count":
            if "facet" not in _f:
                _f["facet"] = {f_name: {}}
            else:
                _f["facet"][f_name] = {}
            _f = _f["facet"]

            _f[f_name] = {
                "type": "terms",
                "field": "%(field)s" % facet,
                "limit": int(facet.get("limit", 10)),
                "mincount": int(facet["mincount"]),
                "numBuckets": True,
                "allBuckets": True,
                "prefix": "",
            }
            if widget["widgetType"] == "tree2-widget" and facets[-1]["aggregate"]["function"] != "count":
                _f["subcount"] = self._get_aggregate_function(facets[-1])

            if len(facets) > 1:  # Get n+1 dimension
                if facets[1]["aggregate"]["function"] == "count":
                    self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1)
                else:
                    self._n_facet_dimension(widget, _f[f_name], facets[1:], dim)
        else:
            agg_function = self._get_aggregate_function(facet)
            _f["facet"] = {"agg_%02d_00:%s" % (dim, agg_function): agg_function}
            for i, _f_agg in enumerate(facets[1:], 1):
                if _f_agg["aggregate"]["function"] != "count":
                    agg_function = self._get_aggregate_function(_f_agg)
                    _f["facet"]["agg_%02d_%02d:%s" % (dim, i, agg_function)] = agg_function
                else:
                    self._n_facet_dimension(widget, _f, facets[i:], dim + 1)  # Get n+1 dimension
                    break

    def suggest(self, collection, query):
        """Query the Solr suggester for *query*['q'] on *collection*.

        Raises PopupException when Solr replies with a REST error.
        """
        try:
            params = self._get_params() + (
                ("suggest", "true"),
                ("suggest.build", "true"),
                ("suggest.q", query["q"]),
                ("wt", "json"),
            )
            if query.get("dictionary"):
                params += (("suggest.dictionary", query["dictionary"]),)
            response = self._root.get("%s/suggest" % collection, params)
            return self._get_json(response)
        # Fix: "except X as e" instead of the Python 2-only "except X, e" form.
        except RestException as e:
            raise PopupException(e, title=_("Error while accessing Solr"))
Exemplo n.º 34
0
class JobServerApi(object):
    """REST client for the Livy job server (interactive sessions and batches)."""

    def __init__(self, livy_url):
        self._url = posixpath.join(livy_url)
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = SECURITY_ENABLED.get()
        self._thread_local = threading.local()

        if self.security_enabled:
            self._client.set_kerberos_auth()

    def __str__(self):
        return "JobServerApi at %s" % (self._url, )

    @property
    def url(self):
        """Livy server base URL."""
        return self._url

    @property
    def security_enabled(self):
        """Whether Kerberos authentication is in use."""
        return self._security_enabled

    @property
    def user(self):
        """Effective user for this thread (set via :meth:`setuser`)."""
        return self._thread_local.user

    def setuser(self, user):
        """Record the effective user; accepts a User object or a plain name."""
        self._thread_local.user = getattr(user, 'username', user)

    def get_status(self):
        """List sessions (used as a liveness/status probe)."""
        return self._root.get('sessions')

    def get_log(self, uuid, startFrom=None, size=None):
        """Return the session log joined into a single newline-separated string."""
        params = {}
        for key, value in (('from', startFrom), ('size', size)):
            if value is not None:
                params[key] = value

        response = self._root.get('sessions/%s/log' % uuid, params=params)

        return '\n'.join(response['log'])

    def create_session(self, **properties):
        """Create an interactive session impersonating the current user."""
        properties['proxyUser'] = self.user
        payload = json.dumps(properties)
        return self._root.post('sessions', data=payload, contenttype=_JSON_CONTENT_TYPE)

    def get_sessions(self):
        """List all sessions."""
        return self._root.get('sessions')

    def get_session(self, uuid):
        """Fetch one session."""
        return self._root.get('sessions/%s' % uuid)

    def get_statements(self, uuid):
        """List the statements of one session."""
        return self._root.get('sessions/%s/statements' % uuid)

    def submit_statement(self, uuid, statement):
        """Submit a code statement for execution in session *uuid*."""
        payload = json.dumps({'code': statement})
        return self._root.post('sessions/%s/statements' % uuid, data=payload, contenttype=_JSON_CONTENT_TYPE)

    def inspect(self, uuid, statement):
        """Inspect a code statement in session *uuid* without running it."""
        payload = json.dumps({'code': statement})
        return self._root.post('sessions/%s/inspect' % uuid, data=payload, contenttype=_JSON_CONTENT_TYPE)

    def fetch_data(self, session, statement):
        """Fetch the result of one statement."""
        return self._root.get('sessions/%s/statements/%s' % (session, statement))

    def cancel(self, session):
        """Interrupt the currently running statement of a session."""
        return self._root.post('sessions/%s/interrupt' % session)

    def close(self, uuid):
        """Delete a session."""
        return self._root.delete('sessions/%s' % uuid)

    def get_batches(self):
        """List all batch jobs."""
        return self._root.get('batches')

    def submit_batch(self, properties):
        """Submit a batch job impersonating the current user."""
        properties['proxyUser'] = self.user
        payload = json.dumps(properties)
        return self._root.post('batches', data=payload, contenttype=_JSON_CONTENT_TYPE)

    def get_batch(self, uuid):
        """Fetch one batch job."""
        return self._root.get('batches/%s' % uuid)

    def get_batch_status(self, uuid):
        """Return just the state string of one batch job."""
        response = self._root.get('batches/%s/state' % uuid)
        return response['state']

    def get_batch_log(self, uuid, startFrom=None, size=None):
        """Return the batch log joined into a single newline-separated string."""
        params = {}
        for key, value in (('from', startFrom), ('size', size)):
            if value is not None:
                params[key] = value

        response = self._root.get('batches/%s/log' % uuid, params=params)

        return '\n'.join(response['log'])

    def close_batch(self, uuid):
        """Delete a batch job."""
        return self._root.delete('batches/%s' % uuid)
Exemplo n.º 35
0
class JobServerApi(object):
  """REST client for a Livy job server: interactive sessions and batch jobs."""

  def __init__(self, livy_url):
    # posixpath.join() with a single argument returns it unchanged; kept
    # for symmetry with the other API wrappers in this file.
    self._url = posixpath.join(livy_url)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = SECURITY_ENABLED.get()
    self._csrf_enabled = CSRF_ENABLED.get()
    # Thread-local storage: one shared instance can carry a different
    # effective user per serving thread (see setuser/user).
    self._thread_local = threading.local()

    if self.security_enabled:
      self._client.set_kerberos_auth()

    if self.csrf_enabled:
      self._client.set_headers({'X-Requested-By' : 'hue'})

    self._client.set_verify(SSL_CERT_CA_VERIFY.get())

  def __str__(self):
    return "JobServerApi at %s" % (self._url,)

  @property
  def url(self):
    # Base URL this client talks to.
    return self._url

  @property
  def security_enabled(self):
    # True when Kerberos auth was configured at construction time.
    return self._security_enabled

  @property
  def csrf_enabled(self):
    # True when the X-Requested-By header is sent with every request.
    return self._csrf_enabled

  @property
  def user(self):
    # Effective user for the current thread; raises AttributeError if
    # setuser() was never called on this thread.
    return self._thread_local.user

  def setuser(self, user):
    """Record the effective user (User object or plain username) for this thread."""
    if hasattr(user, 'username'):
      self._thread_local.user = user.username
    else:
      self._thread_local.user = user

  def get_status(self):
    # GETs the sessions collection (same endpoint as get_sessions); used
    # here as a basic reachability probe.
    return self._root.get('sessions')

  def get_log(self, uuid, startFrom=None, size=None):
    """Return the log of session *uuid* as one newline-joined string.

    *startFrom* and *size* page through the server-side log when given.
    """
    params = {}

    if startFrom is not None:
      params['from'] = startFrom

    if size is not None:
      params['size'] = size

    response = self._root.get('sessions/%s/log' % uuid, params=params)

    return '\n'.join(response['log'])

  def create_session(self, **properties):
    """Create a session, impersonating the current thread's user via proxyUser."""
    properties['proxyUser'] = self.user
    return self._root.post('sessions', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_sessions(self):
    """List all sessions known to the server."""
    return self._root.get('sessions')

  def get_session(self, uuid):
    """Return the session identified by *uuid*."""
    return self._root.get('sessions/%s' % uuid)

  def get_statements(self, uuid):
    """List the statements submitted to session *uuid*."""
    return self._root.get('sessions/%s/statements' % uuid)

  def submit_statement(self, uuid, statement):
    """Submit *statement* (source code string) for execution in session *uuid*."""
    data = {'code': statement}
    return self._root.post('sessions/%s/statements' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def inspect(self, uuid, statement):
    """POST *statement* to the inspect endpoint of session *uuid*."""
    data = {'code': statement}
    return self._root.post('sessions/%s/inspect' % uuid, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def fetch_data(self, session, statement):
    """Fetch the output of *statement* within *session*."""
    return self._root.get('sessions/%s/statements/%s' % (session, statement))

  def cancel(self, session):
    """Interrupt the currently running statement of *session*."""
    return self._root.post('sessions/%s/interrupt' % session)

  def close(self, uuid):
    """Delete session *uuid* on the server."""
    return self._root.delete('sessions/%s' % uuid)

  def get_batches(self):
    """List all batch jobs known to the server."""
    return self._root.get('batches')

  def submit_batch(self, properties):
    """Submit a batch job, impersonating the current thread's user via proxyUser."""
    properties['proxyUser'] = self.user
    return self._root.post('batches', data=json.dumps(properties), contenttype=_JSON_CONTENT_TYPE)

  def get_batch(self, uuid):
    """Return the batch job identified by *uuid*."""
    return self._root.get('batches/%s' % uuid)

  def get_batch_status(self, uuid):
    """Return only the 'state' field of batch *uuid*."""
    response = self._root.get('batches/%s/state' % uuid)
    return response['state']

  def get_batch_log(self, uuid, startFrom=None, size=None):
    """Return the log of batch *uuid* as one newline-joined string; paged by *startFrom*/*size*."""
    params = {}

    if startFrom is not None:
      params['from'] = startFrom

    if size is not None:
      params['size'] = size

    response = self._root.get('batches/%s/log' % uuid, params=params)

    return '\n'.join(response['log'])

  def close_batch(self, uuid):
    """Delete batch *uuid* on the server."""
    return self._root.delete('batches/%s' % uuid)
Exemplo n.º 36
0
Arquivo: base.py Projeto: 10sr/hue
class SqoopClient(object):
  """REST client for a Sqoop2 server: connectors, links, jobs and submissions.

  All requests carry the headers built by the `headers` property, which
  includes the acting user (sqoop-user-name) and the UI language.
  """

  # Submission statuses treated as healthy / failed (see start_job).
  STATUS_GOOD = ('FINE', 'ACCEPTABLE')
  STATUS_BAD = ('UNACCEPTABLE', 'FAILURE_ON_SUBMIT')

  def __init__(self, url, username, language='en'):
    self._url = url
    self._client = HttpClient(self._url, logger=LOG)
    self._root = SqoopResource(self._client)
    self._language = language
    self._username = username

    # Evaluate the security flag once (the old code called it twice).
    self._security_enabled = has_sqoop_has_security()
    if self._security_enabled:
      self._client.set_kerberos_auth()

  def __str__(self):
    return "SqoopClient at %s with security %s" % (self._url, self._security_enabled)

  @property
  def url(self):
    return self._url

  @property
  def headers(self):
    # Rebuilt on every access so set_user()/set_language() take effect
    # on subsequent requests.
    return {
      'Accept': 'application/json',
      'Accept-Language': self._language,
      'sqoop-user-name': self._username
    }

  def get_version(self):
    """Return the server version document."""
    return self._root.get('version', headers=self.headers)

  def get_driver(self):
    """Return the server Driver (job-level configuration metadata)."""
    resp_dict = self._root.get('%s/driver' % API_VERSION, headers=self.headers)
    driver = Driver.from_dict(resp_dict)
    return driver

  def get_connectors(self):
    """Return all connectors as Connector objects."""
    resp_dict = self._root.get('%s/connectors' % API_VERSION, headers=self.headers)
    connectors = [ Connector.from_dict(connector_dict) for connector_dict in resp_dict['connectors'] ]
    return connectors

  def get_connector(self, connector_id):
    """Return the Connector with *connector_id*, or None when absent."""
    resp_dict = self._root.get('%s/connector/%d/' % (API_VERSION, connector_id), headers=self.headers)
    if resp_dict['connector']:
      return Connector.from_dict(resp_dict['connector'])
    return None

  def get_links(self):
    """Return all links as Link objects."""
    resp_dict = self._root.get('%s/links' % API_VERSION, headers=self.headers)
    links = [Link.from_dict(link_dict) for link_dict in resp_dict['links']]
    return links

  def get_link(self, link_id):
    """Return the Link with *link_id*, or None when absent."""
    resp_dict = self._root.get('%s/link/%d/' % (API_VERSION, link_id), headers=self.headers)
    if resp_dict['link']:
      return Link.from_dict(resp_dict['link'])
    return None

  def create_link(self, link):
    """POST a new link; stamps creation/update dates and the server-assigned id.

    Raises SqoopException when the server reports validation errors.
    """
    link.creation_date = int( round(time.time() * 1000) )
    link.update_date = link.creation_date
    link_dict = link.to_dict()
    request_dict = {
      'link': link_dict
    }
    resp = self._root.post('%s/link/' % API_VERSION, data=json.dumps(request_dict), headers=self.headers)

    # Lame check that iterates to make sure we have an error
    # Server responds with: {'validation-result': [{},{}]} or {'validation-result': [{KEY: ERROR},{KEY: ERROR}]}
    for result in resp['validation-result']:
      if result:
        raise SqoopException.from_dicts(resp['validation-result'])

    link.id = resp['id']
    return link

  def update_link(self, link):
    """PUT an updated link; raises SqoopException on validation errors."""
    if not link.link_config_values:
      link.link_config_values = self.get_connectors()[0].link_config
    link.updated = int( round(time.time() * 1000) )
    link_dict = link.to_dict()
    request_dict = {
      'link': link_dict
    }
    resp = self._root.put('%s/link/%d/' % (API_VERSION, link.id), data=json.dumps(request_dict), headers=self.headers)

    # Lame check that iterates to make sure we have an error
    # Server responds with: {'validation-result': [{},{}]} or {'validation-result': [{KEY: ERROR},{KEY: ERROR}]}
    for result in resp['validation-result']:
      if result:
        raise SqoopException.from_dicts(resp['validation-result'])

    return link

  def delete_link(self, link):
    """Delete *link* on the server; always returns None."""
    self._root.delete('%s/link/%d/' % (API_VERSION, link.id), headers=self.headers)
    return None

  def get_jobs(self):
    """Return all jobs as Job objects."""
    resp_dict = self._root.get('%s/jobs' % API_VERSION, headers=self.headers)
    jobs = [Job.from_dict(job_dict) for job_dict in resp_dict['jobs']]
    return jobs

  def get_job(self, job_id):
    """Return the Job with *job_id*, or None when absent."""
    resp_dict = self._root.get('%s/job/%d/' % (API_VERSION, job_id), headers=self.headers)
    if resp_dict['job']:
      return Job.from_dict(resp_dict['job'])
    return None

  def create_job(self, job):
    """POST a new job; fills missing config sections from server defaults.

    Raises SqoopException when the server does not return an id.
    """
    if not job.from_config_values:
      job.from_config_values = self.get_connectors()[0].job_config['FROM']
    if not job.to_config_values:
      job.to_config_values = self.get_connectors()[0].job_config['TO']
    if not job.driver_config_values:
      job.driver_config_values = self.get_driver().job_config
    job.creation_date = int( round(time.time() * 1000) )
    job.update_date = job.creation_date
    job_dict = job.to_dict()
    request_dict = {
      'job': job_dict
    }
    resp = self._root.post('%s/job/' % API_VERSION, data=json.dumps(request_dict), headers=self.headers)
    if 'id' not in resp:
      raise SqoopException.from_dicts(resp['validation-result'])
    job.id = resp['id']
    return job

  def update_job(self, job):
    """PUT an updated job; raises SqoopException on validation errors."""
    if not job.from_config_values:
      job.from_config_values = self.get_connectors()[0].job_config['FROM']
    if not job.to_config_values:
      job.to_config_values = self.get_connectors()[0].job_config['TO']
    if not job.driver_config_values:
      job.driver_config_values = self.get_driver().job_config
    job.updated = int( round(time.time() * 1000) )
    job_dict = job.to_dict()
    request_dict = {
      'job': job_dict
    }
    resp = self._root.put('%s/job/%d/' % (API_VERSION, job.id), data=json.dumps(request_dict), headers=self.headers)

    # Lame check that iterates to make sure we have an error
    # Server responds with: {'validation-result': [{},{}]} or {'validation-result': [{KEY: ERROR},{KEY: ERROR}]}
    for result in resp['validation-result']:
      if result:
        raise SqoopException.from_dicts(resp['validation-result'])

    return job

  def delete_job(self, job):
    """Delete *job* on the server; always returns None."""
    self._root.delete('%s/job/%s' % (API_VERSION, job.id), headers=self.headers)
    return None

  def get_job_status(self, job):
    """Return the latest Submission for *job*."""
    resp_dict = self._root.get('%s/job/%d/status' % (API_VERSION, job.id), headers=self.headers)
    return Submission.from_dict(resp_dict['submission'])

  def start_job(self, job):
    """Start *job*; raises SqoopSubmissionException when submission status is bad."""
    resp_dict = self._root.put('%s/job/%d/start' % (API_VERSION, job.id), headers=self.headers)
    if resp_dict['submission']['status'] in SqoopClient.STATUS_BAD:
      raise SqoopSubmissionException.from_dict(resp_dict['submission'])
    return Submission.from_dict(resp_dict['submission'])

  def stop_job(self, job):
    """Stop *job* and return the resulting Submission."""
    resp_dict = self._root.put('%s/job/%d/stop' % (API_VERSION, job.id), headers=self.headers)
    return Submission.from_dict(resp_dict['submission'])

  def get_submissions(self):
    """Return all submissions as Submission objects."""
    resp_dict = self._root.get('%s/submissions' % API_VERSION, headers=self.headers)
    submissions = [Submission.from_dict(submission_dict) for submission_dict in resp_dict['submissions']]
    return submissions

  def set_user(self, user):
    # Bug fix: request headers are built from self._username (see headers),
    # but this method previously assigned self._user, which nothing reads —
    # so changing the user silently had no effect on subsequent requests.
    self._username = user

  def set_language(self, language):
    """Set the Accept-Language value used for subsequent requests."""
    self._language = language
Exemplo n.º 37
0
class ImpalaDaemonApi(object):
    """Client for the Impala daemon debug web server's JSON endpoints.

    Every query endpoint follows the same pattern: GET with json=true,
    then normalize the response to a decoded object.  That shared logic
    lives in _get_json; the public methods only name the endpoint.
    """

    def __init__(self, server_url):
        self._url = server_url
        self._client = HttpClient(self._url, logger=LOG)
        self._root = Resource(self._client)
        self._security_enabled = is_kerberos_enabled()
        self._webserver_spnego_enabled = is_webserver_spnego_enabled()
        # Per-thread effective user (see set_user/user).
        self._thread_local = threading.local()

        # You can set username/password for Impala Web UI which overrides kerberos
        if DAEMON_API_USERNAME.get() is not None and DAEMON_API_PASSWORD.get() is not None:
            if DAEMON_API_AUTH_SCHEME.get().lower() == 'basic':
                self._client.set_basic_auth(DAEMON_API_USERNAME.get(),
                                            DAEMON_API_PASSWORD.get())
                LOG.info("Using username and password for basic authentication")
            else:
                self._client.set_digest_auth(DAEMON_API_USERNAME.get(),
                                             DAEMON_API_PASSWORD.get())
                LOG.info('Using username and password for digest authentication')
        elif self._webserver_spnego_enabled or self._security_enabled:
            self._client.set_kerberos_auth()
            LOG.info('Using kerberos principal for authentication')

    def __str__(self):
        return "ImpalaDaemonApi at %s" % self._url

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    @property
    def user(self):
        # Effective user for the current thread; AttributeError if
        # set_user() was never called on this thread.
        return self._thread_local.user

    def set_user(self, user):
        """Record the effective user (User object or plain username) for this thread."""
        if hasattr(user, 'username'):
            self._thread_local.user = user.username
        else:
            self._thread_local.user = user

    def _get_json(self, path, params, label=''):
        """GET *path* and return a decoded JSON object.

        The debug endpoints sometimes return a JSON string and sometimes an
        already-decoded object; both forms are normalized here.  *label* is
        spliced into the error message to keep the historical per-endpoint
        wording (e.g. ' query_profile' -> 'ImpalaDaemonApi query_profile did
        not return valid JSON: ...').

        Raises ImpalaDaemonApiException when the payload is not valid JSON.
        """
        resp = self._root.get(path, params=params)
        try:
            # basestring: this module targets Python 2 (see iteritems elsewhere).
            if isinstance(resp, basestring):
                return json.loads(resp)
            else:
                return resp
        except ValueError as e:
            raise ImpalaDaemonApiException(
                'ImpalaDaemonApi%s did not return valid JSON: %s' % (label, e))

    def get_queries(self):
        """Return the daemon's in-flight/completed queries listing."""
        return self._get_json('queries', {'json': 'true'})

    def get_query(self, query_id):
        """Return the plan document for *query_id*."""
        return self._get_json('query_plan', {'query_id': query_id, 'json': 'true'})

    def get_query_profile(self, query_id):
        """Return the runtime profile for *query_id*."""
        return self._get_json('query_profile',
                              {'query_id': query_id, 'json': 'true'},
                              label=' query_profile')

    def get_query_memory(self, query_id):
        """Return the memory breakdown for *query_id*."""
        return self._get_json('query_memory',
                              {'query_id': query_id, 'json': 'true'},
                              label=' query_memory')

    def kill(self, query_id):
        """Cancel *query_id* via the cancel_query endpoint."""
        return self._get_json('cancel_query',
                              {'query_id': query_id, 'json': 'true'},
                              label=' kill')

    def get_query_backends(self, query_id):
        """Return per-backend execution info for *query_id*."""
        return self._get_json('query_backends',
                              {'query_id': query_id, 'json': 'true'},
                              label=' query_backends')

    def get_query_finstances(self, query_id):
        """Return fragment-instance info for *query_id*."""
        return self._get_json('query_finstances',
                              {'query_id': query_id, 'json': 'true'},
                              label=' query_finstances')

    def get_query_summary(self, query_id):
        """Return the exec summary for *query_id*."""
        return self._get_json('query_summary',
                              {'query_id': query_id, 'json': 'true'},
                              label=' query_summary')

    def get_query_profile_encoded(self, query_id):
        """Return the encoded (raw) profile for *query_id*, passed through untouched."""
        params = {'query_id': query_id}

        return self._root.get('query_profile_encoded', params=params)
Exemplo n.º 38
0
class OozieApi(object):
  """REST client for the Oozie server: workflow/coordinator/bundle jobs,
  admin endpoints and SLA summaries.  The acting user is fixed at
  construction time and sent as doAs on every request."""

  def __init__(self, oozie_url, user, security_enabled=False, api_version=API_VERSION, ssl_cert_ca_verify=True):
    self._url = posixpath.join(oozie_url, api_version)
    self._client = HttpClient(self._url, logger=LOG)

    if security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    # To store username info
    if hasattr(user, 'username'):
      self.user = user.username
    else:
      self.user = user
    self.api_version = api_version

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def _get_params(self):
    # Base query params for every call: impersonation (doAs) and the
    # configured timezone; user.name is only needed without security.
    if self.security_enabled:
      return { 'doAs': self.user, 'timezone': TIME_ZONE.get() }
    return { 'user.name': DEFAULT_USER, 'doAs': self.user, 'timezone': TIME_ZONE.get() }

  def _get_oozie_properties(self, properties=None):
    # Default job configuration (user.name), overridable by the caller.
    defaults = {
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)

    return defaults

  VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime')
  VALID_LOG_FILTERS = {'recent', 'limit', 'loglevel', 'text'}

  def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
    """
    Get a list of Oozie jobs.

    jobtype is 'wf', 'coord' or 'bundle'.
    Note that offset is 1-based.
    filters is an iterable of (key, value) pairs; keys must be in VALID_JOB_FILTERS.
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      filters = []
    params['jobtype'] = jobtype

    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    # Send the request
    resp = self._root.get('jobs', params)
    if jobtype == 'wf':
      wf_list = WorkflowList(self, resp, filters=filters)
    elif jobtype == 'coord':
      wf_list = CoordinatorList(self, resp, filters=filters)
    else:
      wf_list = BundleList(self, resp, filters=filters)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('wf', offset, cnt, filters)

  def get_coordinators(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('coord', offset, cnt, filters)

  def get_bundles(self, offset=None, cnt=None, filters=None):
    return self.get_jobs('bundle', offset, cnt, filters)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
    """get_coordinator(jobid) -> Coordinator, actions ordered descending."""
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    if filters is None:
      # Fix: default to a list of (key, value) pairs, consistent with
      # get_jobs.  The old default of {} only worked because iterating an
      # empty dict yields nothing; a non-empty dict would fail to unpack
      # in the loop below.
      filters = []
    params.update({'order': 'desc'})

    filter_list = []
    for key, val in filters:
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    resp = self._root.get('job/%s' % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    """get_bundle(jobid) -> Bundle"""
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params['show'] = 'definition'
    return self._root.get('job/%s' % (jobid,), params)

  def get_job_log(self, jobid, logfilter=None):
    """
    get_job_log(jobid) -> Log (xml string)

    logfilter is an iterable of (key, value) pairs; keys must be in VALID_LOG_FILTERS.
    """
    params = self._get_params()
    params['show'] = 'log'

    filter_list = []
    if logfilter is None:
      logfilter = []
    for key, val in logfilter:
      if key not in OozieApi.VALID_LOG_FILTERS:
        raise ValueError('"%s" is not a valid filter for job logs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['logfilter'] = ';'.join(filter_list)
    return self._root.get('job/%s' % (jobid,), params)

  def get_job_status(self, jobid):
    """Return just the status document for *jobid*."""
    params = self._get_params()
    params['show'] = 'status'

    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_action(self, action_id):
    """Return the typed action object for *action_id*.

    The id embeds its type: 'C@' marks coordinator actions, 'B@' bundle
    actions; anything else is treated as a workflow action.
    """
    if 'C@' in action_id:
      Klass = CoordinatorAction
    elif 'B@' in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get('job/%s' % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None

    Raise RestException on error.
    """
    if action not in ('start', 'suspend', 'resume', 'kill', 'rerun', 'coord-rerun', 'bundle-rerun', 'change', 'ignore'):
      msg = 'Invalid oozie job action: %s' % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params['action'] = action
    if parameters is not None:
      params.update(parameters)

    return self._root.put('job/%s' % jobid, params,  data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'oozie.wf.application.path': application_path,
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)
    properties = defaults

    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)

    properties = defaults

    params = self._get_params()
    resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp['id']

  def dryrun(self, properties=None):
    """Submit with action=dryrun: server-side validation without running."""
    defaults = {
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)

    properties = defaults

    params = self._get_params()
    params['action'] = 'dryrun'
    return self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def rerun(self, jobid, properties=None, params=None):
    """Rerun *jobid*; caller-supplied *params* are merged over the defaults."""
    properties = self._get_oozie_properties(properties)
    if params is None:
      params = self._get_params()
    else:
      # Fix: merge the defaults into the caller's params.  The old code did
      # self._get_params().update(params), which updated a temporary dict
      # and discarded it, so doAs/timezone were silently dropped whenever
      # params was supplied.
      merged = self._get_params()
      merged.update(params)
      params = merged

    params['action'] = 'rerun'

    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/build-version', params)
    return resp

  def get_instrumentation(self):
    """Return the admin/instrumentation document."""
    params = self._get_params()
    resp = self._root.get('admin/instrumentation', params)
    return resp

  def get_metrics(self):
    """Return the admin/metrics document."""
    params = self._get_params()
    resp = self._root.get('admin/metrics', params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/configuration', params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/status', params)
    return resp

  def get_oozie_slas(self, **kwargs):
    """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
    params = self._get_params()
    # iteritems: this module targets Python 2 (basestring is used elsewhere).
    params['filter'] = ';'.join(['%s=%s' % (key, val) for key, val in kwargs.iteritems()])
    resp = self._root.get('sla', params)
    return resp['slaSummaryList']
Exemplo n.º 39
0
class OozieApi(object):
    """REST client for the Oozie server (thread-local user variant).

    Unlike the fixed-user variant, the acting user is stored per thread
    via setuser() and read back through the user property.
    """

    def __init__(self, oozie_url, security_enabled=False, api_version=API_VERSION):
        self._url = posixpath.join(oozie_url, api_version)
        self._client = HttpClient(self._url, logger=LOG)
        if security_enabled:
            self._client.set_kerberos_auth()
        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        # To store username info
        self._thread_local = threading.local()
        self.api_version = api_version

    def __str__(self):
        return "OozieApi at %s" % (self._url,)

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    @property
    def user(self):
        # Effective user for the current thread; AttributeError if
        # setuser() was never called on this thread.
        return self._thread_local.user

    def setuser(self, user):
        """Record the effective user (User object or plain username) for this thread."""
        if hasattr(user, "username"):
            self._thread_local.user = user.username
        else:
            self._thread_local.user = user

    def _get_params(self):
        # Base query params: impersonation (doAs) and timezone; user.name
        # only when security is off.
        if self.security_enabled:
            return {"doAs": self.user, "timezone": TIME_ZONE.get()}
        return {"user.name": DEFAULT_USER, "doAs": self.user, "timezone": TIME_ZONE.get()}

    def _get_oozie_properties(self, properties=None):
        # Default job configuration (user.name), overridable by the caller.
        defaults = {"user.name": self.user}

        if properties is not None:
            defaults.update(properties)

        return defaults

    VALID_JOB_FILTERS = ("name", "user", "group", "status")

    def get_jobs(self, jobtype, offset=None, cnt=None, **kwargs):
        """
        Get a list of Oozie jobs.

        jobtype is 'wf', 'coord'
        Note that offset is 1-based.
        kwargs is used for filtering and may be one of VALID_FILTERS: name, user, group, status
        """
        params = self._get_params()
        if offset is not None:
            params["offset"] = str(offset)
        if cnt is not None:
            params["len"] = str(cnt)
        params["jobtype"] = jobtype

        filter_list = []
        # iteritems: this module targets Python 2.
        for key, val in kwargs.iteritems():
            if key not in OozieApi.VALID_JOB_FILTERS:
                raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
            filter_list.append("%s=%s" % (key, val))
        params["filter"] = ";".join(filter_list)

        # Send the request
        resp = self._root.get("jobs", params)
        if jobtype == "wf":
            wf_list = WorkflowList(self, resp, filters=kwargs)
        elif jobtype == "coord":
            wf_list = CoordinatorList(self, resp, filters=kwargs)
        else:
            wf_list = BundleList(self, resp, filters=kwargs)
        return wf_list

    def get_workflows(self, offset=None, cnt=None, **kwargs):
        return self.get_jobs("wf", offset, cnt, **kwargs)

    def get_coordinators(self, offset=None, cnt=None, **kwargs):
        return self.get_jobs("coord", offset, cnt, **kwargs)

    def get_bundles(self, offset=None, cnt=None, **kwargs):
        return self.get_jobs("bundle", offset, cnt, **kwargs)

    # TODO: make get_job accept any jobid
    def get_job(self, jobid):
        """
        get_job(jobid) -> Workflow
        """
        params = self._get_params()
        resp = self._root.get("job/%s" % (jobid,), params)
        wf = Workflow(self, resp)
        return wf

    def get_coordinator(self, jobid):
        """get_coordinator(jobid) -> Coordinator with all actions (len=-1)."""
        params = self._get_params()
        params.update({"len": -1})
        resp = self._root.get("job/%s" % (jobid,), params)
        return Coordinator(self, resp)

    def get_bundle(self, jobid):
        """get_bundle(jobid) -> Bundle"""
        params = self._get_params()
        resp = self._root.get("job/%s" % (jobid,), params)
        return Bundle(self, resp)

    def get_job_definition(self, jobid):
        """
        get_job_definition(jobid) -> Definition (xml string)
        """
        params = self._get_params()
        params["show"] = "definition"
        xml = self._root.get("job/%s" % (jobid,), params)
        return xml

    def get_job_log(self, jobid):
        """
        get_job_log(jobid) -> Log (xml string)
        """
        params = self._get_params()
        params["show"] = "log"
        xml = self._root.get("job/%s" % (jobid,), params)
        return xml

    def get_action(self, action_id):
        """Return the typed action for *action_id* ('C@' coord, 'B@' bundle, else workflow)."""
        if "C@" in action_id:
            Klass = CoordinatorAction
        elif "B@" in action_id:
            Klass = BundleAction
        else:
            Klass = WorkflowAction
        params = self._get_params()
        resp = self._root.get("job/%s" % (action_id,), params)
        return Klass(resp)

    def job_control(self, jobid, action, properties=None, parameters=None):
        """
        job_control(jobid, action) -> None

        Raise RestException on error.
        """
        if action not in ("start", "suspend", "resume", "kill", "rerun", "coord-rerun", "bundle-rerun"):
            msg = "Invalid oozie job action: %s" % (action,)
            LOG.error(msg)
            raise ValueError(msg)
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        params["action"] = action
        if parameters is not None:
            params.update(parameters)

        return self._root.put("job/%s" % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

    def submit_workflow(self, application_path, properties=None):
        """
        submit_workflow(application_path, properties=None) -> jobid

        Raise RestException on error.
        """
        defaults = {"oozie.wf.application.path": application_path, "user.name": self.user}

        if properties is not None:
            defaults.update(properties)
        properties = defaults

        return self.submit_job(properties)

    # Is name actually submit_coord?
    def submit_job(self, properties=None):
        """
        submit_job(properties=None, id=None) -> jobid

        Raise RestException on error.
        """
        defaults = {"user.name": self.user}

        if properties is not None:
            defaults.update(properties)

        properties = defaults

        params = self._get_params()
        resp = self._root.post("jobs", params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
        return resp["id"]

    def rerun(self, jobid, properties=None, params=None):
        """Rerun *jobid*; caller-supplied *params* are merged over the defaults."""
        properties = self._get_oozie_properties(properties)
        if params is None:
            params = self._get_params()
        else:
            # Fix: merge the defaults into the caller's params.  The old code
            # did self._get_params().update(params), which updated a temporary
            # dict and discarded it, silently dropping doAs/timezone whenever
            # params was supplied.
            merged = self._get_params()
            merged.update(params)
            params = merged

        params["action"] = "rerun"

        return self._root.put("job/%s" % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

    def get_build_version(self):
        """
        get_build_version() -> Build version (dictionary)
        """
        params = self._get_params()
        resp = self._root.get("admin/build-version", params)
        return resp

    def get_instrumentation(self):
        """Return the admin/instrumentation document."""
        params = self._get_params()
        resp = self._root.get("admin/instrumentation", params)
        return resp

    def get_configuration(self):
        """
        get_configuration() -> Oozie config (dictionary)
        """
        params = self._get_params()
        resp = self._root.get("admin/configuration", params)
        return resp

    def get_oozie_status(self):
        """
        get_oozie_status() -> Oozie status (dictionary)
        """
        params = self._get_params()
        resp = self._root.get("admin/status", params)
        return resp

    def get_oozie_slas(self, **kwargs):
        """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
        params = self._get_params()
        params["filter"] = ";".join(["%s=%s" % (key, val) for key, val in kwargs.iteritems()])
        resp = self._root.get("sla", params)
        return resp["slaSummaryList"]
Exemplo n.º 40
0
class MapreduceApi(object):
  """
  Client for the MapReduce application master REST API, reached through the
  ResourceManager web proxy ('<rm>/proxy/<application_id>/ws/...').

  MapReduce job ids and YARN application ids share a suffix, so
  'job_<ts>_<seq>' is mapped to 'application_<ts>_<seq>' to build the
  proxy path.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    # `oozie_url` is actually the ResourceManager base URL here (name kept
    # for interface compatibility with the sibling API classes).
    self._url = posixpath.join(oozie_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  def job(self, user, job_id):
    """Return the job resource. `user` is unused but kept for callers."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def counters(self, job_id):
    """Return the job counters, or None when only HTML is available."""
    app_id = job_id.replace('job', 'application')
    response = self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})
    # If it hits the job history server, it will return HTML.
    # Simply return None in this case because there isn't much data there.
    # (str, bytes) replaces the Python 2-only `basestring`, so this also
    # works on Python 3 where `basestring` raises NameError.
    if isinstance(response, (str, bytes)):
      return None
    else:
      return response

  def tasks(self, job_id):
    """List the tasks of the job."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def job_attempts(self, job_id):
    """List the attempts of the job itself (not of individual tasks)."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/jobattempts' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def conf(self, job_id):
    """Return the job configuration."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/conf' % {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task(self, job_id, task_id):
    """Return a single task of the job."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_counters(self, job_id, task_id):
    """Return the counters of a single task."""
    app_id = job_id.replace('job', 'application')
    # Normalize back in case the caller passed an application id;
    # presumably defensive — TODO confirm against callers.
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempts(self, job_id, task_id):
    """List the attempts of a single task."""
    app_id = job_id.replace('job', 'application')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def task_attempt(self, job_id, task_id, attempt_id):
    """Return a single attempt of a single task."""
    app_id = job_id.replace('job', 'application')
    # Same defensive normalization as task_counters().
    job_id = job_id.replace('application', 'job')
    return self._root.get('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % {'app_id': app_id, 'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id, 'version': _API_VERSION}, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, job_id):
    """Kill the job by killing its YARN application."""
    app_id = job_id.replace('job', 'application')
    get_resource_manager().kill(app_id) # We need to call the RM
Exemplo n.º 41
0
class OozieApi(object):
    """
  Client for the Oozie workflow server REST API.

  Requests are issued on behalf of the user set via setuser(); the user is
  stored per-thread so one shared instance can serve concurrent requests.
  """
    def __init__(self, oozie_url, security_enabled=False):
        self._url = posixpath.join(oozie_url, API_VERSION)
        self._client = HttpClient(self._url, logger=LOG)
        if security_enabled:
            self._client.set_kerberos_auth()
        self._root = Resource(self._client)
        self._security_enabled = security_enabled

        # To store user info
        self._thread_local = threading.local()

    def __str__(self):
        return "OozieApi at %s" % (self._url,)

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    @property
    def user(self):
        # Falls back to DEFAULT_USER when setuser() was never called on
        # this thread.
        try:
            return self._thread_local.user
        except AttributeError:
            return DEFAULT_USER

    def setuser(self, user):
        """Return the previous user"""
        prev = self.user
        self._thread_local.user = user
        return prev

    def _get_params(self):
        # With Kerberos the authenticated principal impersonates the user
        # (doAs only); otherwise authenticate as DEFAULT_USER and impersonate.
        if self.security_enabled:
            return {"doAs": self.user}
        return {"user.name": DEFAULT_USER, "doAs": self.user}

    VALID_JOB_FILTERS = ("name", "user", "group", "status")

    def get_jobs(self, offset=None, cnt=None, **kwargs):
        """
    get_jobs(offset=None, cnt=None, **kwargs) -> WorkflowList

    Note that offset is 1-based.
    kwargs is used for filtering and may be one of VALID_FILTERS: name, user, group, status
    """
        params = self._get_params()
        if offset is not None:
            params["offset"] = str(offset)
        if cnt is not None:
            params["len"] = str(cnt)

        filter_list = []
        # Iterate key/value pairs; iterating the dict directly yields only
        # the keys and breaks the (key, val) unpacking.
        for key, val in kwargs.items():
            if key not in OozieApi.VALID_JOB_FILTERS:
                raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
            filter_list.append("%s=%s" % (key, val))
        params["filter"] = ";".join(filter_list)

        # Send the request
        resp = self._root.get("jobs", params)
        wf_list = WorkflowList(self, resp, filters=kwargs)
        return wf_list

    def get_job(self, jobid):
        """
    get_job(jobid) -> Workflow
    """
        params = self._get_params()
        resp = self._root.get("job/%s" % (jobid,), params)
        wf = Workflow(self, resp)
        return wf

    def get_job_definition(self, jobid):
        """
    get_job_definition(jobid) -> Definition (xml string)
    """
        params = self._get_params()
        params["show"] = "definition"
        xml = self._root.get("job/%s" % (jobid,), params)
        return xml

    def get_job_log(self, jobid):
        """
    get_job_log(jobid) -> Log (xml string)
    """
        params = self._get_params()
        params["show"] = "log"
        xml = self._root.get("job/%s" % (jobid,), params)
        return xml

    def job_control(self, jobid, action):
        """
    job_control(jobid, action) -> None

    `action` must be one of start/suspend/resume/kill.
    Raise ValueError on a bad action, RestException on server error.
    """
        if action not in ("start", "suspend", "resume", "kill"):
            msg = "Invalid oozie job action: %s" % (action,)
            LOG.error(msg)
            raise ValueError(msg)
        params = self._get_params()
        params["action"] = action
        self._root.put("job/%s" % (jobid,), params)

    def submit_workflow(self, application_path, properties=None):
        """
    submit_workflow(application_path, properties=None) -> jobid

    Submit a job to Oozie. May raise PopupException.
    """
        defaults = {"oozie.wf.application.path": application_path, "user.name": self.user}
        # Caller properties override the defaults; the previous if/else had
        # identical `properties = defaults` in both branches.
        if properties is not None:
            defaults.update(properties)
        properties = defaults

        params = self._get_params()
        resp = self._root.post("jobs", params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
        return resp["id"]

    def get_build_version(self):
        """
    get_build_version() -> Build version (dictionary)
    """
        params = self._get_params()
        resp = self._root.get("admin/build-version", params)
        return resp

    def get_instrumentation(self):
        """
    get_instrumentation() -> Oozie instrumentation (dictionary)
    """
        params = self._get_params()
        resp = self._root.get("admin/instrumentation", params)
        return resp

    def get_configuration(self):
        """
    get_configuration() -> Oozie config (dictionary)
    """
        params = self._get_params()
        resp = self._root.get("admin/configuration", params)
        return resp

    def get_oozie_status(self):
        """
    get_oozie_status() -> Oozie status (dictionary)
    """
        params = self._get_params()
        resp = self._root.get("admin/status", params)
        return resp
Exemplo n.º 42
0
class ManagerApi(object):
    """
  https://cloudera.github.io/cm_api/

  Client for the Cloudera Manager REST API: discovers clusters, services,
  roles and hosts, and issues service commands (refresh/restart/batch).
  RestExceptions from the underlying resource are wrapped in
  ManagerApiException by most public methods.
  """
    def __init__(self,
                 user=None,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        self._api_url = '%s/%s' % (MANAGER.API_URL.get().strip('/'), VERSION)
        self._username = get_navigator_auth_username()
        self._password = get_navigator_auth_password()

        self.user = user
        self._client = HttpClient(self._api_url, logger=LOG)

        # Kerberos when security is enabled, otherwise HTTP basic auth with
        # the Navigator credentials.
        if security_enabled:
            self._client.set_kerberos_auth()
        else:
            self._client.set_basic_auth(self._username, self._password)

        self._client.set_verify(ssl_cert_ca_verify)
        self._root = Resource(self._client)

    def has_service(self, service_name, cluster_name=None):
        """Return True if `service_name` is among the cluster's service types."""
        cluster = self._get_cluster(cluster_name)
        try:
            # NOTE: the 'service_name' key is unused by this URL template;
            # it is harmless and kept as-is.
            services = self._root.get(
                'clusters/%(cluster_name)s/serviceTypes' % {
                    'cluster_name': cluster['name'],
                    'service_name': service_name
                })['items']

            return service_name in services
        except RestException as e:
            raise ManagerApiException(e)

    def get_spark_history_server_configs(self, cluster_name=None):
        """
        Locate the Spark History Server role of the SPARK_ON_YARN service.

        Returns (hostId, full role config items), or (None, None) when the
        role cannot be found or on any error (errors are only logged).
        """
        service_name = "SPARK_ON_YARN"
        shs_role_type = "SPARK_YARN_HISTORY_SERVER"

        try:
            cluster = self._get_cluster(cluster_name)
            services = self._root.get('clusters/%(cluster_name)s/services' % {
                'cluster_name': cluster['name'],
                'service_name': service_name
            })['items']

            service_display_names = [
                service['displayName'] for service in services
                if service['type'] == service_name
            ]

            if service_display_names:
                # If several SPARK_ON_YARN services exist, the first is used.
                spark_service_display_name = service_display_names[0]

                servers = self._root.get(
                    'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles'
                    % {
                        'cluster_name': cluster['name'],
                        'spark_service_display_name':
                        spark_service_display_name
                    })['items']

                shs_server_names = [
                    server['name'] for server in servers
                    if server['type'] == shs_role_type
                ]
                shs_server_name = shs_server_names[
                    0] if shs_server_names else None
                shs_server_hostRef = [
                    server['hostRef'] for server in servers
                    if server['type'] == shs_role_type
                ]
                shs_server_hostId = shs_server_hostRef[0][
                    'hostId'] if shs_server_hostRef else None

                if shs_server_name and shs_server_hostId:
                    # 'view=full' is needed so each config item carries
                    # 'relatedName'/'default' consumed by the callers.
                    shs_server_configs = self._root.get(
                        'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config'
                        % {
                            'cluster_name': cluster['name'],
                            'spark_service_display_name':
                            spark_service_display_name,
                            'shs_server_name': shs_server_name
                        },
                        params={'view': 'full'})['items']
                    return shs_server_hostId, shs_server_configs
        except Exception as e:
            LOG.warning("Check Spark History Server via ManagerApi: %s" % e)

        return None, None

    def get_spark_history_server_url(self, cluster_name=None):
        """
        Build the Spark History Server web UI URL from its role configs.

        Returns None when the server or its configs cannot be found.
        """
        shs_server_hostId, shs_server_configs = self.get_spark_history_server_configs(
            cluster_name=cluster_name)

        if shs_server_hostId and shs_server_configs:
            shs_ui_port = None
            shs_ssl_port = None
            shs_ssl_enabled = None
            # Only the configs' *default* values are read here — TODO(review):
            # confirm overridden values are not needed.
            for config in shs_server_configs:
                if 'relatedName' in config and 'default' in config:
                    if config['relatedName'] == 'spark.history.ui.port':
                        shs_ui_port = config['default']
                    if config['relatedName'] == 'spark.ssl.historyServer.port':
                        shs_ssl_port = config['default']
                    if config[
                            'relatedName'] == 'spark.ssl.historyServer.enabled':
                        shs_ssl_enabled = config['default']
            shs_ui_host = self._root.get('hosts/%(hostId)s' %
                                         {'hostId': shs_server_hostId})
            shs_ui_hostname = shs_ui_host['hostname'] if shs_ui_host else None

            return self.assemble_shs_url(shs_ui_hostname, shs_ui_port,
                                         shs_ssl_port, shs_ssl_enabled)

        return None

    def get_spark_history_server_security_enabled(self, cluster_name=None):
        """
        Return True when the history server reports
        'history_server_spnego_enabled' == 'true', else False.
        """
        # NOTE(review): shs_server_hostId is unused here; only the configs matter.
        shs_server_hostId, shs_server_configs = self.get_spark_history_server_configs(
            cluster_name=cluster_name)

        if shs_server_configs:
            for config in shs_server_configs:
                if 'relatedName' in config and 'default' in config and config[
                        'relatedName'] == 'history_server_spnego_enabled':
                    shs_security_enabled = config['default']
                    return shs_security_enabled and shs_security_enabled == 'true'

        return False

    def assemble_shs_url(self,
                         shs_ui_hostname,
                         shs_ui_port=None,
                         shs_ssl_port=None,
                         shs_ssl_enabled=None):
        """
        Combine hostname/ports/SSL flag into 'http(s)://host:port'.

        All four values are required (string flag from the CM config);
        returns None and logs a warning when any is missing.
        """
        if not shs_ui_hostname or not shs_ui_port or not shs_ssl_port or not shs_ssl_enabled:
            LOG.warning("Spark conf not found!")
            return None

        protocol = 'https' if shs_ssl_enabled.lower() == 'true' else 'http'
        shs_url = '%(protocol)s://%(hostname)s:%(port)s' % {
            'protocol':
            protocol,
            'hostname':
            shs_ui_hostname,
            'port':
            shs_ssl_port if shs_ssl_enabled.lower() == 'true' else shs_ui_port,
        }

        return shs_url

    def tools_echo(self):
        """Ping Cloudera Manager via its tools/echo endpoint."""
        try:
            params = (('message', 'hello'), )

            LOG.info(params)
            return self._root.get('tools/echo', params=params)
        except RestException as e:
            raise ManagerApiException(e)

    def get_kafka_brokers(self, cluster_name=None):
        """Return a comma-separated 'host:9092' list of Kafka broker hosts."""
        try:

            hosts = self._get_hosts('KAFKA',
                                    'KAFKA_BROKER',
                                    cluster_name=cluster_name)

            # NOTE(review): broker port 9092 is hard-coded (Kafka's common
            # default) — confirm for clusters with non-default listeners.
            brokers_hosts = [host['hostname'] + ':9092' for host in hosts]

            return ','.join(brokers_hosts)
        except RestException as e:
            raise ManagerApiException(e)

    def get_kudu_master(self, cluster_name=None):
        """Return the hostname of the (first) KUDU_MASTER role's host."""
        try:
            cluster = self._get_cluster(cluster_name)
            services = self._root.get('clusters/%(name)s/services' %
                                      cluster)['items']

            # IndexError propagates if no KUDU service / master exists.
            service = [
                service for service in services if service['type'] == 'KUDU'
            ][0]
            master = self._get_roles(cluster['name'], service['name'],
                                     'KUDU_MASTER')[0]

            master_host = self._root.get('hosts/%(hostId)s' %
                                         master['hostRef'])

            return master_host['hostname']
        except RestException as e:
            raise ManagerApiException(e)

    def get_kafka_topics(self, broker_host):
        """List Kafka topics from the REST service on the broker host."""
        try:
            # NOTE(review): port 24042 is assumed to serve an /api/topics
            # endpoint on the broker — confirm which agent exposes it.
            client = HttpClient('http://%s:24042' % broker_host, logger=LOG)
            root = Resource(client)

            return root.get('/api/topics')
        except RestException as e:
            raise ManagerApiException(e)

    def update_flume_config(self, cluster_name, config_name, config_value):
        """Set one config value on the FLUME-1 AGENT role config group via a batch call."""
        service = 'FLUME-1'
        cluster = self._get_cluster(cluster_name)
        roleConfigGroup = [
            role['roleConfigGroupRef']['roleConfigGroupName']
            for role in self._get_roles(cluster['name'], service, 'AGENT')
        ]
        # str.replace() is used instead of %-formatting because the URL
        # contains literal '%20' escapes that %-formatting would mangle.
        data = {
            u'items': [{
                u'url':
                u'/api/v8/clusters/%(cluster_name)s/services/%(service)s/roleConfigGroups/%(roleConfigGroups)s/config?message=Updated%20service%20and%20role%20type%20configurations.'
                .replace('%(cluster_name)s',
                         urllib_quote(cluster['name'])).replace(
                             '%(service)s',
                             service).replace('%(roleConfigGroups)s',
                                              roleConfigGroup[0]),
                u'body': {
                    u'items': [{
                        u'name': config_name,
                        u'value': config_value
                    }]
                },
                u'contentType':
                u'application/json',
                u'method':
                u'PUT'
            }]
        }

        return self.batch(items=data)

    def get_flume_agents(self, cluster_name=None):
        """Return the hostnames of all Flume AGENT roles in the cluster."""
        return [
            host['hostname'] for host in self._get_hosts(
                'FLUME', 'AGENT', cluster_name=cluster_name)
        ]

    def _get_hosts(self, service_name, role_name, cluster_name=None):
        """Return the host objects running `role_name` of `service_name`."""
        try:
            cluster = self._get_cluster(cluster_name)
            services = self._root.get('clusters/%(name)s/services' %
                                      cluster)['items']

            service = [
                service for service in services
                if service['type'] == service_name
            ][0]
            hosts = self._get_roles(cluster['name'], service['name'],
                                    role_name)
            hosts_ids = [host['hostRef']['hostId'] for host in hosts]

            # Fetch all hosts once, then filter to the role's host ids.
            hosts = self._root.get('hosts')['items']
            return [host for host in hosts if host['hostId'] in hosts_ids]
        except RestException as e:
            raise ManagerApiException(e)

    def refresh_flume(self, cluster_name, restart=False):
        """Refresh (or restart, when `restart`) the FLUME-1 AGENT roles."""
        service = 'FLUME-1'
        cluster = self._get_cluster(cluster_name)
        roles = [
            role['name']
            for role in self._get_roles(cluster['name'], service, 'AGENT')
        ]

        if restart:
            return self.restart_services(cluster['name'], service, roles)
        else:
            return self.refresh_configs(cluster['name'], service, roles)

    def refresh_configs(self, cluster_name, service=None, roles=None):
        """
        Issue a refresh command: cluster-wide when `service` is None,
        service-wide when `roles` is None, else only for the given roles.
        """
        try:
            if service is None:
                return self._root.post(
                    'clusters/%(cluster_name)s/commands/refresh' %
                    {'cluster_name': cluster_name},
                    contenttype="application/json")
            elif roles is None:
                return self._root.post(
                    'clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh'
                    % {
                        'cluster_name': cluster_name,
                        'service': service
                    },
                    contenttype="application/json")
            else:
                return self._root.post(
                    'clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh'
                    % {
                        'cluster_name': cluster_name,
                        'service': service
                    },
                    data=json.dumps({"items": roles}),
                    contenttype="application/json")
        except RestException as e:
            raise ManagerApiException(e)

    def restart_services(self, cluster_name, service=None, roles=None):
        """
        Issue a restart command with the same cluster/service/roles
        fan-out as refresh_configs().
        """
        try:
            if service is None:
                return self._root.post(
                    'clusters/%(cluster_name)s/commands/restart' %
                    {'cluster_name': cluster_name},
                    contenttype="application/json")
            elif roles is None:
                return self._root.post(
                    'clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart'
                    % {
                        'cluster_name': cluster_name,
                        'service': service
                    },
                    contenttype="application/json")
            else:
                return self._root.post(
                    'clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart'
                    % {
                        'cluster_name': cluster_name,
                        'service': service
                    },
                    data=json.dumps({"items": roles}),
                    contenttype="application/json")
        except RestException as e:
            raise ManagerApiException(e)

    def batch(self, items):
        """POST a batch of API requests (see update_flume_config for the shape)."""
        try:
            return self._root.post('batch',
                                   data=json.dumps(items),
                                   contenttype='application/json')
        except RestException as e:
            raise ManagerApiException(e)

    def _get_cluster(self, cluster_name=None):
        """Return the named cluster, or the first one when no name is given."""
        clusters = self._root.get('clusters/')['items']

        if cluster_name is not None:
            # IndexError propagates when the name does not match any cluster.
            cluster = [
                cluster for cluster in clusters
                if cluster['name'] == cluster_name
            ][0]
        else:
            cluster = clusters[0]

        return cluster

    def _get_roles(self, cluster_name, service_name, role_type):
        """Return the service's roles filtered to `role_type`."""
        roles = self._root.get(
            'clusters/%(cluster_name)s/services/%(service_name)s/roles' % {
                'cluster_name': cluster_name,
                'service_name': service_name
            })['items']
        return [role for role in roles if role['type'] == role_type]

    def get_impalad_config(self,
                           key=None,
                           impalad_host=None,
                           cluster_name=None):
        """
        Return the value of config `key` for the IMPALAD role running on
        `impalad_host`, or None when missing or on any error (logged only).
        """
        if not key or not impalad_host:
            return None

        service_name = "IMPALA"
        role_type = 'IMPALAD'

        try:
            cluster = self._get_cluster(cluster_name)
            services = self._root.get('clusters/%(cluster_name)s/services' % {
                'cluster_name': cluster['name'],
                'service_name': service_name
            })['items']

            service_display_names = [
                service['displayName'] for service in services
                if service['type'] == service_name
            ]

            hosts = self._root.get('hosts')['items']
            impalad_hostIds = [
                host['hostId'] for host in hosts
                if host['hostname'] == impalad_host
            ]

            if impalad_hostIds and service_display_names:
                impalad_hostId = impalad_hostIds[0]
                impala_service_display_name = service_display_names[0]

                # NOTE: the 'spark_service_display_name' placeholder names
                # are reused from the Spark helper above; the values are
                # Impala's.
                servers = self._root.get(
                    'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles'
                    % {
                        'cluster_name': cluster['name'],
                        'spark_service_display_name':
                        impala_service_display_name
                    })['items']

                impalad_server_names = [
                    server['name'] for server in servers
                    if server['type'] == role_type
                    and server['hostRef']['hostId'] == impalad_hostId
                ]
                impalad_server_name = impalad_server_names[
                    0] if impalad_server_names else None

                if impalad_server_name:
                    server_configs = self._root.get(
                        'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config'
                        % {
                            'cluster_name': cluster['name'],
                            'spark_service_display_name':
                            impala_service_display_name,
                            'shs_server_name': impalad_server_name
                        },
                        params={'view': 'full'})['items']

                    for config in server_configs:
                        if 'relatedName' in config and 'value' in config:
                            if config['relatedName'] == key:
                                return config['value']

        except Exception as e:
            LOG.warning(
                "Get Impala Daemon API configurations via ManangerAPI: %s" % e)

        return None
Exemplo n.º 43
0
Arquivo: api.py Projeto: 277800076/hue
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get(), ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)

    # The Kerberos handshake requires two requests in order to authenticate,
    # but if our first request is a PUT/POST, it might flat-out reject the
    # first request if the body is too large. So, connect here in order to get
    # a cookie so future PUT/POSTs will be pre-authenticated.
    if self.security_enabled:
      self._root.invoke('HEAD', '/')

  def _get_params(self):
    if self.security_enabled:
      return (('doAs', self._user ),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)

  def _get_q(self, query):
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    return 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']]).encode('utf-8')

  def _get_aggregate_function(self, facet):
    props = {
        'field': facet['field'],
        'aggregate': facet['properties']['aggregate'] if 'properties' in facet else facet['aggregate']
    }

    if props['aggregate'] == 'median':
      return 'percentile(%(field)s,50)' % props
    else:
      return '%(aggregate)s(%(field)s)' % props

  def _get_range_borders(self, collection, query):
    props = {}
    GAPS = {
        '5MINUTES': {
            'histogram-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'bucket-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'bar-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots
            'facet-widget': {'coeff': '+1', 'unit': 'MINUTES'}, # ~10 slots
        },
        '30MINUTES': {
            'histogram-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'bucket-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'bar-widget': {'coeff': '+20', 'unit': 'SECONDS'},
            'facet-widget': {'coeff': '+5', 'unit': 'MINUTES'},
        },
        '1HOURS': {
            'histogram-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'bucket-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'bar-widget': {'coeff': '+30', 'unit': 'SECONDS'},
            'facet-widget': {'coeff': '+10', 'unit': 'MINUTES'},
        },
        '12HOURS': {
            'histogram-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+7', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+1', 'unit': 'HOURS'},
        },
        '1DAYS': {
            'histogram-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+15', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+3', 'unit': 'HOURS'},
        },
        '2DAYS': {
            'histogram-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'bucket-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'bar-widget': {'coeff': '+30', 'unit': 'MINUTES'},
            'facet-widget': {'coeff': '+6', 'unit': 'HOURS'},
        },
        '7DAYS': {
            'histogram-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'bucket-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'bar-widget': {'coeff': '+3', 'unit': 'HOURS'},
            'facet-widget': {'coeff': '+1', 'unit': 'DAYS'},
        },
        '1MONTHS': {
            'histogram-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'bucket-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'bar-widget': {'coeff': '+12', 'unit': 'HOURS'},
            'facet-widget': {'coeff': '+5', 'unit': 'DAYS'},
        },
        '3MONTHS': {
            'histogram-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+1', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+30', 'unit': 'DAYS'},
        },
        '1YEARS': {
            'histogram-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+3', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+12', 'unit': 'MONTHS'},
        },
        '2YEARS': {
            'histogram-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'bucket-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'bar-widget': {'coeff': '+7', 'unit': 'DAYS'},
            'facet-widget': {'coeff': '+3', 'unit': 'MONTHS'},
        },
        '10YEARS': {
            'histogram-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'bucket-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'bar-widget': {'coeff': '+1', 'unit': 'MONTHS'},
            'facet-widget': {'coeff': '+1', 'unit': 'YEARS'},
        }
    }

    time_field = collection['timeFilter'].get('field')

    if time_field and (collection['timeFilter']['value'] != 'all' or collection['timeFilter']['type'] == 'fixed'):
      # fqs overrides main time filter
      fq_time_ids = [fq['id'] for fq in query['fqs'] if fq['field'] == time_field]
      props['time_filter_overrides'] = fq_time_ids
      props['time_field'] = time_field

      if collection['timeFilter']['type'] == 'rolling':
        props['field'] = collection['timeFilter']['field']
        props['from'] = 'NOW-%s' % collection['timeFilter']['value']
        props['to'] = 'NOW'
        props['gap'] = GAPS.get(collection['timeFilter']['value'])
      elif collection['timeFilter']['type'] == 'fixed':
        props['field'] = collection['timeFilter']['field']
        props['from'] = collection['timeFilter']['from']
        props['to'] = collection['timeFilter']['to']
        props['fixed'] = True

    return props

  def _get_time_filter_query(self, timeFilter, facet):
    if 'fixed' in timeFilter:
      props = {}
      stat_facet = {'min': timeFilter['from'], 'max': timeFilter['to']}
      _compute_range_facet(facet['widgetType'], stat_facet, props, stat_facet['min'], stat_facet['max'])
      gap = props['gap']
      unit = re.split('\d+', gap)[1]
      return {
        'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': unit},
        'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': unit},
        'gap': '%(gap)s' % props, # add a 'auto'
      }
    else:
      gap = timeFilter['gap'][facet['widgetType']]
      return {
        'start': '%(from)s/%(unit)s' % {'from': timeFilter['from'], 'unit': gap['unit']},
        'end': '%(to)s/%(unit)s' % {'to': timeFilter['to'], 'unit': gap['unit']},
        'gap': '%(coeff)s%(unit)s/%(unit)s' % gap, # add a 'auto'
      }

  def _get_fq(self, collection, query):
    params = ()
    timeFilter = {}

    if collection:
      timeFilter = self._get_range_borders(collection, query)
    if timeFilter and not timeFilter.get('time_filter_overrides'):
      params += (('fq', urllib.unquote(utf_quoter('%(field)s:[%(from)s TO %(to)s]' % timeFilter))),)

    # Merge facets queries on same fields
    grouped_fqs = groupby(query['fqs'], lambda x: (x['type'], x['field']))
    merged_fqs = []
    for key, group in grouped_fqs:
      field_fq = next(group)
      for fq in group:
        for f in fq['filter']:
          field_fq['filter'].append(f)
      merged_fqs.append(field_fq)

    for fq in merged_fqs:
      if fq['type'] == 'field':
        fields = fq['field'] if type(fq['field']) == list else [fq['field']] # 2D facets support
        for field in fields:
          f = []
          for _filter in fq['filter']:
            values = _filter['value'] if type(_filter['value']) == list else [_filter['value']] # 2D facets support
            if fields.index(field) < len(values): # Lowest common field denominator
              value = values[fields.index(field)]
              exclude = '-' if _filter['exclude'] else ''
              if value is not None and ' ' in force_unicode(value):
                value = force_unicode(value).replace('"', '\\"')
                f.append('%s%s:"%s"' % (exclude, field, value))
              else:
                f.append('%s{!field f=%s}%s' % (exclude, field, value))
          _params ='{!tag=%(id)s}' % fq + ' '.join(f)
          params += (('fq', urllib.unquote(utf_quoter(_params))),)
      elif fq['type'] == 'range':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'], f['to']))) for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'range-up':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'] if fq['is_up'] else '*', '*' if fq['is_up'] else f['from'])))
                                                          for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'map':
        _keys = fq.copy()
        _keys.update(fq['properties'])
        params += (('fq', '{!tag=%(id)s}' % fq + urllib.unquote(
                    utf_quoter('%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}' % _keys))),)

    return params

  def query(self, collection, query):
    solr_query = {}

    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )
      json_facets = {}

      timeFilter = self._get_range_borders(collection, query)

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }

          if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += (
             ('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }
          params += (
              ('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'nested':
          _f = {
              'field': facet['field'],
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          if 'start' in facet['properties']:
            _f.update({
                'type': 'range',
                'start': facet['properties']['start'],
                'end': facet['properties']['end'],
                'gap': facet['properties']['gap'],
            })
            if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'bucket-widget'):
              _f.update(self._get_time_filter_query(timeFilter, facet))
          else:
            _f.update({
                'type': 'terms',
                'field': facet['field'],
                'excludeTags': facet['id']
            })

          if facet['properties']['facets']:
            if facet['properties']['facets'][0]['aggregate'] == 'count':
              _f['facet'] = {
                  'd2': {
                      'type': 'terms',
                      'field': '%(field)s' % facet['properties']['facets'][0],
                      'limit': int(facet['properties']['facets'][0].get('limit', 10)),
                      'mincount': int(facet['properties']['facets'][0]['mincount'])
                  }
              }
              if len(facet['properties']['facets']) > 1: # Get 3rd dimension calculation
                _f['facet']['d2']['facet'] = {
                    'd2': self._get_aggregate_function(facet['properties']['facets'][1])
                }
            else:
              _f['facet'] = {
                  'd2': self._get_aggregate_function(facet['properties']['facets'][0])
              }

          json_facets[facet['id']] = _f
        elif facet['type'] == 'function':
          json_facets[facet['id']] = self._get_aggregate_function(facet)
          json_facets['processEmpty'] = True
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }
            params += (
                ('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys),
            )

      if json_facets:
        params += (
            ('json.facet', json.dumps(json_facets)),
        )

    params += self._get_fq(collection, query)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      fields = set(collection['template']['fieldsSelected'] + [collection['idField']] if collection['idField'] else [])
      # Add field if needed
      if collection['template']['leafletmap'].get('latitudeField'):
        fields.add(collection['template']['leafletmap']['latitudeField'])
      if collection['template']['leafletmap'].get('longitudeField'):
        fields.add(collection['template']['leafletmap']['longitudeField'])
      if collection['template']['leafletmap'].get('labelField'):
        fields.add(collection['template']['leafletmap']['labelField'])
      params += (('fl', urllib.unquote(utf_quoter(','.join(list(fields))))),)
    else:
      params += (('fl', '*'),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 5),
      ('hl.fragsize', 1000),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        attribute_field = filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes'])
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)
    return self._get_json(response)


  def suggest(self, collection, query):
    try:
      params = self._get_params() + (
          ('suggest', 'true'),
          ('suggest.build', 'true'),
          ('suggest.q', query['q']),
          ('wt', 'json'),
      )
      if query.get('dictionary'):
        params += (
            ('suggest.dictionary', query['dictionary']),
        )
      response = self._root.get('%s/suggest' % collection, params)
      return self._get_json(response)
    except RestException, e:
      raise PopupException(e, title=_('Error while accessing Solr'))
Exemplo n.º 44
0
class OozieApi(object):
  """
  Client for the Oozie Web Services REST API: listing, inspecting, submitting
  and controlling workflows, coordinators and bundles.
  """
  def __init__(self, oozie_url, security_enabled=False):
    self._url = posixpath.join(oozie_url, API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    if security_enabled:
      self._client.set_kerberos_auth()
    self._root = Resource(self._client)
    self._security_enabled = security_enabled

    # To store user info per request-handling thread.
    self._thread_local = threading.local()

  def __str__(self):
    return "OozieApi at %s" % (self._url,)

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  @property
  def user(self):
    # Fall back to the default user when no user was set on this thread.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER

  def setuser(self, user):
    """Set the effective user for the current thread. Return the previous user."""
    prev = self.user
    self._thread_local.user = user
    return prev

  def _get_params(self):
    """Common request params: impersonation (doAs/user.name) and timezone."""
    if self.security_enabled:
      return { 'doAs': self.user, 'timezone': TIME_ZONE.get() }
    return { 'user.name': DEFAULT_USER, 'doAs': self.user, 'timezone': TIME_ZONE.get() }

  def _get_oozie_properties(self, properties=None):
    """Return job configuration properties, defaulting user.name to the current user."""
    defaults = {
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)

    return defaults

  # Filters accepted by the jobs listing endpoint.
  VALID_JOB_FILTERS = ('name', 'user', 'group', 'status')

  def get_jobs(self, jobtype, offset=None, cnt=None, **kwargs):
    """
    Get a list of Oozie jobs.

    jobtype is 'wf', 'coord' or 'bundle'
    Note that offset is 1-based.
    kwargs is used for filtering and may be one of VALID_JOB_FILTERS: name, user, group, status
    """
    params = self._get_params()
    if offset is not None:
      params['offset'] = str(offset)
    if cnt is not None:
      params['len'] = str(cnt)
    params['jobtype'] = jobtype

    filter_list = [ ]
    for key, val in kwargs.iteritems():
      if key not in OozieApi.VALID_JOB_FILTERS:
        raise ValueError('"%s" is not a valid filter for selecting jobs' % (key,))
      filter_list.append('%s=%s' % (key, val))
    params['filter'] = ';'.join(filter_list)

    # Send the request and wrap the response per job type.
    resp = self._root.get('jobs', params)
    if jobtype == 'wf':
      wf_list = WorkflowList(self, resp, filters=kwargs)
    elif jobtype == 'coord':
      wf_list = CoordinatorList(self, resp, filters=kwargs)
    else:
      wf_list = BundleList(self, resp, filters=kwargs)
    return wf_list

  def get_workflows(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs('wf', offset, cnt, **kwargs)

  def get_coordinators(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs('coord', offset, cnt, **kwargs)

  def get_bundles(self, offset=None, cnt=None, **kwargs):
    return self.get_jobs('bundle', offset, cnt, **kwargs)

  # TODO: make get_job accept any jobid
  def get_job(self, jobid):
    """
    get_job(jobid) -> Workflow
    """
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    wf = Workflow(self, resp)
    return wf

  def get_coordinator(self, jobid):
    """get_coordinator(jobid) -> Coordinator, with all actions (len=-1)."""
    params = self._get_params()
    params.update({'len': -1})
    resp = self._root.get('job/%s' % (jobid,), params)
    return Coordinator(self, resp)

  def get_bundle(self, jobid):
    """get_bundle(jobid) -> Bundle"""
    params = self._get_params()
    resp = self._root.get('job/%s' % (jobid,), params)
    return Bundle(self, resp)

  def get_job_definition(self, jobid):
    """
    get_job_definition(jobid) -> Definition (xml string)
    """
    params = self._get_params()
    params['show'] = 'definition'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_job_log(self, jobid):
    """
    get_job_log(jobid) -> Log (xml string)
    """
    params = self._get_params()
    params['show'] = 'log'
    xml = self._root.get('job/%s' % (jobid,), params)
    return xml

  def get_action(self, action_id):
    """Fetch a single action; the id infix (C@/B@) selects the wrapper class."""
    if 'C@' in action_id:
      Klass = CoordinatorAction
    elif 'B@' in action_id:
      Klass = BundleAction
    else:
      Klass = WorkflowAction
    params = self._get_params()
    resp = self._root.get('job/%s' % (action_id,), params)
    return Klass(resp)

  def job_control(self, jobid, action, properties=None, parameters=None):
    """
    job_control(jobid, action) -> None

    action is one of: start, suspend, resume, kill, rerun, coord-rerun, bundle-rerun.
    Raise RestException on error.
    """
    if action not in ('start', 'suspend', 'resume', 'kill', 'rerun', 'coord-rerun', 'bundle-rerun'):
      msg = 'Invalid oozie job action: %s' % (action,)
      LOG.error(msg)
      raise ValueError(msg)
    properties = self._get_oozie_properties(properties)
    params = self._get_params()
    params['action'] = action
    if parameters is not None:
      params.update(parameters)

    return self._root.put('job/%s' % jobid, params,  data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def submit_workflow(self, application_path, properties=None):
    """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'oozie.wf.application.path': application_path,
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)
    properties = defaults

    return self.submit_job(properties)

  # Is name actually submit_coord?
  def submit_job(self, properties=None):
    """
    submit_job(properties=None, id=None) -> jobid

    Raise RestException on error.
    """
    defaults = {
      'user.name': self.user,
    }

    if properties is not None:
      defaults.update(properties)

    properties = defaults

    params = self._get_params()
    resp = self._root.post('jobs', params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)
    return resp['id']

  def rerun(self, jobid, properties=None, params=None):
    """
    Rerun a job. Caller-supplied params are merged on top of the standard
    impersonation/timezone params.

    Bug fix: the old code did `self._get_params().update(params)` and
    discarded the result, so the standard params were silently dropped
    whenever the caller passed params.
    """
    properties = self._get_oozie_properties(properties)
    base_params = self._get_params()
    if params is not None:
      base_params.update(params)
    params = base_params

    params['action'] = 'rerun'

    return self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE)

  def get_build_version(self):
    """
    get_build_version() -> Build version (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/build-version', params)
    return resp

  def get_instrumentation(self):
    """Return the Oozie server instrumentation metrics (dictionary)."""
    params = self._get_params()
    resp = self._root.get('admin/instrumentation', params)
    return resp

  def get_configuration(self):
    """
    get_configuration() -> Oozie config (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/configuration', params)
    return resp

  def get_oozie_status(self):
    """
    get_oozie_status() -> Oozie status (dictionary)
    """
    params = self._get_params()
    resp = self._root.get('admin/status', params)
    return resp
Exemplo n.º 45
0
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled
    if self.security_enabled:
      self._client.set_kerberos_auth()
    self._root = resource.Resource(self._client)


  def _get_params(self):
    if self.security_enabled:
      return (('doAs', self._user ),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)


  def _get_q(self, query):
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    return 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']]).encode('utf-8')

  def _get_aggregate_function(self, facet):
    props = {
        'field': facet['field'],
        'aggregate': facet['properties']['aggregate'] if 'properties' in facet else facet['aggregate']
    }
    if props['aggregate'] == 'median':
      return 'percentile(%(field)s,50)' % props
    else:
      return '%(aggregate)s(%(field)s)' % props

  def _get_fq(self, query):
    params = ()

    # Merge facets queries on same fields
    grouped_fqs = groupby(query['fqs'], lambda x: (x['type'], x['field']))
    merged_fqs = []
    for key, group in grouped_fqs:
      field_fq = next(group)
      for fq in group:
        for f in fq['filter']:
          field_fq['filter'].append(f)
      merged_fqs.append(field_fq)

    for fq in merged_fqs:
      if fq['type'] == 'field':
        fields = fq['field'] if type(fq['field']) == list else [fq['field']] # 2D facets support
        for field in fields:
          f = []
          for _filter in fq['filter']:
            values = _filter['value'] if type(_filter['value']) == list else [_filter['value']] # 2D facets support
            if fields.index(field) < len(values): # Lowest common field denominator
              value = values[fields.index(field)]
              exclude = '-' if _filter['exclude'] else ''
              if value is not None and ' ' in smart_str(value):
                f.append('%s%s:"%s"' % (exclude, field, value))
              else:
                f.append('%s{!field f=%s}%s' % (exclude, field, value))
          _params ='{!tag=%(id)s}' % fq + ' '.join(f)
          params += (('fq', urllib.unquote(utf_quoter(_params))),)
      elif fq['type'] == 'range':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'], f['to']))) for field, f in zip(fq['filter'], fq['properties'])])),)
      elif fq['type'] == 'range-up':
        params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'] if fq['is_up'] else '*', '*' if fq['is_up'] else f['from'])))
                                                          for field, f in zip(fq['filter'], fq['properties'])])),)
    return params

  def query(self, collection, query):
    solr_query = {}

    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )
      json_facets = {}

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }
          params += (
             ('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }
          params += (
              ('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'nested':
          _f = {
              'field': facet['field'],
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          if 'start' in facet['properties']:
            _f.update({
                'type': 'range',
                'start': facet['properties']['start'],
                'end': facet['properties']['end'],
                'gap': facet['properties']['gap'],
            })
          else:
            _f.update({
                'type': 'terms',
                'field': facet['field'],
                'excludeTags': facet['id']
            })

          if facet['properties']['facets']:
            if facet['properties']['facets'][0]['aggregate'] == 'count':
              _f['facet'] = {
                  'd2': {
                      'type': 'terms',
                      'field': '%(field)s' % facet['properties']['facets'][0]
                  }
              }
              if len(facet['properties']['facets']) > 1: # Get 3rd dimension calculation
                _f['facet']['d2']['facet'] = {
                    'd2': self._get_aggregate_function(facet['properties']['facets'][1])
                }
            else:
              _f['facet'] = {
                  'd2': self._get_aggregate_function(facet['properties']['facets'][0])
              }

          json_facets[facet['id']] = _f
        elif facet['type'] == 'function':
          json_facets[facet['id']] = self._get_aggregate_function(facet)
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }
            params += (
                ('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys),
            )

      if json_facets:
        params += (
            ('json.facet', json.dumps(json_facets)),
        )

    params += self._get_fq(query)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      fields = set(collection['template']['fieldsSelected'] + [collection['idField']] if collection['idField'] else [])
      # Add field if needed
      if collection['template']['leafletmap'].get('latitudeField'):
        fields.add(collection['template']['leafletmap']['latitudeField'])
      if collection['template']['leafletmap'].get('longitudeField'):
        fields.add(collection['template']['leafletmap']['longitudeField'])
      if collection['template']['leafletmap'].get('labelField'):
        fields.add(collection['template']['leafletmap']['labelField'])
      params += (('fl', urllib.unquote(utf_quoter(','.join(list(fields))))),)
    else:
      params += (('fl', '*'),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 3),
      ('hl.fragsize', 0),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        attribute_field = filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes'])
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)
    return self._get_json(response)


  def suggest(self, solr_query, hue_core):
    try:
      params = self._get_params() + (
          ('q', solr_query['q']),
          ('wt', 'json'),
      )
      response = self._root.get('%(collection)s/suggest' % solr_query, params)
      return self._get_json(response)
    except RestException, e:
      raise PopupException(e, title=_('Error while accessing Solr'))
Exemplo n.º 46
0
class AtlasApi(Api):
    """
    Hue catalog API backed by Apache Atlas (https://atlas.apache.org).

    Translates between Hue's Navigator-style entity structures and the Atlas
    v2 REST API (basic and DSL searches).
    """

    # (field, boost weight) pairs used when building weighted search terms.
    DEFAULT_SEARCH_FIELDS = (('originalName', 3), ('originalDescription', 1),
                             ('name', 10), ('description', 3), ('tags', 5))
    CATALOG_NAMESPACE = '__cloudera_internal_catalog_hue'

    # Hue/Navigator type name -> Atlas type name.
    NAV_TO_ATLAS_TYPE = {
        'table': 'hive_table',
        'database': 'hive_db',
        'field': 'hive_column'
    }

    # Atlas type name -> Hue/Navigator type name.
    ATLAS_TO_NAV_TYPE = {
        'hive_table': 'TABLE',
        'hive_db': 'DATABASE',
        'hive_column': 'FIELD'
    }

    # Facets recognized inside a free-text query, e.g. 'tag:pii type:table owner:joe'.
    # Raw strings so '\s' is a regex escape, not an (invalid) string escape.
    CLASSIFICATION_RE = re.compile(r'(?:tag|tags|classification)\s*\:\s*(?:(?:\"([^"]+)\")|([^ ]+))\s*', re.IGNORECASE)
    TYPE_RE = re.compile(r'type\s*\:\s*([^ ]+)\s*', re.IGNORECASE)
    OWNER_RE = re.compile(r'owner\s*\:\s*([^ ]+)\s*', re.IGNORECASE)

    def __init__(self, user=None):
        super(AtlasApi, self).__init__(user)

        self._api_url = CATALOG.API_URL.get().strip('/') + "/api/atlas"
        self._username = CATALOG.SERVER_USER.get()
        self._password = CATALOG.SERVER_PASSWORD.get()

        self._client = HttpClient(self._api_url, logger=LOG)
        if CATALOG.KERBEROS_ENABLED.get():
            self._client.set_kerberos_auth()
        elif self._password:
            self._client.set_basic_auth(self._username, self._password)

        # urlencode=False: DSL query strings must be passed through verbatim
        # for search_entities_interactive.
        self._root = resource.Resource(self._client, urlencode=False)

        self.__headers = {}
        self.__params = ()

        #self._fillup_properties() # Disabled currently

    def _get_types_from_sources(self, sources):
        """Return (default_entity_types, entity_types) for the given source names."""
        default_entity_types = entity_types = ('DATABASE', 'TABLE', 'PARTITION', 'FIELD', 'FILE',
                                               'VIEW', 'S3BUCKET', 'OPERATION', 'DIRECTORY')

        if 'sql' in sources or 'hive' in sources or 'impala' in sources:
            entity_types = ('TABLE', 'VIEW', 'DATABASE', 'PARTITION', 'FIELD')
            default_entity_types = ('TABLE', 'VIEW')
        elif 'hdfs' in sources:
            entity_types = ('FILE', 'DIRECTORY')
            default_entity_types = ('FILE', 'DIRECTORY')
        elif 's3' in sources:
            entity_types = ('FILE', 'DIRECTORY', 'S3BUCKET')
            default_entity_types = ('DIRECTORY', 'S3BUCKET')

        return default_entity_types, entity_types

    def adapt_atlas_entity_to_navigator(self, atlas_entity):
        """Convert a single Atlas entity dict into the Navigator entity structure Hue consumes."""
        attributes = atlas_entity['attributes']

        nav_entity = {
            # False when 'createTime' is absent, otherwise its value (original semantics kept).
            "created": 'createTime' in attributes and attributes['createTime'],
            "customProperties": None,
            "description": attributes.get('description'),
            "identity": atlas_entity['guid'],
            "internalType": atlas_entity['typeName'],
            "meaningNames": atlas_entity['meaningNames'],  # Atlas specific
            "meanings": atlas_entity['meanings'],  # Atlas specific
            "name": attributes.get('name'),
            "original_name": attributes.get('name'),
            "originalDescription": None,
            "originalName": attributes.get('name'),
            "owner": attributes.get('owner'),
            "parentPath": '',  # Set below
            "properties": {},  # Set below
            "sourceType": '',  # Set below
            "classifications": [],
            "tags": atlas_entity['classificationNames'],
            "type": self.ATLAS_TO_NAV_TYPE.get(atlas_entity['typeName'].lower()) or atlas_entity['typeName']
        }

        # Convert an Atlas qualified name of form db.tbl.col@cluster to a parentPath of form /db/tbl.
        if atlas_entity['typeName'].lower().startswith('hive_'):
            nav_entity['sourceType'] = 'HIVE'
            qualified_path_parts = re.sub(r'@.*$', '', attributes.get('qualifiedName')).split('.')
            qualified_path_parts.pop()  # Only the parent path is wanted, so drop the entity's own name.
            nav_entity['parentPath'] = '/' + '/'.join(qualified_path_parts)

        if 'classifications' in atlas_entity:
            nav_entity['classifications'] = atlas_entity['classifications']
            # Flatten classification attributes into the entity's properties.
            for atlas_classification in atlas_entity['classifications']:
                if 'attributes' in atlas_classification:
                    # items() instead of the Python-2-only iteritems() so this also runs on Python 3.
                    for key, value in atlas_classification['attributes'].items():
                        nav_entity['properties'][key] = value

        return nav_entity

    def fetch_single_entity(self, dsl_query):
        '''
    Run an Atlas DSL query and return the first matching entity adapted to the
    Navigator structure. Raises CatalogEntityDoesNotExistException when nothing matches.

    REQUEST: hue:8889/metadata/api/navigator/find_entity?type=database&name=default
    SAMPLE response for Navigator find_entity response
    {"status": 0, "entity": {
    "customProperties": null,
    "deleteTime": null,
     "fileSystemPath": "hdfs://nightly6x-1.vpc.cloudera.com:8020/user/hive/warehouse",
     "description": null,
     "params": null,
      "type": "DATABASE",
      "internalType": "hv_database",
      "sourceType": "HIVE",
      "tags": [],
      "deleted": false, "technicalProperties": null,
      "userEntity": false,
      "originalDescription": "Default Hive database",
      "metaClassName": "hv_database",
      "properties": {"__cloudera_internal__hueLink": "https://nightly6x-1.vpc.cloudera.com:8889/hue/metastore/tables/default"},
      "identity": "23",
      "firstClassParentId": null,
      "name": null,
      "extractorRunId": "7##1",
      "sourceId": "7",
       "packageName": "nav",
       "parentPath": null, "originalName": "default"}}
    '''
        response = {"status": 0, "entity": []}

        try:
            atlas_response = self._root.get('/v2/search/dsl?query=%s' % dsl_query,
                                            headers=self.__headers, params=self.__params)
            if 'entities' not in atlas_response or len(atlas_response['entities']) < 1:
                raise CatalogEntityDoesNotExistException('Could not find entity with query: %s' % dsl_query)

            for atlas_entity in atlas_response['entities']:
                response['entity'].append(self.adapt_atlas_entity_to_navigator(atlas_entity))

            return response['entity'][0]
        except RestException as e:
            if e.code == 401:
                raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
            else:
                raise raise_popup_exception('Hue could not query Atlas', detail=e)

    def get_database(self, name):
        """Fetch the hive_db entity with the given name via an Atlas DSL query."""
        if get_catalog_search_cluster():
            qualifiedNameCriteria = 'qualifiedName=\'%s@%s\'' % (name, get_catalog_search_cluster())
        else:
            qualifiedNameCriteria = 'qualifiedName like \'%s@*\'' % name

        return self.fetch_single_entity('hive_db where %s' % qualifiedNameCriteria)

    def get_table(self, database_name, table_name, is_view=False):
        """Fetch the hive_table entity named database.table via an Atlas DSL query."""
        if get_catalog_search_cluster():
            qualifiedNameCriteria = 'qualifiedName=\'%s.%s@%s\'' % (database_name, table_name, get_catalog_search_cluster())
        else:
            qualifiedNameCriteria = 'qualifiedName like \'%s.%s@*\'' % (database_name, table_name)

        return self.fetch_single_entity('hive_table where %s' % qualifiedNameCriteria)

    def get_field(self, database_name, table_name, field_name):
        """Fetch the hive_column entity named database.table.column via an Atlas DSL query."""
        if get_catalog_search_cluster():
            qualifiedNameCriteria = 'qualifiedName=\'%s.%s.%s@%s\'' % (
                database_name, table_name, field_name, get_catalog_search_cluster())
        else:
            qualifiedNameCriteria = 'qualifiedName like \'%s.%s.%s@*\'' % (database_name, table_name, field_name)

        return self.fetch_single_entity('hive_column where %s' % qualifiedNameCriteria)

    def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFields=None,
                                    facetPrefix=None, facetRanges=None, filterQueries=None,
                                    firstClassEntitiesOnly=None, sources=None):
        """
        Free-text entity search against Atlas' basic search endpoint.

        At most one 'tag:'/'classification:', one 'type:' and one 'owner:' facet
        inside query_s is honored, since Atlas cannot search several at once.
        """
        response = {"status": 0, "results": [], "facets": {"tags": {}}}

        # With no query and a 'tags' facet this acts as the list_tags endpoint.
        if not query_s and facetFields and 'tags' in facetFields:
            classification_response = self._root.get('/v2/types/typedefs?type=classification')
            for classification_def in classification_response['classificationDefs']:
                # Quote names containing spaces so they stay a single facet value.
                if ' ' in classification_def['name']:
                    response['facets']['tags']['"' + classification_def['name'] + '"'] = -1
                else:
                    response['facets']['tags'][classification_def['name']] = -1
            return response

        query_s = (query_s.strip() if query_s else '').replace('*', '')

        atlas_type = None
        classification = None
        owner = None

        # Take the first classification and type facets and ignore others as we can't search multiple in Atlas.
        classification_facets = self.CLASSIFICATION_RE.findall(query_s)
        if classification_facets:
            classification = classification_facets[0][0] or classification_facets[0][1]
            query_s = self.CLASSIFICATION_RE.sub('', query_s).strip()
            atlas_type = 'Asset'  # Filtered below to just contain hive_db, hive_table or hive_column

        owner_facets = self.OWNER_RE.findall(query_s)
        if owner_facets:
            owner = owner_facets[0]
            query_s = self.OWNER_RE.sub('', query_s).strip()

        type_facets = self.TYPE_RE.findall(query_s)
        if type_facets:
            # .get() instead of [] so an unknown type falls back to the raw facet
            # value instead of raising KeyError (the original 'or' fallback implied this intent).
            atlas_type = self.NAV_TO_ATLAS_TYPE.get(type_facets[0].lower()) or type_facets[0]
            query_s = self.TYPE_RE.sub('', query_s).strip()

        data = {
            'attributes': None,
            'classification': classification,
            'entityFilters': {
                'condition': 'AND',
                'criterion': [{
                    'condition': 'OR',
                    'criterion': [{
                        'attributeName': 'name',
                        'attributeValue': query_s,
                        'operator': 'contains'
                    }, {
                        'attributeName': 'description',
                        'attributeValue': query_s,
                        'operator': 'contains'
                    }]
                }]
            },
            'excludeDeletedEntities': True,
            'includeClassificationAttributes': True,
            'includeSubClassifications': True,
            'includeSubTypes': True,
            'limit': limit,
            'offset': 0,  # NOTE(review): the offset argument is ignored here -- confirm whether paging is expected.
            'tagFilters': None,
            'termName': None,
            'typeName': atlas_type or 'hive_table'
        }

        if get_catalog_search_cluster():
            data['entityFilters']['criterion'].append({
                'attributeName': 'qualifiedName',
                'operator': 'contains',
                'attributeValue': '@' + get_catalog_search_cluster()
            })

        if owner:
            data['entityFilters']['criterion'].append({
                'attributeName': 'owner',
                'operator': 'startsWith',
                'attributeValue': owner
            })

        try:
            atlas_response = self._root.post('/v2/search/basic', data=json.dumps(data),
                                             contenttype=_JSON_CONTENT_TYPE)

            # Adapt Atlas entities to the Navigator structure in the results.
            if 'entities' in atlas_response:
                for atlas_entity in atlas_response['entities']:
                    # When searching by classification ('Asset'), keep only Hive entities.
                    if atlas_type != 'Asset' or atlas_entity['typeName'].lower() in ('hive_db', 'hive_table', 'hive_column'):
                        response['results'].append(self.adapt_atlas_entity_to_navigator(atlas_entity))

            return response
        except RestException as e:
            if e.code == 401:
                raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
            else:
                raise raise_popup_exception('Hue could not query Atlas', detail=e)

    # search_entities is only used by the table browser to fetch child entities of a given table or database.
    def search_entities(self, query_s, limit=100, offset=0, raw_query=False, **filters):
        """List child entities (tables of a database, columns of a table) via an Atlas DSL query."""
        try:
            found_entities = []

            search_terms = query_s.strip().split() if query_s else []
            parentPath = None
            for term in search_terms:
                if 'parentPath:' in term:
                    name, val = term.split(':')
                    # '/db/table' -> 'db.table' to match Atlas qualified names.
                    parentPath = val.strip('"').lstrip('/').replace('/', '.')

            if query_s == 'type:database':
                if get_catalog_search_cluster():
                    atlas_dsl_query = 'from hive_db where qualifiedName like \'*@%s\' limit %s' % (
                        get_catalog_search_cluster(), limit)
                else:
                    atlas_dsl_query = 'from hive_db limit %s' % limit
            elif not parentPath:
                return found_entities
            else:
                # One path segment means a database (children are tables); more means a table (children are columns).
                atlas_type = 'hive_table' if parentPath.count('.') == 0 else 'hive_column'
                if get_catalog_search_cluster():
                    atlas_dsl_query = 'from %s where qualifiedName like \'%s*@%s\' limit %s' % (
                        atlas_type, parentPath, get_catalog_search_cluster(), limit)
                else:
                    atlas_dsl_query = 'from %s where qualifiedName like \'%s*\' limit %s' % (
                        atlas_type, parentPath, limit)

            atlas_response = self._root.get('/v2/search/dsl?query=%s' % atlas_dsl_query)

            # Adapt Atlas entities to the Navigator structure in the results.
            if 'entities' in atlas_response:
                for atlas_entity in atlas_response['entities']:
                    found_entities.append(self.adapt_atlas_entity_to_navigator(atlas_entity))

            return found_entities
        except RestException as e:
            if e.code == 401:
                raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
            else:
                raise raise_popup_exception('Hue could not query Atlas', detail=e)

    def suggest(self, prefix=None):
        """Suggest entity names for the given prefix.

        NOTE(review): 'interactive/suggestions' looks like a Navigator path rather
        than an Atlas v2 endpoint -- confirm it is reachable on Atlas deployments.
        """
        try:
            return self._root.get('interactive/suggestions?query=%s' % (prefix or '*'))
        except RestException as e:
            if e.code == 401:
                raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
            else:
                raise raise_popup_exception('Failed to search for entities', detail=e)

    def get_entity(self, entity_id):
        """
    # TODO: get entity by Atlas __guid or qualifiedName
    GET /v2/search/dsl?query=?
    """
        try:
            return self._root.get('entities/%s' % entity_id, headers=self.__headers, params=self.__params)
        except RestException as e:
            msg = 'Failed to get entity %s: %s' % (entity_id, str(e))
            LOG.error(msg)
            # str(e) instead of the removed-in-Python-3 e.message attribute.
            raise CatalogApiException(str(e))

    def update_entity(self, entity, **metadata):
        """
    PUT /api/v3/entities/:id
    http://cloudera.github.io/navigator/apidocs/v3/path__v3_entities_-id-.html
    """
        try:
            # Workarounds NAV-6187: if we don't re-send those, they would get erased.
            properties = {
                'name': entity['name'],
                'description': entity['description'],
                'properties': entity['properties'] or {},
                'customProperties': entity['customProperties'] or {}
            }
            properties.update(metadata)
            data = json.dumps(properties)

            return self._root.put('entities/%(identity)s' % entity, params=self.__params, data=data,
                                  contenttype=_JSON_CONTENT_TYPE, allow_redirects=True, clear_cookies=True)
        except RestException as e:
            if e.code == 401:
                raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
            else:
                raise raise_popup_exception('Failed to update entity', detail=e)

    def get_cluster_source_ids(self):
        # NOTE(review): disabled -- a previous implementation queried 'entities'
        # filtered by clusterName; kept returning [] until re-enabled.
        return []

    def add_tags(self, entity_id, tags):
        """Append the given tags to the entity's current tags and persist."""
        entity = self.get_entity(entity_id)
        new_tags = entity['tags'] or []
        new_tags.extend(tags)
        return self.update_entity(entity, tags=new_tags)

    def delete_tags(self, entity_id, tags):
        """Remove the given tags from the entity where present and persist."""
        entity = self.get_entity(entity_id)
        new_tags = entity['tags'] or []
        for tag in tags:
            if tag in new_tags:
                new_tags.remove(tag)
        return self.update_entity(entity, tags=new_tags)

    def update_properties(self, entity_id, properties, modified_custom_metadata=None, deleted_custom_metadata_keys=None):
        """Update entity properties, applying custom-metadata additions then deletions."""
        entity = self.get_entity(entity_id)

        # Build the merged property dict once: previously, when entity['properties']
        # was empty, the deletion branch rebuilt it from scratch and silently dropped
        # the modifications applied just before.
        if modified_custom_metadata or deleted_custom_metadata_keys:
            merged = entity['properties'] or {}
            if modified_custom_metadata:
                merged.update(modified_custom_metadata)
            if deleted_custom_metadata_keys:
                for key in deleted_custom_metadata_keys:
                    merged.pop(key, None)
            properties['properties'] = merged
        return self.update_entity(entity, **properties)

    def delete_metadata_properties(self, entity_id, property_keys):
        """Delete the given keys from the entity's properties and persist."""
        entity = self.get_entity(entity_id)
        new_props = entity['properties'] or {}
        for key in property_keys:
            new_props.pop(key, None)
        return self.update_entity(entity, properties=new_props)

    def get_lineage(self, entity_id):
        """
    GET /api/v3/lineage/entityIds=:id
    http://cloudera.github.io/navigator/apidocs/v3/path__v3_lineage.html
    """
        try:
            params = self.__params + (('entityIds', entity_id),)

            return self._root.get('lineage', headers=self.__headers, params=params)
        except RestException as e:
            if e.code == 401:
                raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
            else:
                raise raise_popup_exception('Failed to get lineage', detail=e)

    def create_namespace(self, namespace, description=None):
        """Create a custom-metadata namespace."""
        try:
            data = json.dumps({'name': namespace, 'description': description})
            return self._root.post('models/namespaces/', data=data,
                                   contenttype=_JSON_CONTENT_TYPE, clear_cookies=True)
        except RestException as e:
            if e.code == 401:
                raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
            else:
                raise raise_popup_exception('Failed to create namespace', detail=e)

    def get_namespace(self, namespace):
        """Fetch a custom-metadata namespace definition."""
        try:
            return self._root.get('models/namespaces/%(namespace)s' % {'namespace': namespace})
        except RestException as e:
            if e.code == 401:
                raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
            else:
                raise raise_popup_exception('Failed to get namespace', detail=e)

    def create_namespace_property(self, namespace, properties):
        """Create a property inside a custom-metadata namespace."""
        try:
            data = json.dumps(properties)
            return self._root.post('models/namespaces/%(namespace)s/properties' % {'namespace': namespace},
                                   data=data, contenttype=_JSON_CONTENT_TYPE, clear_cookies=True)
        except RestException as e:
            if e.code == 401:
                raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
            else:
                # Message corrected: this endpoint creates a property, not a namespace.
                raise raise_popup_exception('Failed to create namespace property', detail=e)

    def get_namespace_properties(self, namespace):
        """List the properties of a custom-metadata namespace."""
        try:
            return self._root.get('models/namespaces/%(namespace)s/properties' % {'namespace': namespace})
        except RestException as e:
            if e.code == 401:
                raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
            else:
                # Message corrected: this is a read, not a create.
                raise raise_popup_exception('Failed to get namespace properties', detail=e)

    def map_namespace_property(self, clazz, properties):
        """Map a namespace property onto a model class."""
        try:
            data = json.dumps(properties)
            return self._root.post('models/packages/nav/classes/%(class)s/properties' % {'class': clazz},
                                   data=data, contenttype=_JSON_CONTENT_TYPE, clear_cookies=True)
        except RestException as e:
            if e.code == 401:
                raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
            else:
                raise raise_popup_exception('Failed to map class', detail=e)

    def get_model_properties_mapping(self):
        """List the model/property mappings."""
        try:
            return self._root.get('models/properties/mappings')
        except RestException as e:
            if e.code == 401:
                raise raise_popup_exception('Hue could not authenticate to Atlas', detail=e)
            else:
                raise raise_popup_exception('Failed to get models properties mappings', detail=e)

    def _fillup_properties(self):
        """Ensure the Hue catalog namespace and its 'relatedDocuments' property exist (runs once per process)."""
        global _HAS_CATALOG_NAMESPACE

        if _HAS_CATALOG_NAMESPACE is None:
            response = self.get_namespace(namespace=AtlasApi.CATALOG_NAMESPACE)
            if not response:
                self.create_namespace(namespace=AtlasApi.CATALOG_NAMESPACE,
                                      description="Set of fields to augment the data catalog")

            properties = self.get_namespace_properties(namespace=AtlasApi.CATALOG_NAMESPACE)

            if not any(_property['name'] == 'relatedDocuments' for _property in properties):
                self.create_namespace_property(
                    namespace=AtlasApi.CATALOG_NAMESPACE,
                    properties={
                        "name": "relatedDocuments",
                        "displayName": "Related documents",
                        "description": "List of Hue document UUIDs related to this entity",
                        "multiValued": True,
                        "maxLength": 36,
                        "pattern": ".*",  # UUID
                        "enumValues": None,
                        "type": "TEXT"
                    })

                # Might want to check if the mapping is already done
                for clazz in ('hv_table', 'hv_view'):
                    self.map_namespace_property(clazz, properties=[{
                        "namespace": AtlasApi.CATALOG_NAMESPACE,
                        "name": "relatedDocuments"
                    }])

            _HAS_CATALOG_NAMESPACE = True

    def _get_boosted_term(self, term):
        """Build a field-boosted search clause for `term`.

        NOTE(review): the joins use 'AND'/'OR' with no surrounding spaces, so the
        result looks like '(...)AND(...)' -- confirm the consumer tolerates that.
        """
        return 'AND'.join([
            # Match the term against each weighted field.
            '(%s)' % 'OR'.join(['(%s:%s*^%s)' % (field, term, weight)
                                for (field, weight) in AtlasApi.DEFAULT_SEARCH_FIELDS]),
            # Boost entities having any value in the enriched fields.
            '(%s)' % 'OR'.join(['(%s:[* TO *])' % field
                                for (field, weight) in AtlasApi.DEFAULT_SEARCH_FIELDS])
            # Could add certain customProperties and properties
        ])

    def _clean_path(self, path):
        """Return (last path segment, whole path with slashes escaped), trailing slash stripped."""
        stripped = path.rstrip('/')
        return stripped.split('/')[-1], self._escape_slashes(stripped)

    def _escape_slashes(self, s):
        # Escape '/' for search syntaxes where it is special; '\\/' spelled
        # explicitly instead of relying on the unrecognized '\/' string escape.
        return s.replace('/', '\\/')
Exemplo n.º 47
0
class OozieApi(object):
    """Thin client for the Oozie server REST API.

    Covers job listing (workflows, coordinators, bundles), single-job
    lookup, job control (start/kill/rerun/...), submission, logs, and the
    ``admin/*`` endpoints. Requests impersonate ``user`` via ``doAs``.
    """

    # Filter keys accepted by the job-listing endpoints.
    VALID_JOB_FILTERS = ('name', 'user', 'group', 'status', 'startcreatedtime',
                         'text')
    # Filter keys accepted when fetching job logs.
    VALID_LOG_FILTERS = set(('recent', 'limit', 'loglevel', 'text'))

    def __init__(self,
                 oozie_url,
                 user,
                 security_enabled=False,
                 api_version=API_VERSION,
                 ssl_cert_ca_verify=True):
        """Build the HTTP client against ``oozie_url``/``api_version``.

        ``user`` may be a user object (its ``.username`` is used) or a
        plain username string.
        """
        self._url = posixpath.join(oozie_url, api_version)
        self._client = HttpClient(self._url, logger=LOG)

        if security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

        self._root = Resource(self._client)
        self._security_enabled = security_enabled
        # To store username info: accept either a user object or a string.
        if hasattr(user, 'username'):
            self.user = user.username
        else:
            self.user = user
        self.api_version = api_version

    def __str__(self):
        return "OozieApi at %s" % (self._url, )

    @property
    def url(self):
        return self._url

    @property
    def security_enabled(self):
        return self._security_enabled

    def _get_params(self):
        """Common request parameters: impersonation and timezone."""
        if self.security_enabled:
            return {'doAs': self.user, 'timezone': TIME_ZONE.get()}
        return {
            'user.name': DEFAULT_USER,
            'doAs': self.user,
            'timezone': TIME_ZONE.get()
        }

    def _get_oozie_properties(self, properties=None):
        """Merge caller ``properties`` over the default job configuration."""
        defaults = {
            'user.name': self.user,
        }

        if properties is not None:
            defaults.update(properties)

        return defaults

    def _serialize_job_filters(self, filters):
        """Serialize ``filters`` ((key, value) pairs) into Oozie's
        semicolon-separated ``filter`` request parameter.

        Raises ValueError on a key not in VALID_JOB_FILTERS.
        """
        filter_list = []
        for key, val in filters:
            if key not in OozieApi.VALID_JOB_FILTERS:
                raise ValueError(
                    '"%s" is not a valid filter for selecting jobs' % (key, ))
            filter_list.append('%s=%s' % (key, val))
        return ';'.join(filter_list)

    def get_jobs(self, jobtype, offset=None, cnt=None, filters=None):
        """
    Get a list of Oozie jobs.

    Note that offset is 1-based.
    ``filters`` is an iterable of (key, value) pairs; valid keys are in
    VALID_JOB_FILTERS: name, user, group, status, startcreatedtime, text.
    """
        params = self._get_params()
        if offset is not None:
            params['offset'] = str(offset)
        if cnt is not None:
            params['len'] = str(cnt)
        if filters is None:
            filters = []
        params['jobtype'] = jobtype
        params['filter'] = self._serialize_job_filters(filters)

        # Send the request and wrap the response per job type.
        resp = self._root.get('jobs', params)
        if jobtype == 'wf':
            wf_list = WorkflowList(self, resp, filters=filters)
        elif jobtype == 'coord':
            wf_list = CoordinatorList(self, resp, filters=filters)
        else:
            wf_list = BundleList(self, resp, filters=filters)
        return wf_list

    def get_workflows(self, offset=None, cnt=None, filters=None):
        return self.get_jobs('wf', offset, cnt, filters)

    def get_coordinators(self, offset=None, cnt=None, filters=None):
        return self.get_jobs('coord', offset, cnt, filters)

    def get_bundles(self, offset=None, cnt=None, filters=None):
        return self.get_jobs('bundle', offset, cnt, filters)

    # TODO: make get_job accept any jobid
    def get_job(self, jobid):
        """
    get_job(jobid) -> Workflow
    """
        params = self._get_params()
        resp = self._root.get('job/%s' % (jobid, ), params)
        wf = Workflow(self, resp)
        return wf

    def get_coordinator(self, jobid, offset=None, cnt=None, filters=None):
        """Fetch one coordinator with its actions, newest first."""
        params = self._get_params()
        if offset is not None:
            params['offset'] = str(offset)
        if cnt is not None:
            params['len'] = str(cnt)
        if filters is None:
            filters = []
        params.update({'order': 'desc'})
        params['filter'] = self._serialize_job_filters(filters)

        resp = self._root.get('job/%s' % (jobid, ), params)
        return Coordinator(self, resp)

    def get_bundle(self, jobid):
        params = self._get_params()
        resp = self._root.get('job/%s' % (jobid, ), params)
        return Bundle(self, resp)

    def get_job_definition(self, jobid):
        """
    get_job_definition(jobid) -> Definition (xml string)
    """
        params = self._get_params()
        params['show'] = 'definition'
        return self._root.get('job/%s' % (jobid, ), params)

    def get_job_log(self, jobid, logfilter=None):
        """
    get_job_log(jobid) -> Log (xml string)

    ``logfilter`` is an iterable of (key, value) pairs; valid keys are in
    VALID_LOG_FILTERS: recent, limit, loglevel, text.
    """
        params = self._get_params()
        params['show'] = 'log'

        filter_list = []
        if logfilter is None:
            logfilter = []
        for key, val in logfilter:
            if key not in OozieApi.VALID_LOG_FILTERS:
                raise ValueError('"%s" is not a valid filter for job logs' %
                                 (key, ))
            filter_list.append('%s=%s' % (key, val))
        params['logfilter'] = ';'.join(filter_list)
        return self._root.get('job/%s' % (jobid, ), params)

    def get_job_status(self, jobid):
        params = self._get_params()
        params['show'] = 'status'

        xml = self._root.get('job/%s' % (jobid, ), params)
        return xml

    def get_action(self, action_id):
        """Fetch a single action; the wrapper class is picked from the
        action id pattern ('C@' coordinator, 'B@' bundle, else workflow)."""
        if 'C@' in action_id:
            Klass = CoordinatorAction
        elif 'B@' in action_id:
            Klass = BundleAction
        else:
            Klass = WorkflowAction
        params = self._get_params()
        resp = self._root.get('job/%s' % (action_id, ), params)
        return Klass(resp)

    def job_control(self, jobid, action, properties=None, parameters=None):
        """
    job_control(jobid, action) -> None
    Raise RestException on error.
    """
        if action not in ('start', 'suspend', 'resume', 'kill', 'rerun',
                          'coord-rerun', 'bundle-rerun', 'change', 'ignore',
                          'update'):
            msg = 'Invalid oozie job action: %s' % (action, )
            LOG.error(msg)
            raise ValueError(msg)
        properties = self._get_oozie_properties(properties)
        params = self._get_params()
        params['action'] = action
        if parameters is not None:
            params.update(parameters)

        return self._root.put('job/%s' % jobid,
                              params,
                              data=config_gen(properties),
                              contenttype=_XML_CONTENT_TYPE)

    def submit_workflow(self, application_path, properties=None):
        """
    submit_workflow(application_path, properties=None) -> jobid

    Raise RestException on error.
    """
        defaults = {
            'oozie.wf.application.path': application_path,
            'user.name': self.user,
        }

        if properties is not None:
            defaults.update(properties)
        properties = defaults

        return self.submit_job(properties)

    # Is name actually submit_coord?
    def submit_job(self, properties=None):
        """
    submit_job(properties=None, id=None) -> jobid

    Raise RestException on error.
    """
        defaults = {
            'user.name': self.user,
        }

        if properties is not None:
            defaults.update(properties)

        properties = defaults

        params = self._get_params()
        resp = self._root.post('jobs',
                               params,
                               data=config_gen(properties),
                               contenttype=_XML_CONTENT_TYPE)
        return resp['id']

    def dryrun(self, properties=None):
        """Validate a job configuration without actually submitting it."""
        defaults = {
            'user.name': self.user,
        }

        if properties is not None:
            defaults.update(properties)

        properties = defaults

        params = self._get_params()
        params['action'] = 'dryrun'
        return self._root.post('jobs',
                               params,
                               data=config_gen(properties),
                               contenttype=_XML_CONTENT_TYPE)

    def rerun(self, jobid, properties=None, params=None):
        """Rerun ``jobid`` with merged properties/params.

        Fixed: the original called ``self._get_params().update(params)``,
        which updated a temporary dict and discarded it, so the default
        request parameters (doAs, timezone) were lost whenever ``params``
        was supplied. Caller-supplied params now override the defaults,
        and the caller's dict is no longer mutated.
        """
        properties = self._get_oozie_properties(properties)
        merged_params = self._get_params()
        if params is not None:
            merged_params.update(params)
        params = merged_params

        params['action'] = 'rerun'

        return self._root.put('job/%s' % jobid,
                              params,
                              data=config_gen(properties),
                              contenttype=_XML_CONTENT_TYPE)

    def get_build_version(self):
        """
    get_build_version() -> Build version (dictionary)
    """
        params = self._get_params()
        resp = self._root.get('admin/build-version', params)
        return resp

    def get_instrumentation(self):
        params = self._get_params()
        resp = self._root.get('admin/instrumentation', params)
        return resp

    def get_metrics(self):
        params = self._get_params()
        resp = self._root.get('admin/metrics', params)
        return resp

    def get_configuration(self):
        """
    get_configuration() -> Oozie config (dictionary)
    """
        params = self._get_params()
        resp = self._root.get('admin/configuration', params)
        return resp

    def get_oozie_status(self):
        """
    get_oozie_status() -> Oozie status (dictionary)
    """
        params = self._get_params()
        resp = self._root.get('admin/status', params)
        return resp

    def get_oozie_slas(self, **kwargs):
        """
    filter=
      app_name=my-sla-app
      id=0000002-131206135002457-oozie-oozi-W
      nominal_start=2013-06-18T00:01Z
      nominal_end=2013-06-23T00:01Z
    """
        params = self._get_params()
        # .items() instead of the Python 2-only .iteritems(); identical
        # behavior, works on both Python versions.
        params['filter'] = ';'.join(
            ['%s=%s' % (key, val) for key, val in kwargs.items()])
        resp = self._root.get('sla', params)
        return resp['slaSummaryList']
Exemplo n.º 48
0
class ResourceManagerApi(object):
  """Client for the YARN ResourceManager REST API (``ws/<version>``).

  The effective username is kept in thread-local storage so a single
  shared instance can impersonate different users across request threads.
  """

  def __init__(self, rm_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(rm_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local() # To store user info

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def _get_params(self):
    """Return impersonation parameters for the current thread's user."""
    params = {}

    if self.username != DEFAULT_USER.get(): # We impersonate if needed
      params['doAs'] = self.username
      if not self.security_enabled:
        params['user.name'] = DEFAULT_USER.get()

    return params

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  def setuser(self, user):
    """Set the thread-local user and return the previous one."""
    curr = self.user
    self._thread_local.user = user
    return curr

  @property
  def user(self):
    return self.username # Backward compatibility

  @property
  def username(self):
    # Falls back to the configured default until setuser() is called on
    # this thread.
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/info', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    params = self._get_params()
    params.update(kwargs)
    return self._execute(self._root.get, 'cluster/apps', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def kill(self, app_id):
    """Ask YARN to move the application to the KILLED state."""
    data = {'state': 'KILLED'}
    token = None

    # Tokens are managed within the kill method but should be moved out when not alpha anymore or we support submitting an app.
    # NOTE: the 'and False' deliberately disables the delegation-token flow
    # for now; kept for when the feature leaves alpha.
    if self.security_enabled and False:
      full_token = self.delegation_token()
      if 'token' not in full_token:
        raise PopupException(_('YARN did not return any token field.'), detail=smart_str(full_token))
      data['X-Hadoop-Delegation-Token'] = token = full_token.pop('token')
      LOG.debug('Received delegation token %s' % full_token)

    try:
      params = self._get_params()
      return self._execute(self._root.put, 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}, params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
    finally:
      if token:
        self.cancel_token(token)

  def delegation_token(self):
    params = self._get_params()
    data = {'renewer': self.username}
    return self._execute(self._root.post, 'cluster/delegation-token', params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def cancel_token(self, token):
    params = self._get_params()
    headers = {'Hadoop-YARN-RM-Delegation-Token': token}
    # Fixed: the original format string had no %s placeholder
    # ('... token of ' % self.username), so the % operator raised
    # TypeError instead of logging. Use lazy logger formatting.
    LOG.debug('Canceling delegation token of %s', self.username)
    return self._execute(self._root.delete, 'cluster/delegation-token', params=params, headers=headers)

  def _execute(self, function, *args, **kwargs):
    """Invoke ``function`` and surface RM-failover responses as errors."""
    response = function(*args, **kwargs)

    # YARN-2605: Yarn does not use proper HTTP redirects when the standby RM has
    # failed back to the master RM.
    if isinstance(response, str) and response.startswith('This is standby RM. Redirecting to the current active RM'):
      raise YarnFailoverOccurred(response)

    return response
Exemplo n.º 49
0
class SolrApi(object):
    """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler

  Translates dashboard collection/query descriptors into Solr select and
  suggest requests (q, fq, facet.* and json.facet parameters).

  NOTE(review): this class uses Python 2-only constructs
  (urllib.unquote, list-returning filter(), 'except X, e' syntax).
  """

    def __init__(
        self, solr_url, user, security_enabled=SECURITY_ENABLED.get(), ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()
    ):
        """Build an HTTP client for ``solr_url``, with optional Kerberos."""
        self._url = solr_url
        self._user = user
        self._client = HttpClient(self._url, logger=LOG)
        self.security_enabled = security_enabled

        if self.security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

        self._root = resource.Resource(self._client)

        # The Kerberos handshake requires two requests in order to authenticate,
        # but if our first request is a PUT/POST, it might flat-out reject the
        # first request if the body is too large. So, connect here in order to get
        # a cookie so future PUT/POSTs will be pre-authenticated.
        if self.security_enabled:
            self._root.invoke("HEAD", "/")

    def _get_params(self):
        """Base request parameters: impersonation via doAs (plus user.name
        when security is off). Returned as a tuple of pairs, not a dict,
        because parameter order and repeats matter for Solr."""
        if self.security_enabled:
            return (("doAs", self._user),)
        return (("user.name", DEFAULT_USER), ("doAs", self._user))

    def _get_q(self, query):
        """Join the query strips with OR into the main Solr q parameter.
        Empty strips fall back to EMPTY_QUERY; parenthesized when there
        are two or more strips."""
        q_template = "(%s)" if len(query["qs"]) >= 2 else "%s"
        return "OR".join([q_template % (q["q"] or EMPTY_QUERY.get()) for q in query["qs"]]).encode("utf-8")

    def _get_aggregate_function(self, facet):
        """Map a facet descriptor to a Solr aggregate expression;
        'median' is expressed as percentile(field,50)."""
        props = {
            "field": facet["field"],
            "aggregate": facet["properties"]["aggregate"] if "properties" in facet else facet["aggregate"],
        }

        if props["aggregate"] == "median":
            return "percentile(%(field)s,50)" % props
        else:
            return "%(aggregate)s(%(field)s)" % props

    def _get_range_borders(self, collection, query):
        """Compute the time-range properties (field, from, to, gap) for the
        collection's time filter.

        GAPS maps a rolling window size to a per-widget-type bucket gap
        (chosen so histograms get ~100 slots and facet widgets ~10).
        Returns an empty dict when no time filter applies.
        """
        props = {}
        GAPS = {
            "5MINUTES": {
                "histogram-widget": {"coeff": "+3", "unit": "SECONDS"},  # ~100 slots
                "bucket-widget": {"coeff": "+3", "unit": "SECONDS"},  # ~100 slots
                "bar-widget": {"coeff": "+3", "unit": "SECONDS"},  # ~100 slots
                "facet-widget": {"coeff": "+1", "unit": "MINUTES"},  # ~10 slots
            },
            "30MINUTES": {
                "histogram-widget": {"coeff": "+20", "unit": "SECONDS"},
                "bucket-widget": {"coeff": "+20", "unit": "SECONDS"},
                "bar-widget": {"coeff": "+20", "unit": "SECONDS"},
                "facet-widget": {"coeff": "+5", "unit": "MINUTES"},
            },
            "1HOURS": {
                "histogram-widget": {"coeff": "+30", "unit": "SECONDS"},
                "bucket-widget": {"coeff": "+30", "unit": "SECONDS"},
                "bar-widget": {"coeff": "+30", "unit": "SECONDS"},
                "facet-widget": {"coeff": "+10", "unit": "MINUTES"},
            },
            "12HOURS": {
                "histogram-widget": {"coeff": "+7", "unit": "MINUTES"},
                "bucket-widget": {"coeff": "+7", "unit": "MINUTES"},
                "bar-widget": {"coeff": "+7", "unit": "MINUTES"},
                "facet-widget": {"coeff": "+1", "unit": "HOURS"},
            },
            "1DAYS": {
                "histogram-widget": {"coeff": "+15", "unit": "MINUTES"},
                "bucket-widget": {"coeff": "+15", "unit": "MINUTES"},
                "bar-widget": {"coeff": "+15", "unit": "MINUTES"},
                "facet-widget": {"coeff": "+3", "unit": "HOURS"},
            },
            "2DAYS": {
                "histogram-widget": {"coeff": "+30", "unit": "MINUTES"},
                "bucket-widget": {"coeff": "+30", "unit": "MINUTES"},
                "bar-widget": {"coeff": "+30", "unit": "MINUTES"},
                "facet-widget": {"coeff": "+6", "unit": "HOURS"},
            },
            "7DAYS": {
                "histogram-widget": {"coeff": "+3", "unit": "HOURS"},
                "bucket-widget": {"coeff": "+3", "unit": "HOURS"},
                "bar-widget": {"coeff": "+3", "unit": "HOURS"},
                "facet-widget": {"coeff": "+1", "unit": "DAYS"},
            },
            "1MONTHS": {
                "histogram-widget": {"coeff": "+12", "unit": "HOURS"},
                "bucket-widget": {"coeff": "+12", "unit": "HOURS"},
                "bar-widget": {"coeff": "+12", "unit": "HOURS"},
                "facet-widget": {"coeff": "+5", "unit": "DAYS"},
            },
            "3MONTHS": {
                "histogram-widget": {"coeff": "+1", "unit": "DAYS"},
                "bucket-widget": {"coeff": "+1", "unit": "DAYS"},
                "bar-widget": {"coeff": "+1", "unit": "DAYS"},
                "facet-widget": {"coeff": "+30", "unit": "DAYS"},
            },
            "1YEARS": {
                "histogram-widget": {"coeff": "+3", "unit": "DAYS"},
                "bucket-widget": {"coeff": "+3", "unit": "DAYS"},
                "bar-widget": {"coeff": "+3", "unit": "DAYS"},
                "facet-widget": {"coeff": "+12", "unit": "MONTHS"},
            },
            "2YEARS": {
                "histogram-widget": {"coeff": "+7", "unit": "DAYS"},
                "bucket-widget": {"coeff": "+7", "unit": "DAYS"},
                "bar-widget": {"coeff": "+7", "unit": "DAYS"},
                "facet-widget": {"coeff": "+3", "unit": "MONTHS"},
            },
            "10YEARS": {
                "histogram-widget": {"coeff": "+1", "unit": "MONTHS"},
                "bucket-widget": {"coeff": "+1", "unit": "MONTHS"},
                "bar-widget": {"coeff": "+1", "unit": "MONTHS"},
                "facet-widget": {"coeff": "+1", "unit": "YEARS"},
            },
        }

        time_field = collection["timeFilter"].get("field")

        if time_field and (collection["timeFilter"]["value"] != "all" or collection["timeFilter"]["type"] == "fixed"):
            # fqs overrides main time filter
            fq_time_ids = [fq["id"] for fq in query["fqs"] if fq["field"] == time_field]
            props["time_filter_overrides"] = fq_time_ids
            props["time_field"] = time_field

            if collection["timeFilter"]["type"] == "rolling":
                # Rolling window: relative NOW-based range.
                props["field"] = collection["timeFilter"]["field"]
                props["from"] = "NOW-%s" % collection["timeFilter"]["value"]
                props["to"] = "NOW"
                props["gap"] = GAPS.get(collection["timeFilter"]["value"])
            elif collection["timeFilter"]["type"] == "fixed":
                # Fixed window: absolute from/to; gap computed later.
                props["field"] = collection["timeFilter"]["field"]
                props["from"] = collection["timeFilter"]["from"]
                props["to"] = collection["timeFilter"]["to"]
                props["fixed"] = True

        return props

    def _get_time_filter_query(self, timeFilter, facet):
        """Return facet range start/end/gap for ``facet`` from the computed
        time filter; fixed windows get their gap from
        _compute_range_facet, rolling windows from the GAPS table."""
        if "fixed" in timeFilter:
            props = {}
            stat_facet = {"min": timeFilter["from"], "max": timeFilter["to"]}
            _compute_range_facet(facet["widgetType"], stat_facet, props, stat_facet["min"], stat_facet["max"])
            gap = props["gap"]
            unit = re.split("\d+", gap)[1]
            return {
                "start": "%(from)s/%(unit)s" % {"from": timeFilter["from"], "unit": unit},
                "end": "%(to)s/%(unit)s" % {"to": timeFilter["to"], "unit": unit},
                "gap": "%(gap)s" % props,  # add a 'auto'
            }
        else:
            gap = timeFilter["gap"][facet["widgetType"]]
            return {
                "start": "%(from)s/%(unit)s" % {"from": timeFilter["from"], "unit": gap["unit"]},
                "end": "%(to)s/%(unit)s" % {"to": timeFilter["to"], "unit": gap["unit"]},
                "gap": "%(coeff)s%(unit)s/%(unit)s" % gap,  # add a 'auto'
            }

    def _get_fq(self, collection, query):
        """Translate the collection time filter and the query's facet
        filters into Solr ``fq`` parameters (tuple of ("fq", value) pairs,
        tagged with {!tag=...} so facets can exclude them).

        NOTE(review): urllib.unquote is Python 2; py3 would need
        urllib.parse.unquote.
        """
        params = ()
        timeFilter = {}

        if collection:
            timeFilter = self._get_range_borders(collection, query)
        if timeFilter and not timeFilter.get("time_filter_overrides"):
            params += (("fq", urllib.unquote(utf_quoter("%(field)s:[%(from)s TO %(to)s]" % timeFilter))),)

        # Merge facets queries on same fields
        grouped_fqs = groupby(query["fqs"], lambda x: (x["type"], x["field"]))
        merged_fqs = []
        for key, group in grouped_fqs:
            field_fq = next(group)
            for fq in group:
                for f in fq["filter"]:
                    field_fq["filter"].append(f)
            merged_fqs.append(field_fq)

        for fq in merged_fqs:
            if fq["type"] == "field":
                fields = fq["field"] if type(fq["field"]) == list else [fq["field"]]  # 2D facets support
                for field in fields:
                    f = []
                    for _filter in fq["filter"]:
                        values = (
                            _filter["value"] if type(_filter["value"]) == list else [_filter["value"]]
                        )  # 2D facets support
                        if fields.index(field) < len(values):  # Lowest common field denominator
                            value = values[fields.index(field)]
                            exclude = "-" if _filter["exclude"] else ""
                            # Values with spaces must be quoted; others use
                            # the {!field} parser to avoid analysis issues.
                            if value is not None and " " in force_unicode(value):
                                value = force_unicode(value).replace('"', '\\"')
                                f.append('%s%s:"%s"' % (exclude, field, value))
                            else:
                                f.append("%s{!field f=%s}%s" % (exclude, field, value))
                    _params = "{!tag=%(id)s}" % fq + " ".join(f)
                    params += (("fq", urllib.unquote(utf_quoter(_params))),)
            elif fq["type"] == "range":
                params += (
                    (
                        "fq",
                        "{!tag=%(id)s}" % fq
                        + " ".join(
                            [
                                urllib.unquote(
                                    utf_quoter(
                                        "%s%s:[%s TO %s}"
                                        % ("-" if field["exclude"] else "", fq["field"], f["from"], f["to"])
                                    )
                                )
                                for field, f in zip(fq["filter"], fq["properties"])
                            ]
                        ),
                    ),
                )
            elif fq["type"] == "range-up":
                # Open-ended range: direction depends on the is_up flag.
                params += (
                    (
                        "fq",
                        "{!tag=%(id)s}" % fq
                        + " ".join(
                            [
                                urllib.unquote(
                                    utf_quoter(
                                        "%s%s:[%s TO %s}"
                                        % (
                                            "-" if field["exclude"] else "",
                                            fq["field"],
                                            f["from"] if fq["is_up"] else "*",
                                            "*" if fq["is_up"] else f["from"],
                                        )
                                    )
                                )
                                for field, f in zip(fq["filter"], fq["properties"])
                            ]
                        ),
                    ),
                )
            elif fq["type"] == "map":
                # Bounding-box filter on the lat/lon fields.
                _keys = fq.copy()
                _keys.update(fq["properties"])
                params += (
                    (
                        "fq",
                        "{!tag=%(id)s}" % fq
                        + urllib.unquote(
                            utf_quoter(
                                "%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}" % _keys
                            )
                        ),
                    ),
                )

        return params

    def query(self, collection, query):
        """Run the dashboard ``query`` against ``collection``'s Solr
        collection and return the parsed JSON select response.

        Builds q/fq/facet parameters from the collection's widget
        descriptors; rows are capped at 1000 and start offset at 10000.
        """
        solr_query = {}

        solr_query["collection"] = collection["name"]

        if query.get("download"):
            solr_query["rows"] = 1000
            solr_query["start"] = 0
        else:
            solr_query["rows"] = int(collection["template"]["rows"] or 10)
            solr_query["start"] = int(query["start"])

        solr_query["rows"] = min(solr_query["rows"], 1000)
        solr_query["start"] = min(solr_query["start"], 10000)

        params = self._get_params() + (
            ("q", self._get_q(query)),
            ("wt", "json"),
            ("rows", solr_query["rows"]),
            ("start", solr_query["start"]),
        )

        if any(collection["facets"]):
            params += (("facet", "true"), ("facet.mincount", 0), ("facet.limit", 10))
            json_facets = {}

            timeFilter = self._get_range_borders(collection, query)

            for facet in collection["facets"]:
                if facet["type"] == "query":
                    params += (("facet.query", "%s" % facet["field"]),)
                elif facet["type"] == "range" or facet["type"] == "range-up":
                    keys = {
                        "id": "%(id)s" % facet,
                        "field": facet["field"],
                        "key": "%(field)s-%(id)s" % facet,
                        "start": facet["properties"]["start"],
                        "end": facet["properties"]["end"],
                        "gap": facet["properties"]["gap"],
                        "mincount": int(facet["properties"]["mincount"]),
                    }

                    # Align the facet range with the active time filter
                    # unless this facet's own fq already overrides it.
                    if (
                        timeFilter
                        and timeFilter["time_field"] == facet["field"]
                        and (
                            facet["id"] not in timeFilter["time_filter_overrides"]
                            or facet["widgetType"] != "histogram-widget"
                        )
                    ):
                        keys.update(self._get_time_filter_query(timeFilter, facet))

                    params += (
                        (
                            "facet.range",
                            "{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
                            % keys,
                        ),
                    )
                elif facet["type"] == "field":
                    keys = {
                        "id": "%(id)s" % facet,
                        "field": facet["field"],
                        "key": "%(field)s-%(id)s" % facet,
                        # facet-widget fetches one extra row to detect "more".
                        "limit": int(facet["properties"].get("limit", 10))
                        + (1 if facet["widgetType"] == "facet-widget" else 0),
                        "mincount": int(facet["properties"]["mincount"]),
                    }
                    params += (
                        (
                            "facet.field",
                            "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s"
                            % keys,
                        ),
                    )
                elif facet["type"] == "nested":
                    # Nested facets go through the JSON Facet API.
                    _f = {
                        "field": facet["field"],
                        "limit": int(facet["properties"].get("limit", 10))
                        + (1 if facet["widgetType"] == "facet-widget" else 0),
                        "mincount": int(facet["properties"]["mincount"]),
                    }

                    if "start" in facet["properties"]:
                        _f.update(
                            {
                                "type": "range",
                                "start": facet["properties"]["start"],
                                "end": facet["properties"]["end"],
                                "gap": facet["properties"]["gap"],
                            }
                        )
                        if (
                            timeFilter
                            and timeFilter["time_field"] == facet["field"]
                            and (
                                facet["id"] not in timeFilter["time_filter_overrides"]
                                or facet["widgetType"] != "bucket-widget"
                            )
                        ):
                            _f.update(self._get_time_filter_query(timeFilter, facet))
                    else:
                        _f.update({"type": "terms", "field": facet["field"], "excludeTags": facet["id"]})

                    if facet["properties"]["facets"]:
                        if facet["properties"]["facets"][0]["aggregate"] == "count":
                            _f["facet"] = {
                                "d2": {
                                    "type": "terms",
                                    "field": "%(field)s" % facet["properties"]["facets"][0],
                                    "limit": int(facet["properties"]["facets"][0].get("limit", 10)),
                                    "mincount": int(facet["properties"]["facets"][0]["mincount"]),
                                }
                            }
                            if len(facet["properties"]["facets"]) > 1:  # Get 3rd dimension calculation
                                _f["facet"]["d2"]["facet"] = {
                                    "d2": self._get_aggregate_function(facet["properties"]["facets"][1])
                                }
                        else:
                            _f["facet"] = {"d2": self._get_aggregate_function(facet["properties"]["facets"][0])}

                    json_facets[facet["id"]] = _f
                elif facet["type"] == "function":
                    json_facets[facet["id"]] = self._get_aggregate_function(facet)
                    json_facets["processEmpty"] = True
                elif facet["type"] == "pivot":
                    # Pivots require at least one sub-facet (or a map widget).
                    if facet["properties"]["facets"] or facet["widgetType"] == "map-widget":
                        fields = facet["field"]
                        fields_limits = []
                        for f in facet["properties"]["facets"]:
                            fields_limits.append("f.%s.facet.limit=%s" % (f["field"], f["limit"]))
                            fields_limits.append("f.%s.facet.mincount=%s" % (f["field"], f["mincount"]))
                            fields += "," + f["field"]
                        keys = {
                            "id": "%(id)s" % facet,
                            "key": "%(field)s-%(id)s" % facet,
                            "field": facet["field"],
                            "fields": fields,
                            "limit": int(facet["properties"].get("limit", 10)),
                            "mincount": int(facet["properties"]["mincount"]),
                            "fields_limits": " ".join(fields_limits),
                        }
                        params += (
                            (
                                "facet.pivot",
                                "{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s"
                                % keys,
                            ),
                        )

            if json_facets:
                params += (("json.facet", json.dumps(json_facets)),)

        params += self._get_fq(collection, query)

        if collection["template"]["fieldsSelected"] and collection["template"]["isGridLayout"]:
            fields = set(
                collection["template"]["fieldsSelected"] + [collection["idField"]] if collection["idField"] else []
            )
            # Add field if needed
            if collection["template"]["leafletmap"].get("latitudeField"):
                fields.add(collection["template"]["leafletmap"]["latitudeField"])
            if collection["template"]["leafletmap"].get("longitudeField"):
                fields.add(collection["template"]["leafletmap"]["longitudeField"])
            if collection["template"]["leafletmap"].get("labelField"):
                fields.add(collection["template"]["leafletmap"]["labelField"])
            params += (("fl", urllib.unquote(utf_quoter(",".join(list(fields))))),)
        else:
            params += (("fl", "*"),)

        params += (("hl", "true"), ("hl.fl", "*"), ("hl.snippets", 5), ("hl.fragsize", 1000))

        if collection["template"]["fieldsSelected"]:
            # Build the sort clause from the selected fields' sort direction.
            fields = []
            for field in collection["template"]["fieldsSelected"]:
                # NOTE(review): py2 filter() returns a list here; py3 would
                # need list(...) before subscripting.
                attribute_field = filter(
                    lambda attribute: field == attribute["name"], collection["template"]["fieldsAttributes"]
                )
                if attribute_field:
                    if attribute_field[0]["sort"]["direction"]:
                        fields.append("%s %s" % (field, attribute_field[0]["sort"]["direction"]))
            if fields:
                params += (("sort", ",".join(fields)),)

        response = self._root.get("%(collection)s/select" % solr_query, params)
        return self._get_json(response)

    def suggest(self, collection, query):
        """Run the Solr suggester for ``query['q']`` against ``collection``
        and return the parsed JSON response.

        Raises PopupException on any REST error.
        """
        try:
            params = self._get_params() + (
                ("suggest", "true"),
                ("suggest.build", "true"),
                ("suggest.q", query["q"]),
                ("wt", "json"),
            )
            if query.get("dictionary"):
                params += (("suggest.dictionary", query["dictionary"]),)
            response = self._root.get("%s/suggest" % collection, params)
            return self._get_json(response)
        except RestException, e:  # Python 2-only except syntax
            raise PopupException(e, title=_("Error while accessing Solr"))
Exemplo n.º 50
0
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler

  Dashboard-facing Solr client: translates a collection/query description
  (facets, filter queries, selected fields, sorting, highlighting) into
  Solr select/suggest request parameters and submits them through the
  shared REST client.
  """
  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled
    if self.security_enabled:
      self._client.set_kerberos_auth()
    self._root = resource.Resource(self._client)


  def _get_params(self):
    # Under Kerberos the authenticated principal impersonates the end user via
    # doAs alone; otherwise an explicit user.name must also be passed.
    if self.security_enabled:
      return (('doAs', self._user ),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)


  def _get_q(self, query):
    """Join the sub-queries with OR (parenthesized when there are several)."""
    # NOTE(review): joined with 'OR' and no surrounding spaces; the '(%s)'
    # template keeps terms delimited when len >= 2 -- confirm single-term
    # queries never need the separator.
    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'
    return 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']]).encode('utf-8')


  def _get_fq(self, query):
    """Build 'fq' parameters from the query's field and range filters."""
    params = ()

    for fq in query['fqs']:
      if fq['type'] == 'field':
        # This does not work if spaces in Solr:
        # params += (('fq', ' '.join([urllib.unquote(utf_quoter('{!tag=%s}{!field f=%s}%s' % (fq['field'], fq['field'], _filter))) for _filter in fq['filter']])),)
        fields = fq['field'].split(':') # 2D facets support
        for field in fields:
          f = []
          for _filter in fq['filter']:
            values = _filter['value'].split(':') if len(fields) > 1 else [_filter['value']]
            if fields.index(field) < len(values): # Lowest common field denominator
              value = values[fields.index(field)]
              exclude = '-' if _filter['exclude'] else ''
              # Quote values containing spaces; otherwise use the field parser.
              if value is not None and ' ' in value:
                f.append('%s%s:"%s"' % (exclude, field, value))
              else:
                f.append('%s{!field f=%s}%s' % (exclude, field, value))
          _params = '{!tag=%s}' % field + ' '.join(f)
          params += (('fq', urllib.unquote(utf_quoter(_params))),)
      elif fq['type'] == 'range':
        params += (('fq', '{!tag=%s}' % fq['field'] + ' '.join([urllib.unquote(
                    utf_quoter('%s%s:[%s TO %s}' % ('-' if field['exclude'] else '', fq['field'], f['from'], f['to']))) for field, f in zip(fq['filter'], fq['properties'])])),)
    return params

  def query(self, collection, query):
    """Build and execute a /select query for the dashboard `collection`."""
    solr_query = {}

    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps to protect the server from runaway paging.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )

      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range':
          params += tuple([
             ('facet.range', '{!ex=%s}%s' % (facet['field'], facet['field'])),
             ('f.%s.facet.range.start' % facet['field'], facet['properties']['start']),
             ('f.%s.facet.range.end' % facet['field'], facet['properties']['end']),
             ('f.%s.facet.range.gap' % facet['field'], facet['properties']['gap']),
             ('f.%s.facet.mincount' % facet['field'], facet['properties']['mincount']),]
          )
        elif facet['type'] == 'field':
          params += (
              # +1 extra value so the UI can detect "more results available".
              ('facet.field', '{!ex=%s}%s' % (facet['field'], facet['field'])),
              ('f.%s.facet.limit' % facet['field'], int(facet['properties'].get('limit', 10)) + 1),
              ('f.%s.facet.mincount' % facet['field'], int(facet['properties']['mincount'])),
          )
        elif facet['type'] == 'pivot':
          if facet['properties']['facets']:
            fields = facet['field']
            for f in facet['properties']['facets']:
              params += (('f.%s.facet.limit' % f['field'], f['limit']),)
              fields += ',' + f['field']
            params += (
                ('facet.pivot', '{!ex=%s}%s' % (fields, fields)),
                ('f.%s.facet.limit' % facet['field'], int(facet['properties'].get('limit', 10))),
                ('facet.pivot.mincount', int(facet['properties']['mincount'])),
            )

    params += self._get_fq(query)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      # Bug fix: the conditional must be parenthesized. The previous
      # `set(a + [b] if b else [])` parsed as `set((a + [b]) if b else [])`,
      # silently discarding every selected field when no idField was set.
      fields = set(collection['template']['fieldsSelected'] + ([collection['idField']] if collection['idField'] else []))
      # Add field if needed
      if collection['template']['leafletmap'].get('latitudeField'):
        fields.add(collection['template']['leafletmap']['latitudeField'])
      if collection['template']['leafletmap'].get('longitudeField'):
        fields.add(collection['template']['leafletmap']['longitudeField'])
      if collection['template']['leafletmap'].get('labelField'):
        fields.add(collection['template']['leafletmap']['labelField'])
      params += (('fl', urllib.unquote(utf_quoter(','.join(list(fields))))),)
    else:
      params += (('fl', '*'),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 3),
      ('hl.fragsize', 0),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        # list() so truthiness/indexing also work on Python 3, where filter()
        # returns an iterator (no-op on Python 2).
        attribute_field = list(filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes']))
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)

    return self._get_json(response)


  def suggest(self, solr_query, hue_core):
    """Run the suggest handler; `hue_core` is unused but kept for API compatibility."""
    try:
      params = self._get_params() + (
          ('q', solr_query['q']),
          ('wt', 'json'),
      )
      response = self._root.get('%(collection)s/suggest' % solr_query, params)
      if type(response) != dict:
        response = json.loads(response)
      return response
    # 'as e' replaces the Python-2-only comma form.
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))
Exemplo n.º 51
0
class MapreduceApi(object):
  """
  Client for the MapReduce application REST API, reached through the
  ResourceManager proxy (`<rm>/proxy/<app_id>/ws/<version>/mapreduce/...`).
  The requesting user is tracked per thread and impersonated when needed.
  """

  def __init__(self, mr_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(mr_url, 'proxy')
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local()  # Per-thread requesting user

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "MapreduceApi at %s" % (self._url,)

  def _get_params(self):
    params = {}
    # Impersonate only when the caller differs from the service user.
    if self.username != DEFAULT_USER.get():
      params['doAs'] = self.username
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()
    return params

  def _get(self, app_id, job_id, suffix='', **extra):
    """GET `<app_id>/ws/<version>/mapreduce/jobs/<job_id><suffix>` as JSON."""
    mapping = {'app_id': app_id, 'job_id': job_id, 'version': _API_VERSION}
    mapping.update(extra)
    path = ('%(app_id)s/ws/%(version)s/mapreduce/jobs/%(job_id)s' + suffix) % mapping
    return self._root.get(path, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  @property
  def url(self):
    return self._url

  @property
  def username(self):
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  def setuser(self, user):
    """Bind `user` to the current thread; return the previous user."""
    previous = self.username
    self._thread_local.user = user
    return previous

  def job(self, user, job_id):
    return self._get(job_id.replace('job', 'application'), job_id)

  def counters(self, job_id):
    response = self._get(job_id.replace('job', 'application'), job_id, '/counters')
    # The job history server answers with HTML here, which carries no usable
    # data -- report "no counters" instead.
    if isinstance(response, basestring):
      return None
    return response

  def tasks(self, job_id):
    return self._get(job_id.replace('job', 'application'), job_id, '/tasks')

  def job_attempts(self, job_id):
    return self._get(job_id.replace('job', 'application'), job_id, '/jobattempts')

  def conf(self, job_id):
    return self._get(job_id.replace('job', 'application'), job_id, '/conf')

  def task(self, job_id, task_id):
    return self._get(job_id.replace('job', 'application'), job_id,
                     '/tasks/%(task_id)s', task_id=task_id)

  def task_counters(self, job_id, task_id):
    app_id = job_id.replace('job', 'application')
    return self._get(app_id, job_id.replace('application', 'job'),
                     '/tasks/%(task_id)s/counters', task_id=task_id)

  def task_attempts(self, job_id, task_id):
    return self._get(job_id.replace('job', 'application'), job_id,
                     '/tasks/%(task_id)s/attempts', task_id=task_id)

  def task_attempt(self, job_id, task_id, attempt_id):
    app_id = job_id.replace('job', 'application')
    return self._get(app_id, job_id.replace('application', 'job'),
                     '/tasks/%(task_id)s/attempts/%(attempt_id)s',
                     task_id=task_id, attempt_id=attempt_id)

  def kill(self, job_id):
    app_id = job_id.replace('job', 'application')
    get_resource_manager(self.username).kill(app_id)  # Kills must go through the RM
Exemplo n.º 52
0
Arquivo: api.py Projeto: findhy/hue
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler

  Dashboard-facing Solr client: translates a collection/query description
  (facets, filter queries, selected fields, sorting) into Solr
  select/suggest request parameters.
  """
  def __init__(self, solr_url, user, security_enabled=SECURITY_ENABLED.get()):
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled
    if self.security_enabled:
      self._client.set_kerberos_auth()
    self._root = resource.Resource(self._client)

  def _get_params(self):
    # Under Kerberos the authenticated principal impersonates the end user via
    # doAs alone; otherwise an explicit user.name must also be passed.
    if self.security_enabled:
      return (('doAs', self._user ),)
    return (('user.name', DEFAULT_USER), ('doAs', self._user),)


  def query(self, collection, query):
    """Build and execute a /select query for the dashboard `collection`."""
    solr_query = {}

    solr_query['collection'] = collection['name']

    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps to protect the server from runaway paging.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    q_template = '(%s)' if len(query['qs']) >= 2 else '%s'

    params = self._get_params() + (
        ('q', 'OR'.join([q_template % (q['q'] or EMPTY_QUERY.get()) for q in query['qs']])),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )
      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range':
          params += tuple([
             ('facet.range', '{!ex=%s}%s' % (facet['field'], facet['field'])),
             ('f.%s.facet.range.start' % facet['field'], facet['properties']['start']),
             ('f.%s.facet.range.end' % facet['field'], facet['properties']['end']),
             ('f.%s.facet.range.gap' % facet['field'], facet['properties']['gap']),
             ('f.%s.facet.mincount' % facet['field'], facet['properties']['mincount']),]
          )
        elif facet['type'] == 'field':
          params += (
              # +1 extra value so the UI can detect "more results available".
              ('facet.field', '{!ex=%s}%s' % (facet['field'], facet['field'])),
              ('f.%s.facet.limit' % facet['field'], int(facet['properties'].get('limit', 10)) + 1),
              ('f.%s.facet.mincount' % facet['field'], int(facet['properties']['mincount'])),
          )

    for fq in query['fqs']:
      if fq['type'] == 'field':
        # This does not work if spaces in Solr:
        # params += (('fq', ' '.join([urllib.unquote(utf_quoter('{!tag=%s}{!field f=%s}%s' % (fq['field'], fq['field'], _filter))) for _filter in fq['filter']])),)
        f = []
        for _filter in fq['filter']:
          # Quote values containing spaces; otherwise use the field parser.
          if _filter is not None and ' ' in _filter:
            f.append('%s:"%s"' % (fq['field'], _filter))
          else:
            f.append('{!field f=%s}%s' % (fq['field'], _filter))
        params += (('fq', urllib.unquote(utf_quoter('{!tag=%s}' % fq['field'] + ' '.join(f)))),)
      elif fq['type'] == 'range':
        params += (('fq', '{!tag=%s}' % fq['field'] + ' '.join([urllib.unquote(utf_quoter('%s:[%s TO %s}' % (fq['field'], f['from'], f['to']))) for f in fq['properties']])),)

    if collection['template']['fieldsSelected'] and collection['template']['isGridLayout']:
      # Bug fix: the conditional must be parenthesized. The previous
      # `a + [b] if b else []` parsed as `(a + [b]) if b else []`, silently
      # discarding every selected field when no idField was set.
      fields = collection['template']['fieldsSelected'] + ([collection['idField']] if collection['idField'] else [])
      params += (('fl', urllib.unquote(utf_quoter(','.join(fields)))),)
    else:
      params += (('fl', '*'),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 3)
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        # list() so truthiness/indexing also work on Python 3, where filter()
        # returns an iterator (no-op on Python 2).
        attribute_field = list(filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes']))
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)

    return self._get_json(response)


  def suggest(self, solr_query, hue_core):
    """Run the suggest handler; `hue_core` is unused but kept for API compatibility."""
    try:
      params = self._get_params() + (
          ('q', solr_query['q']),
          ('wt', 'json'),
      )
      response = self._root.get('%(collection)s/suggest' % solr_query, params)
      if type(response) != dict:
        response = json.loads(response)
      return response
    # 'as e' replaces the Python-2-only comma form.
    except RestException as e:
      raise PopupException(e, title=_('Error while accessing Solr'))
Exemplo n.º 53
0
class SolrApi(object):
    """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
    def __init__(self,
                 solr_url,
                 user,
                 security_enabled=SECURITY_ENABLED.get(),
                 ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
        """Build an authenticated REST client for the given Solr URL."""
        self._url = solr_url
        self._user = user
        self.security_enabled = security_enabled

        client = HttpClient(self._url, logger=LOG)
        if security_enabled:
            client.set_kerberos_auth()
        client.set_verify(ssl_cert_ca_verify)

        self._client = client
        self._root = resource.Resource(self._client)

        if security_enabled:
            # Kerberos authenticates over two requests, and a large-bodied
            # PUT/POST as the very first call may be rejected outright. Issue a
            # cheap HEAD now so later writes arrive pre-authenticated via the
            # session cookie.
            self._root.invoke('HEAD', '/')

    def _get_params(self):
        """Base query parameters: doAs impersonation, plus an explicit
        user.name when Kerberos is not handling authentication."""
        if self.security_enabled:
            return (('doAs', self._user),)
        return (('user.name', SERVER_USER.get()), ('doAs', self._user))

    def _get_q(self, query):
        """Join the sub-queries with OR (parenthesized when there are several),
        substituting the configured empty-query placeholder for blank terms."""
        qs = query['qs']
        template = '(%s)' if len(qs) >= 2 else '%s'
        clauses = [template % (q['q'] or EMPTY_QUERY.get()) for q in qs]
        return 'OR'.join(clauses).encode('utf-8')

    def _get_aggregate_function(self, facet):
        """Render a Solr stats aggregate expression, e.g. 'sum(price)'.

        Accepts both facet shapes: the aggregate name lives either under
        facet['properties']['aggregate'] or directly at facet['aggregate'].
        'median' is expressed as the 50th percentile.
        """
        if 'properties' in facet:
            aggregate = facet['properties']['aggregate']
        else:
            aggregate = facet['aggregate']
        field = facet['field']

        if aggregate == 'median':
            return 'percentile(%s,50)' % field
        return '%s(%s)' % (aggregate, field)

    def _get_range_borders(self, collection, query):
        """Compute the time-range properties implied by the collection's time
        filter.

        Returns an empty dict when no time filter applies. Otherwise the dict
        carries 'time_filter_overrides' (ids of fqs on the time field, which
        take precedence over the global filter), 'time_field', and, depending
        on the filter type, either a rolling NOW-based window with a
        widget-appropriate gap or a fixed from/to window.
        """
        # Gap per rolling-window size: one (coeff, unit) pair shared by the
        # dense widgets (histogram/bucket/bar, ~100 slots) and one for the
        # sparser facet widget (~10 slots).
        window_gaps = {
            '5MINUTES': (('+3', 'SECONDS'), ('+1', 'MINUTES')),
            '30MINUTES': (('+20', 'SECONDS'), ('+5', 'MINUTES')),
            '1HOURS': (('+30', 'SECONDS'), ('+10', 'MINUTES')),
            '12HOURS': (('+7', 'MINUTES'), ('+1', 'HOURS')),
            '1DAYS': (('+15', 'MINUTES'), ('+3', 'HOURS')),
            '2DAYS': (('+30', 'MINUTES'), ('+6', 'HOURS')),
            '7DAYS': (('+3', 'HOURS'), ('+1', 'DAYS')),
            '1MONTHS': (('+12', 'HOURS'), ('+5', 'DAYS')),
            '3MONTHS': (('+1', 'DAYS'), ('+30', 'DAYS')),
            '1YEARS': (('+3', 'DAYS'), ('+12', 'MONTHS')),
            '2YEARS': (('+7', 'DAYS'), ('+3', 'MONTHS')),
            '10YEARS': (('+1', 'MONTHS'), ('+1', 'YEARS')),
        }
        GAPS = {}
        for window, ((dense_coeff, dense_unit), (sparse_coeff, sparse_unit)) in window_gaps.items():
            GAPS[window] = {
                'histogram-widget': {'coeff': dense_coeff, 'unit': dense_unit},
                'bucket-widget': {'coeff': dense_coeff, 'unit': dense_unit},
                'bar-widget': {'coeff': dense_coeff, 'unit': dense_unit},
                'facet-widget': {'coeff': sparse_coeff, 'unit': sparse_unit},
            }

        props = {}
        time_filter = collection['timeFilter']
        time_field = time_filter.get('field')

        if time_field and (time_filter['value'] != 'all'
                           or time_filter['type'] == 'fixed'):
            # Facet fqs on the time field override the global time filter.
            props['time_filter_overrides'] = [
                fq['id'] for fq in query['fqs'] if fq['field'] == time_field
            ]
            props['time_field'] = time_field

            if time_filter['type'] == 'rolling':
                props['field'] = time_filter['field']
                props['from'] = 'NOW-%s' % time_filter['value']
                props['to'] = 'NOW'
                props['gap'] = GAPS.get(time_filter['value'])
            elif time_filter['type'] == 'fixed':
                props['field'] = time_filter['field']
                props['from'] = time_filter['from']
                props['to'] = time_filter['to']
                props['fixed'] = True

        return props

    def _get_time_filter_query(self, timeFilter, facet):
        """Translate a time filter into Solr range-facet start/end/gap values,
        each rounded to the gap's unit via Solr date math ('/UNIT')."""
        if 'fixed' in timeFilter:
            # Fixed window: derive a gap for this widget type from the window
            # bounds, then round start/end to that gap's unit.
            props = {}
            stat_facet = {'min': timeFilter['from'], 'max': timeFilter['to']}
            _compute_range_facet(facet['widgetType'], stat_facet, props,
                                 stat_facet['min'], stat_facet['max'])
            unit = re.split(r'\d+', props['gap'])[1]
            return {
                'start': '%s/%s' % (timeFilter['from'], unit),
                'end': '%s/%s' % (timeFilter['to'], unit),
                'gap': '%(gap)s' % props,  # add a 'auto'
            }

        # Rolling window: the gap table already holds coeff/unit per widget.
        gap = timeFilter['gap'][facet['widgetType']]
        return {
            'start': '%s/%s' % (timeFilter['from'], gap['unit']),
            'end': '%s/%s' % (timeFilter['to'], gap['unit']),
            'gap': '%(coeff)s%(unit)s/%(unit)s' % gap,  # add a 'auto'
        }

    def _get_fq(self, collection, query):
        """Build the 'fq' (filter query) parameters for a dashboard query.

        Emits one tagged fq per filtered field so facets can exclude their own
        filter ({!tag=...} / {!ex=...}), covering field, range, range-up and
        map (bounding box) filter types, plus the collection's global time
        filter when no per-facet time fq overrides it.
        """
        params = ()
        timeFilter = {}

        if collection:
            timeFilter = self._get_range_borders(collection, query)
        # Apply the global time window only when no facet fq already filters
        # the time field.
        if timeFilter and not timeFilter.get('time_filter_overrides'):
            params += (('fq',
                        urllib.unquote(
                            utf_quoter('%(field)s:[%(from)s TO %(to)s]' %
                                       timeFilter))), )

        # Merge facets queries on same fields
        # NOTE(review): itertools.groupby only merges *adjacent* fqs sharing
        # (type, field); assumes the caller supplies them grouped -- confirm.
        grouped_fqs = groupby(query['fqs'], lambda x: (x['type'], x['field']))
        merged_fqs = []
        for key, group in grouped_fqs:
            field_fq = next(group)
            for fq in group:
                for f in fq['filter']:
                    field_fq['filter'].append(f)
            merged_fqs.append(field_fq)

        for fq in merged_fqs:
            if fq['type'] == 'field':
                fields = fq['field'] if type(fq['field']) == list else [
                    fq['field']
                ]  # 2D facets support
                for field in fields:
                    f = []
                    for _filter in fq['filter']:
                        values = _filter['value'] if type(
                            _filter['value']) == list else [
                                _filter['value']
                            ]  # 2D facets support
                        if fields.index(field) < len(
                                values):  # Lowest common field denominator
                            value = values[fields.index(field)]
                            # '-' negates the clause for exclusion filters.
                            exclude = '-' if _filter['exclude'] else ''
                            # Values with spaces must be quoted (and embedded
                            # quotes escaped); others go through {!field}.
                            if value is not None and ' ' in force_unicode(
                                    value):
                                value = force_unicode(value).replace(
                                    '"', '\\"')
                                f.append('%s%s:"%s"' % (exclude, field, value))
                            else:
                                f.append('%s{!field f=%s}%s' %
                                         (exclude, field, value))
                    _params = '{!tag=%(id)s}' % fq + ' '.join(f)
                    params += (('fq', urllib.unquote(utf_quoter(_params))), )
            elif fq['type'] == 'range':
                params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([
                    urllib.unquote(
                        utf_quoter('%s%s:[%s TO %s}' %
                                   ('-' if field['exclude'] else '',
                                    fq['field'], f['from'], f['to'])))
                    for field, f in zip(fq['filter'], fq['properties'])
                ])), )
            elif fq['type'] == 'range-up':
                # Open-ended range: [from TO *} when filtering upwards,
                # [* TO from} otherwise.
                params += (('fq', '{!tag=%(id)s}' % fq + ' '.join([
                    urllib.unquote(
                        utf_quoter('%s%s:[%s TO %s}' %
                                   ('-' if field['exclude'] else '',
                                    fq['field'], f['from'] if fq['is_up'] else
                                    '*', '*' if fq['is_up'] else f['from'])))
                    for field, f in zip(fq['filter'], fq['properties'])
                ])), )
            elif fq['type'] == 'map':
                # Bounding box: lat/lon each constrained to the SW..NE corners.
                _keys = fq.copy()
                _keys.update(fq['properties'])
                params += (('fq', '{!tag=%(id)s}' % fq + urllib.unquote(
                    utf_quoter(
                        '%(lat)s:[%(lat_sw)s TO %(lat_ne)s} AND %(lon)s:[%(lon_sw)s TO %(lon_ne)s}'
                        % _keys))), )

        return params

    def query(self, collection, query):
        """Build and execute a Solr /select request for `collection`.

        `collection` is the dashboard collection definition (name, template,
        facets, idField); `query` carries the user's query state (q, filters,
        start offset, download flag).  Returns the parsed JSON response.
        """
        solr_query = {}

        solr_query['collection'] = collection['name']

        # Downloads use a fixed page from the start; interactive queries honor
        # the template row count and the requested offset.
        if query.get('download'):
            solr_query['rows'] = 1000
            solr_query['start'] = 0
        else:
            solr_query['rows'] = int(collection['template']['rows'] or 10)
            solr_query['start'] = int(query['start'])

        # Hard caps to protect Solr from oversized pages and deep paging.
        solr_query['rows'] = min(solr_query['rows'], 1000)
        solr_query['start'] = min(solr_query['start'], 10000)

        params = self._get_params() + (
            ('q', self._get_q(query)),
            ('wt', 'json'),
            ('rows', solr_query['rows']),
            ('start', solr_query['start']),
        )

        if any(collection['facets']):
            params += (
                ('facet', 'true'),
                ('facet.mincount', 0),
                ('facet.limit', 10),
            )
            # Facets expressed through Solr's JSON Facet API ('nested',
            # 'function' types) are accumulated here and sent as a single
            # 'json.facet' parameter at the end.
            json_facets = {}

            timeFilter = self._get_range_borders(collection, query)

            for facet in collection['facets']:
                if facet['type'] == 'query':
                    params += (('facet.query', '%s' % facet['field']), )
                elif facet['type'] == 'range' or facet['type'] == 'range-up':
                    keys = {
                        'id': '%(id)s' % facet,
                        'field': facet['field'],
                        'key': '%(field)s-%(id)s' % facet,
                        'start': facet['properties']['start'],
                        'end': facet['properties']['end'],
                        'gap': facet['properties']['gap'],
                        'mincount': int(facet['properties']['mincount'])
                    }

                    # A rolling time filter overrides the facet's stored range
                    # unless this widget pinned its own time filter.
                    if timeFilter and timeFilter['time_field'] == facet[
                            'field'] and (
                                facet['id']
                                not in timeFilter['time_filter_overrides']
                                or facet['widgetType'] != 'histogram-widget'):
                        keys.update(
                            self._get_time_filter_query(timeFilter, facet))

                    params += ((
                        'facet.range',
                        '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s'
                        % keys), )
                elif facet['type'] == 'field':
                    keys = {
                        'id':
                        '%(id)s' % facet,
                        'field':
                        facet['field'],
                        'key':
                        '%(field)s-%(id)s' % facet,
                        # Fetch one extra row so the widget can tell whether
                        # more values exist beyond the limit.
                        'limit':
                        int(facet['properties'].get('limit', 10)) +
                        (1 if facet['widgetType'] == 'facet-widget' else 0),
                        'mincount':
                        int(facet['properties']['mincount'])
                    }
                    params += ((
                        'facet.field',
                        '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s'
                        % keys), )
                elif facet['type'] == 'nested':
                    _f = {
                        'field':
                        facet['field'],
                        'limit':
                        int(facet['properties'].get('limit', 10)) +
                        (1 if facet['widgetType'] == 'facet-widget' else 0),
                        'mincount':
                        int(facet['properties']['mincount'])
                    }

                    # A 'start' property marks a range bucket facet (e.g. a
                    # date histogram); otherwise bucket on terms.
                    if 'start' in facet['properties']:
                        _f.update({
                            'type': 'range',
                            'start': facet['properties']['start'],
                            'end': facet['properties']['end'],
                            'gap': facet['properties']['gap'],
                        })
                        if timeFilter and timeFilter['time_field'] == facet[
                                'field'] and (
                                    facet['id']
                                    not in timeFilter['time_filter_overrides']
                                    or facet['widgetType'] != 'bucket-widget'):
                            _f.update(
                                self._get_time_filter_query(timeFilter, facet))
                    else:
                        _f.update({
                            'type': 'terms',
                            'field': facet['field'],
                            'excludeTags': facet['id']
                        })

                    # Optional second (and third) dimension of the nested facet.
                    if facet['properties']['facets']:
                        if facet['properties']['facets'][0][
                                'aggregate'] == 'count':
                            _f['facet'] = {
                                'd2': {
                                    'type':
                                    'terms',
                                    'field':
                                    '%(field)s' %
                                    facet['properties']['facets'][0],
                                    'limit':
                                    int(facet['properties']['facets'][0].get(
                                        'limit', 10)),
                                    'mincount':
                                    int(facet['properties']['facets'][0]
                                        ['mincount'])
                                }
                            }
                            if len(facet['properties']['facets']
                                   ) > 1:  # Get 3rd dimension calculation
                                _f['facet']['d2']['facet'] = {
                                    'd2':
                                    self._get_aggregate_function(
                                        facet['properties']['facets'][1])
                                }
                        else:
                            _f['facet'] = {
                                'd2':
                                self._get_aggregate_function(
                                    facet['properties']['facets'][0])
                            }

                    json_facets[facet['id']] = _f
                elif facet['type'] == 'function':
                    json_facets[facet['id']] = self._get_aggregate_function(
                        facet)
                    json_facets['processEmpty'] = True
                elif facet['type'] == 'pivot':
                    # Pivots need at least one extra dimension; map widgets
                    # always pivot (presumably on lat/lon fields — TODO confirm).
                    if facet['properties']['facets'] or facet[
                            'widgetType'] == 'map-widget':
                        fields = facet['field']
                        fields_limits = []
                        for f in facet['properties']['facets']:
                            fields_limits.append('f.%s.facet.limit=%s' %
                                                 (f['field'], f['limit']))
                            fields_limits.append('f.%s.facet.mincount=%s' %
                                                 (f['field'], f['mincount']))
                            fields += ',' + f['field']
                        keys = {
                            'id': '%(id)s' % facet,
                            'key': '%(field)s-%(id)s' % facet,
                            'field': facet['field'],
                            'fields': fields,
                            'limit': int(facet['properties'].get('limit', 10)),
                            'mincount': int(facet['properties']['mincount']),
                            'fields_limits': ' '.join(fields_limits)
                        }
                        params += ((
                            'facet.pivot',
                            '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s'
                            % keys), )

            if json_facets:
                params += (('json.facet', json.dumps(json_facets)), )

        # Filter queries derived from the current dashboard selections.
        params += self._get_fq(collection, query)

        # Field list: restrict to the selected grid columns (plus id field and
        # any leaflet map fields); otherwise return everything.
        if collection['template']['fieldsSelected'] and collection['template'][
                'isGridLayout']:
            fields = set(
                collection['template']['fieldsSelected'] +
                [collection['idField']] if collection['idField'] else [])
            # Add field if needed
            if collection['template']['leafletmap'].get('latitudeField'):
                fields.add(
                    collection['template']['leafletmap']['latitudeField'])
            if collection['template']['leafletmap'].get('longitudeField'):
                fields.add(
                    collection['template']['leafletmap']['longitudeField'])
            if collection['template']['leafletmap'].get('labelField'):
                fields.add(collection['template']['leafletmap']['labelField'])
            params += (('fl',
                        urllib.unquote(utf_quoter(','.join(list(fields))))), )
        else:
            params += (('fl', '*'), )

        # Always request highlighting so result snippets can be rendered.
        params += (
            ('hl', 'true'),
            ('hl.fl', '*'),
            ('hl.snippets', 5),
            ('hl.fragsize', 1000),
        )

        # Build the sort spec from per-field sort directions in the template.
        # NOTE: `filter` returns a list here (Python 2 semantics).
        if collection['template']['fieldsSelected']:
            fields = []
            for field in collection['template']['fieldsSelected']:
                attribute_field = filter(
                    lambda attribute: field == attribute['name'],
                    collection['template']['fieldsAttributes'])
                if attribute_field:
                    if attribute_field[0]['sort']['direction']:
                        fields.append(
                            '%s %s' %
                            (field, attribute_field[0]['sort']['direction']))
            if fields:
                params += (('sort', ','.join(fields)), )

        response = self._root.get('%(collection)s/select' % solr_query, params)
        return self._get_json(response)

    def suggest(self, collection, query):
        """Run the Solr suggester on `collection` for the text in query['q'].

        Optionally targets a specific suggester dictionary. Solr/transport
        errors surface as a PopupException.
        """
        suggest_params = [
            ('suggest', 'true'),
            ('suggest.build', 'true'),
            ('suggest.q', query['q']),
            ('wt', 'json'),
        ]
        dictionary = query.get('dictionary')
        if dictionary:
            suggest_params.append(('suggest.dictionary', dictionary))

        try:
            response = self._root.get('%s/suggest' % collection,
                                      self._get_params() + tuple(suggest_params))
            return self._get_json(response)
        except RestException as e:
            raise PopupException(e, title=_('Error while accessing Solr'))
Exemplo n.º 54
0
class SolrApi(object):
    """Solr REST client used by the dashboard search backend.

  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler
  """
    def __init__(self,
                 solr_url,
                 user,
                 security_enabled=SECURITY_ENABLED.get()
                 if search_enabled() else SECURITY_ENABLED.default,
                 ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
        # NOTE(review): the keyword defaults above are evaluated once at import
        # time, so config changes require a restart to be picked up.
        self._url = solr_url
        self._user = user
        self._client = HttpClient(self._url, logger=LOG)
        self.security_enabled = security_enabled

        if self.security_enabled:
            self._client.set_kerberos_auth()

        self._client.set_verify(ssl_cert_ca_verify)

        self._root = resource.Resource(self._client)

        # The Kerberos handshake requires two requests in order to authenticate,
        # but if our first request is a PUT/POST, it might flat-out reject the
        # first request if the body is too large. So, connect here in order to get
        # a cookie so future PUT/POSTs will be pre-authenticated.
        if self.security_enabled:
            self._root.invoke('HEAD', '/')

    def query(self, collection, query):
        """Build and execute a Solr /select request for `collection`.

        `collection` is the dashboard collection definition (name, template,
        facets, idField); `query` carries the user's query state (q, filters,
        start offset, download flag).  Returns the parsed JSON response.
        """
        solr_query = {}

        solr_query['collection'] = collection['name']

        # Downloads use a fixed page from the start; interactive queries honor
        # the template row count and the requested offset.
        if query.get('download'):
            solr_query['rows'] = 1000
            solr_query['start'] = 0
        else:
            solr_query['rows'] = int(collection['template']['rows'] or 10)
            solr_query['start'] = int(query['start'])

        # Hard caps to protect Solr from oversized pages and deep paging.
        solr_query['rows'] = min(solr_query['rows'], 1000)
        solr_query['start'] = min(solr_query['start'], 10000)

        params = self._get_params() + (
            ('q', self._get_q(query)),
            ('wt', 'json'),
            ('rows', solr_query['rows']),
            ('start', solr_query['start']),
        )

        if any(collection['facets']):
            params += (
                ('facet', 'true'),
                ('facet.mincount', 0),
                ('facet.limit', 10),
            )
            # Facets expressed through Solr's JSON Facet API ('nested',
            # 'function' types) are accumulated here and sent as a single
            # 'json.facet' parameter at the end.
            json_facets = {}

            timeFilter = self._get_range_borders(collection, query)

            for facet in collection['facets']:
                if facet['type'] == 'query':
                    params += (('facet.query', '%s' % facet['field']), )
                elif facet['type'] == 'range' or facet['type'] == 'range-up':
                    keys = {
                        'id': '%(id)s' % facet,
                        'field': facet['field'],
                        'key': '%(field)s-%(id)s' % facet,
                        'start': facet['properties']['start'],
                        'end': facet['properties']['end'],
                        'gap': facet['properties']['gap'],
                        'mincount': int(facet['properties']['mincount'])
                    }

                    # A rolling time filter overrides the facet's stored range
                    # unless this widget pinned its own time filter.
                    if timeFilter and timeFilter['time_field'] == facet[
                            'field'] and (
                                facet['id']
                                not in timeFilter['time_filter_overrides']
                                or facet['widgetType'] != 'histogram-widget'):
                        keys.update(
                            self._get_time_filter_query(timeFilter, facet))

                    params += ((
                        'facet.range',
                        '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s'
                        % keys), )
                elif facet['type'] == 'field':
                    keys = {
                        'id':
                        '%(id)s' % facet,
                        'field':
                        facet['field'],
                        'key':
                        '%(field)s-%(id)s' % facet,
                        # Fetch one extra row so the widget can tell whether
                        # more values exist beyond the limit.
                        'limit':
                        int(facet['properties'].get('limit', 10)) +
                        (1 if facet['widgetType'] == 'facet-widget' else 0),
                        'mincount':
                        int(facet['properties']['mincount'])
                    }

                    params += ((
                        'facet.field',
                        '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s'
                        % keys), )
                elif facet['type'] == 'nested':
                    _f = {
                        'field':
                        facet['field'],
                        'limit':
                        int(facet['properties'].get('limit', 10)) +
                        (1
                         if facet['widgetType'] == 'text-facet-widget' else 0),
                        'mincount':
                        int(facet['properties']['mincount']),
                        'sort': {
                            'count': facet['properties']['sort']
                        },
                    }
                    # (Removed leftover debug `print facet` that wrote every
                    # nested facet definition to stdout on each query.)

                    # Block-join domain switches for parent/child documents.
                    if facet['properties']['domain'].get(
                            'blockParent'
                    ) or facet['properties']['domain'].get('blockChildren'):
                        _f['domain'] = {}
                        if facet['properties']['domain'].get('blockParent'):
                            _f['domain']['blockParent'] = ' OR '.join(
                                facet['properties']['domain']['blockParent'])
                        if facet['properties']['domain'].get('blockChildren'):
                            _f['domain']['blockChildren'] = ' OR '.join(
                                facet['properties']['domain']['blockChildren'])

                    # A 'start' property marks a range bucket facet (e.g. a
                    # date histogram); otherwise bucket on terms.
                    if 'start' in facet['properties'] and not facet[
                            'properties'].get('type') == 'field':
                        _f.update({
                            'type': 'range',
                            'start': facet['properties']['start'],
                            'end': facet['properties']['end'],
                            'gap': facet['properties']['gap'],
                        })
                        if timeFilter and timeFilter['time_field'] == facet[
                                'field'] and (
                                    facet['id']
                                    not in timeFilter['time_filter_overrides']
                                    or facet['widgetType'] != 'bucket-widget'):
                            _f.update(
                                self._get_time_filter_query(timeFilter, facet))
                    else:
                        _f.update({
                            'type': 'terms',
                            'field': facet['field'],
                            'excludeTags': facet['id'],
                            'offset': 0,
                            'numBuckets': True,
                            'allBuckets': True,
                            'prefix': ''
                        })
                        if facet['properties']['canRange'] and not facet[
                                'properties']['isDate']:
                            # Numeric (non-date) rangeable fields do not
                            # support mincount in the JSON facet API.
                            del _f['mincount']

                    # Attach the extra dimensions recursively.
                    if facet['properties']['facets']:
                        self._n_facet_dimension(facet, _f,
                                                facet['properties']['facets'],
                                                1)
                        if facet['widgetType'] == 'text-facet-widget':
                            # Sort the top-level buckets by the first computed
                            # sub-facet instead of by count.
                            _fname = list(_f['facet'])[0]
                            _f['sort'] = {_fname: facet['properties']['sort']}
                            # domain = '-d2:NaN' # Solr 6.4

                    json_facets[facet['id']] = _f
                elif facet['type'] == 'function':
                    json_facets[facet['id']] = self._get_aggregate_function(
                        facet)
                    json_facets['processEmpty'] = True
                elif facet['type'] == 'pivot':
                    # Pivots need at least one extra dimension; map widgets
                    # always pivot (presumably on lat/lon fields — TODO confirm).
                    if facet['properties']['facets'] or facet[
                            'widgetType'] == 'map-widget':
                        fields = facet['field']
                        fields_limits = []
                        for f in facet['properties']['facets']:
                            fields_limits.append('f.%s.facet.limit=%s' %
                                                 (f['field'], f['limit']))
                            fields_limits.append('f.%s.facet.mincount=%s' %
                                                 (f['field'], f['mincount']))
                            fields += ',' + f['field']
                        keys = {
                            'id': '%(id)s' % facet,
                            'key': '%(field)s-%(id)s' % facet,
                            'field': facet['field'],
                            'fields': fields,
                            'limit': int(facet['properties'].get('limit', 10)),
                            'mincount': int(facet['properties']['mincount']),
                            'fields_limits': ' '.join(fields_limits)
                        }
                        params += ((
                            'facet.pivot',
                            '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s'
                            % keys), )

            if json_facets:
                params += (('json.facet', json.dumps(json_facets)), )

        # Filter queries derived from the current dashboard selections.
        params += self._get_fq(collection, query)

        # Field list: restrict to the selected grid columns (plus id field and
        # any leaflet map fields); otherwise return everything.
        if collection['template']['fieldsSelected'] and collection['template'][
                'isGridLayout']:
            fields = set(
                collection['template']['fieldsSelected'] +
                [collection['idField']] if collection['idField'] else [])
            # Add field if needed
            if collection['template']['leafletmap'].get('latitudeField'):
                fields.add(
                    collection['template']['leafletmap']['latitudeField'])
            if collection['template']['leafletmap'].get('longitudeField'):
                fields.add(
                    collection['template']['leafletmap']['longitudeField'])
            if collection['template']['leafletmap'].get('labelField'):
                fields.add(collection['template']['leafletmap']['labelField'])
            fl = urllib.unquote(utf_quoter(','.join(list(fields))))
        else:
            fl = '*'

        # Pull in child documents of nested schemas via a [child] transformer.
        nested_fields = self._get_nested_fields(collection)
        if nested_fields:
            fl += urllib.unquote(
                utf_quoter(',[child parentFilter="%s"]' %
                           ' OR '.join(nested_fields)))

        params += (('fl', fl), )

        # Always request highlighting so result snippets can be rendered.
        params += (
            ('hl', 'true'),
            ('hl.fl', '*'),
            ('hl.snippets', 5),
            ('hl.fragsize', 1000),
        )

        # Build the sort spec from per-field sort directions in the template.
        # NOTE: `filter` returns a list here (Python 2 semantics).
        if collection['template']['fieldsSelected']:
            fields = []
            for field in collection['template']['fieldsSelected']:
                attribute_field = filter(
                    lambda attribute: field == attribute['name'],
                    collection['template']['fieldsAttributes'])
                if attribute_field:
                    if attribute_field[0]['sort']['direction']:
                        fields.append(
                            '%s %s' %
                            (field, attribute_field[0]['sort']['direction']))
            if fields:
                params += (('sort', ','.join(fields)), )

        response = self._root.get('%(collection)s/select' % solr_query, params)
        return self._get_json(response)

    def _n_facet_dimension(self, widget, _f, facets, dim):
        """Recursively attach JSON-facet sub-dimensions from `facets` onto `_f`.

        Count aggregates become 'dim_NN:<field>' terms buckets nested one level
        deeper; other aggregates become 'agg_NN_MM:<fn>' stat facets at the
        current level.  `dim` numbers the dimension for stable result keys.
        """
        facet = facets[0]
        f_name = 'dim_%02d:%s' % (dim, facet['field'])

        if facet['aggregate']['function'] == 'count':
            if 'facet' not in _f:
                _f['facet'] = {f_name: {}}
            else:
                _f['facet'][f_name] = {}
            _f = _f['facet']

            _f[f_name] = {
                'type': 'terms',
                'field': '%(field)s' % facet,
                'limit': int(facet.get('limit', 10)),
                'mincount': int(facet['mincount']),
                'numBuckets': True,
                'allBuckets': True,
                'prefix': ''
            }
            # tree2 widgets also need the leaf aggregate at every level.
            if widget['widgetType'] == 'tree2-widget' and facets[-1][
                    'aggregate']['function'] != 'count':
                _f['subcount'] = self._get_aggregate_function(facets[-1])

            if len(facets) > 1:  # Get n+1 dimension
                if facets[1]['aggregate']['function'] == 'count':
                    self._n_facet_dimension(widget, _f[f_name], facets[1:],
                                            dim + 1)
                else:
                    self._n_facet_dimension(widget, _f[f_name], facets[1:],
                                            dim)
        else:
            agg_function = self._get_aggregate_function(facet)
            _f['facet'] = {
                'agg_%02d_00:%s' % (dim, agg_function): agg_function
            }
            for i, _f_agg in enumerate(facets[1:], 1):
                if _f_agg['aggregate']['function'] != 'count':
                    agg_function = self._get_aggregate_function(_f_agg)
                    _f['facet']['agg_%02d_%02d:%s' %
                                (dim, i, agg_function)] = agg_function
                else:
                    self._n_facet_dimension(widget, _f, facets[i:],
                                            dim + 1)  # Get n+1 dimension
                    break

    def suggest(self, collection, query):
        """Run the Solr suggester on `collection` for the text in query['q'].

        Optionally targets a specific suggester dictionary.  Solr/transport
        errors surface as a PopupException.
        """
        try:
            params = self._get_params() + (
                ('suggest', 'true'),
                ('suggest.build', 'true'),
                ('suggest.q', query['q']),
                ('wt', 'json'),
            )
            if query.get('dictionary'):
                params += (('suggest.dictionary', query['dictionary']), )
            response = self._root.get('%s/suggest' % collection, params)
            return self._get_json(response)
        except RestException as e:
            raise PopupException(e, title=_('Error while accessing Solr'))
Exemplo n.º 55
0
class HistoryServerApi(object):
  """REST client for the MapReduce Job History Server.

  Wraps the 'ws/<version>/history' endpoint; reads impersonate the current
  user (stored per-thread) through the doAs parameter when needed.
  """

  def __init__(self, oozie_url, security_enabled=False, ssl_cert_ca_verify=False):
    self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local()  # To store user info

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def __str__(self):
    return "HistoryServerApi at %s" % (self._url,)

  def _get(self, path):
    # Every read shares the impersonation params and the JSON Accept header.
    return self._root.get(path, params=self._get_params(), headers={'Accept': _JSON_CONTENT_TYPE})

  def _get_params(self):
    params = {}
    username = self.username

    if username != DEFAULT_USER.get():  # We impersonate if needed
      params['doAs'] = username
      if not self._security_enabled:
        params['user.name'] = DEFAULT_USER.get()

    return params

  @property
  def url(self):
    return self._url

  @property
  def user(self):
    return self.username  # Backward compatibility

  @property
  def username(self):
    # Falls back to the default user when no per-thread user was set.
    if hasattr(self._thread_local, 'user'):
      return self._thread_local.user
    return DEFAULT_USER.get()

  def setuser(self, user):
    """Set the per-thread user and return the previous one."""
    previous = self.user
    self._thread_local.user = user
    return previous

  def job(self, user, job_id):
    # `user` is unused but kept for interface compatibility with callers.
    return self._get('mapreduce/jobs/%s' % (job_id,))

  def counters(self, job_id):
    return self._get('mapreduce/jobs/%s/counters' % (job_id,))

  def conf(self, job_id):
    return self._get('mapreduce/jobs/%s/conf' % (job_id,))

  def job_attempts(self, job_id):
    return self._get('mapreduce/jobs/%s/jobattempts' % (job_id,))

  def tasks(self, job_id):
    return self._get('mapreduce/jobs/%s/tasks' % (job_id,))

  def task(self, job_id, task_id):
    return self._get('mapreduce/jobs/%s/tasks/%s' % (job_id, task_id))

  def task_attempts(self, job_id, task_id):
    return self._get('mapreduce/jobs/%s/tasks/%s/attempts' % (job_id, task_id))

  def task_counters(self, job_id, task_id):
    job_id = job_id.replace('application', 'job')  # History URLs use the job_... id form
    return self._get('mapreduce/jobs/%s/tasks/%s/counters' % (job_id, task_id))

  def task_attempt(self, job_id, task_id, attempt_id):
    return self._get('mapreduce/jobs/%s/tasks/%s/attempts/%s' % (job_id, task_id, attempt_id))

  def task_attempt_counters(self, job_id, task_id, attempt_id):
    return self._get('mapreduce/jobs/%s/tasks/%s/attempts/%s/counters' % (job_id, task_id, attempt_id))
Exemplo n.º 56
0
class SolrApi(object):
  """
  http://wiki.apache.org/solr/CoreAdmin#CoreAdminHandler

  Client that translates a Hue dashboard description (a 'collection' dict
  plus a 'query' dict) into Solr /select parameters, covering classic
  facet params and the JSON Facet API. Python 2 code (see the Py2-specific
  notes inline).
  """
  # NOTE: the default is evaluated once at import time, not per call.
  def __init__(self, solr_url=None, user=None, security_enabled=False, ssl_cert_ca_verify=SSL_CERT_CA_VERIFY.get()):
    # Fall back to the configured Solr URL when none was passed in.
    if solr_url is None and hasattr(SOLR_URL, 'get'):
      solr_url = SOLR_URL.get()

    # NOTE(review): when solr_url is still falsy, none of the attributes
    # below are created and later method calls will raise AttributeError.
    if solr_url:
      self._url = solr_url
      self._user = user
      self._client = HttpClient(self._url, logger=LOG)
      self.security_enabled = security_enabled or SECURITY_ENABLED.get()

      if self.security_enabled:
        self._client.set_kerberos_auth()

      self._client.set_verify(ssl_cert_ca_verify)

      self._root = resource.Resource(self._client)

      # The Kerberos handshake requires two requests in order to authenticate,
      # but if our first request is a PUT/POST, it might flat-out reject the
      # first request if the body is too large. So, connect here in order to get
      # a cookie so future PUT/POSTs will be pre-authenticated.
      if self.security_enabled:
        self._root.invoke('HEAD', '/')


  def query(self, collection, query):
    """Run a dashboard query against *collection* and return the parsed JSON.

    Builds paging, facet (classic + JSON Facet API), filter, field-list,
    more-like-this, highlighting and sort parameters from the dashboard
    description, then issues GET <collection>/select.
    Assumes the 'collection'/'query' dicts follow the dashboard schema
    (template, facets, fields...) -- not validated here.
    """
    solr_query = {}

    solr_query['collection'] = collection['name']

    # Downloads page through a fixed window; interactive queries honor the
    # template's row count and the requested offset.
    if query.get('download'):
      solr_query['rows'] = 1000
      solr_query['start'] = 0
    else:
      solr_query['rows'] = int(collection['template']['rows'] or 10)
      solr_query['start'] = int(query['start'])

    # Hard caps to protect Solr from oversized pages / deep paging.
    solr_query['rows'] = min(solr_query['rows'], 1000)
    solr_query['start'] = min(solr_query['start'], 10000)

    params = self._get_params() + (
        ('q', self._get_q(query)),
        ('wt', 'json'),
        ('rows', solr_query['rows']),
        ('start', solr_query['start']),
    )

    if any(collection['facets']):
      params += (
        ('facet', 'true'),
        ('facet.mincount', 0),
        ('facet.limit', 10),
      )
      json_facets = {}

      timeFilter = self._get_range_borders(collection, query)

      # One branch per widget facet type; classic facets are appended to
      # 'params', nested/function facets accumulate into 'json_facets'.
      for facet in collection['facets']:
        if facet['type'] == 'query':
          params += (('facet.query', '%s' % facet['field']),)
        elif facet['type'] == 'range' or facet['type'] == 'range-up':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              'start': facet['properties']['start'],
              'end': facet['properties']['end'],
              'gap': facet['properties']['gap'],
              'mincount': int(facet['properties']['mincount'])
          }

          # Align the range borders with the dashboard time filter unless
          # this widget explicitly overrides it.
          if timeFilter and timeFilter['time_field'] == facet['field'] and (facet['id'] not in timeFilter['time_filter_overrides'] or facet['widgetType'] != 'histogram-widget'):
            keys.update(self._get_time_filter_query(timeFilter, facet))

          params += (
             ('facet.range', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.range.start=%(start)s f.%(field)s.facet.range.end=%(end)s f.%(field)s.facet.range.gap=%(gap)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'field':
          keys = {
              'id': '%(id)s' % facet,
              'field': facet['field'],
              'key': '%(field)s-%(id)s' % facet,
              # +1 row so the widget can tell whether more values exist.
              'limit': int(facet['properties'].get('limit', 10)) + (1 if facet['widgetType'] == 'facet-widget' else 0),
              'mincount': int(facet['properties']['mincount'])
          }

          params += (
              ('facet.field', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s}%(field)s' % keys),
          )
        elif facet['type'] == 'nested':
          # Multi-dimension facet built recursively via the JSON Facet API.
          _f = {}
          if facet['properties']['facets']:
            self._n_facet_dimension(facet, _f, facet['properties']['facets'], 1, timeFilter)

          if facet['properties'].get('domain'):
            if facet['properties']['domain'].get('blockParent') or facet['properties']['domain'].get('blockChildren'):
              _f['domain'] = {}
              if facet['properties']['domain'].get('blockParent'):
                _f['domain']['blockParent'] = ' OR '.join(facet['properties']['domain']['blockParent'])
              if facet['properties']['domain'].get('blockChildren'):
                _f['domain']['blockChildren'] = ' OR '.join(facet['properties']['domain']['blockChildren'])

          if _f:
            # Sort by count unless a later aggregate requested its own order.
            sort = {'count': facet['properties']['facets'][0]['sort']}
            for i, agg in enumerate(self._get_dimension_aggregates(facet['properties']['facets'][1:])):
              if agg['sort'] != 'default':
                agg_function = self._get_aggregate_function(agg)
                sort = {'agg_%02d_%02d:%s' % (1, i, agg_function): agg['sort']}

            if sort.get('count') == 'default':
              sort['count'] = 'desc'

            dim_key = [key for key in _f['facet'].keys() if 'dim' in key][0]
            _f['facet'][dim_key].update({
                  'excludeTags': facet['id'],
                  'offset': 0,
                  'numBuckets': True,
                  'allBuckets': True,
                  'sort': sort
                  #'prefix': '' # Forbidden on numeric fields
              })
            json_facets[facet['id']] = _f['facet'][dim_key]
        elif facet['type'] == 'function':
          if facet['properties']['facets']:
            json_facets[facet['id']] = self._get_aggregate_function(facet['properties']['facets'][0])
            # 'compare' wraps the metric into a range facet over two periods.
            if facet['properties']['compare']['is_enabled']:
              # TODO: global compare override
              unit = re.split('\d+', facet['properties']['compare']['gap'])[1]
              json_facets[facet['id']] = {
                'type': 'range',
                'field': collection['timeFilter'].get('field'),
                'start': 'NOW/%s-%s-%s' % (unit, facet['properties']['compare']['gap'], facet['properties']['compare']['gap']),
                'end': 'NOW/%s' % unit,
                'gap': '+%(gap)s' % facet['properties']['compare'],
                'facet': {facet['id']: json_facets[facet['id']]}
              }
            if facet['properties']['filter']['is_enabled']:
              json_facets[facet['id']] = {
                'type': 'query',
                'q': facet['properties']['filter']['query'] or EMPTY_QUERY.get(),
                'facet': {facet['id']: json_facets[facet['id']]}
              }
            json_facets['processEmpty'] = True
        elif facet['type'] == 'pivot':
          if facet['properties']['facets'] or facet['widgetType'] == 'map-widget':
            # Build a comma-separated field chain plus per-field limits.
            fields = facet['field']
            fields_limits = []
            for f in facet['properties']['facets']:
              fields_limits.append('f.%s.facet.limit=%s' % (f['field'], f['limit']))
              fields_limits.append('f.%s.facet.mincount=%s' % (f['field'], f['mincount']))
              fields += ',' + f['field']
            keys = {
                'id': '%(id)s' % facet,
                'key': '%(field)s-%(id)s' % facet,
                'field': facet['field'],
                'fields': fields,
                'limit': int(facet['properties'].get('limit', 10)),
                'mincount': int(facet['properties']['mincount']),
                'fields_limits': ' '.join(fields_limits)
            }
            params += (
                ('facet.pivot', '{!key=%(key)s ex=%(id)s f.%(field)s.facet.limit=%(limit)s f.%(field)s.facet.mincount=%(mincount)s %(fields_limits)s}%(fields)s' % keys),
            )

      if json_facets:
        params += (
            ('json.facet', json.dumps(json_facets)),
        )

    params += self._get_fq(collection, query)

    # Py2: urllib.unquote (moved to urllib.parse in Py3).
    fl = urllib.unquote(utf_quoter(','.join(Collection2.get_field_list(collection))))

    nested_fields = self._get_nested_fields(collection)
    if nested_fields:
      fl += urllib.unquote(utf_quoter(',[child parentFilter="%s"]' % ' OR '.join(nested_fields)))

    # NOTE(review): 'fl' is a string here, so the comparison against the
    # list ['*'] is always True -- looks like it meant fl != '*'; confirm.
    if collection['template']['moreLikeThis'] and fl != ['*']: # Potential conflict with nested documents
      id_field = collection.get('idField', 'id')
      params += (
        ('mlt', 'true'),
        ('mlt.fl', fl.replace(',%s' % id_field, '')),
        ('mlt.mintf', 1),
        ('mlt.mindf', 1),
        ('mlt.maxdf', 50),
        ('mlt.maxntp', 1000),
        ('mlt.count', 10),
        #('mlt.minwl', 1),
        #('mlt.maxwl', 1),
      )
      fl = '*'

    params += (('fl', fl),)

    params += (
      ('hl', 'true'),
      ('hl.fl', '*'),
      ('hl.snippets', 5),
      ('hl.fragsize', 1000),
    )

    if collection['template']['fieldsSelected']:
      fields = []
      for field in collection['template']['fieldsSelected']:
        # Py2: filter() returns a list, so indexing [0] below is valid.
        attribute_field = filter(lambda attribute: field == attribute['name'], collection['template']['fieldsAttributes'])
        if attribute_field:
          if attribute_field[0]['sort']['direction']:
            fields.append('%s %s' % (field, attribute_field[0]['sort']['direction']))
      if fields:
        params += (
          ('sort', ','.join(fields)),
        )

    response = self._root.get('%(collection)s/select' % solr_query, params)
    return self._get_json(response)


  def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter):
    """Recursively build one JSON Facet API dimension into *_f*.

    *facets* is the remaining list of dimension/aggregate descriptors;
    *dim* numbers the current dimension (dim_01, dim_02, ...). Count
    dimensions become 'terms' (or 'range') facets; non-count entries
    become agg_<dim>_<i> aggregate expressions.
    """
    facet = facets[0]
    f_name = 'dim_%02d:%s' % (dim, facet['field'])

    if facet['aggregate']['function'] == 'count':
      if 'facet' not in _f:
        _f['facet'] = {f_name: {}}
      else:
        _f['facet'][f_name] = {}
      _f = _f['facet']

      # Default sort is by count; a non-default aggregate sort wins.
      sort = {'count': facet['sort']}
      for i, agg in enumerate(self._get_dimension_aggregates(facets)):
        if agg['sort'] != 'default':
          agg_function = self._get_aggregate_function(agg)
          sort = {'agg_%02d_%02d:%s' % (dim, i, agg_function): agg['sort']}
      if sort.get('count') == 'default':
        sort['count'] = 'desc'

      _f[f_name] = {
          'type': 'terms',
          'field': '%(field)s' % facet,
          'limit': int(facet.get('limit', 10)),
          'mincount': int(facet['mincount']),
          'numBuckets': True,
          'allBuckets': True,
          'sort': sort,
          'missing': facet.get('missing', False)
          #'prefix': '' # Forbidden on numeric fields
      }

      # A 'start' property turns the terms facet into a range facet.
      if 'start' in facet and not facet.get('type') == 'field':
        _f[f_name].update({
            'type': 'range',
            'start': facet['start'],
            'end': facet['end'],
            'gap': facet['gap']
        })

        # Only on dim 1 currently
        if timeFilter and timeFilter['time_field'] == facet['field'] and (widget['id'] not in timeFilter['time_filter_overrides']): # or facet['widgetType'] != 'bucket-widget'):
          facet['widgetType'] = widget['widgetType']
          _f[f_name].update(self._get_time_filter_query(timeFilter, facet))

      if widget['widgetType'] == 'tree2-widget' and facets[-1]['aggregate']['function'] != 'count':
        _f['subcount'] = self._get_aggregate_function(facets[-1])

      if len(facets) > 1: # Get n+1 dimension
        if facets[1]['aggregate']['function'] == 'count':
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1, timeFilter)
        else:
          self._n_facet_dimension(widget, _f[f_name], facets[1:], dim, timeFilter)
    else:
      # Leading non-count entry: emit aggregates until the next count
      # dimension, then recurse for it.
      agg_function = self._get_aggregate_function(facet)
      _f['facet'] = {
          'agg_%02d_00:%s' % (dim, agg_function): agg_function
      }
      for i, _f_agg in enumerate(facets[1:], 1):
        if _f_agg['aggregate']['function'] != 'count':
          agg_function = self._get_aggregate_function(_f_agg)
          _f['facet']['agg_%02d_%02d:%s' % (dim, i, agg_function)] = agg_function
        else:
          self._n_facet_dimension(widget, _f, facets[i:], dim + 1, timeFilter) # Get n+1 dimension
          break


  def select(self, collection, query=None, rows=100, start=0):
    """Plain paged select against *collection*; defaults to the empty query."""
    if query is None:
      query = EMPTY_QUERY.get()

    params = self._get_params() + (
        ('q', query),
        ('wt', 'json'),
        ('rows', rows),
        ('start', start),
    )

    response = self._root.get('%s/select' % collection, params)
    return self._get_json(response)


  def suggest(self, collection, query):
    """Query the suggester of *collection*; wraps REST failures for the UI."""
    try:
      params = self._get_params() + (
          ('suggest', 'true'),
          ('suggest.build', 'true'),
          ('suggest.q', query['q']),
          ('wt', 'json'),
      )
      if query.get('dictionary'):
        params += (
            ('suggest.dictionary', query['dictionary']),
        )
      response = self._root.get('%s/suggest' % collection, params)
      return self._get_json(response)
    # Py2-only except syntax; would be 'except RestException as e' in Py3.
    except RestException, e:
      raise PopupException(e, title=_('Error while accessing Solr'))
Exemplo n.º 57
0
class ResourceManagerApi(object):
  """REST client for the YARN ResourceManager web services API.

  Exposes cluster/application read endpoints plus application kill via the
  app-state endpoint. The effective user is tracked per thread (see
  ``setuser``) and impersonated through the ``doAs`` query parameter when
  it differs from the default user.
  """

  def __init__(self, rm_url, security_enabled=False, ssl_cert_ca_verify=False):
    """Root the client at ``<rm_url>/ws/<_API_VERSION>`` and configure auth."""
    self._url = posixpath.join(rm_url, 'ws', _API_VERSION)
    self._client = HttpClient(self._url, logger=LOG)
    self._root = Resource(self._client)
    self._security_enabled = security_enabled
    self._thread_local = threading.local() # To store user info
    # Presumably toggled by RM HA failover handling elsewhere -- TODO confirm.
    self.from_failover = False

    if self._security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

  def _get_params(self):
    """Common query params: adds doAs (and user.name on insecure clusters)."""
    params = {}

    if self.username != DEFAULT_USER.get(): # We impersonate if needed
      params['doAs'] = self.username
      if not self.security_enabled:
        params['user.name'] = DEFAULT_USER.get()

    return params

  def __str__(self):
    return "ResourceManagerApi at %s" % (self._url,)

  def setuser(self, user):
    """Set the per-thread effective user and return the previous one."""
    curr = self.user
    self._thread_local.user = user
    return curr

  @property
  def user(self):
    return self.username # Backward compatibility

  @property
  def username(self):
    """Per-thread user name, falling back to the configured default user."""
    try:
      return self._thread_local.user
    except AttributeError:
      return DEFAULT_USER.get()

  @property
  def url(self):
    return self._url

  @property
  def security_enabled(self):
    return self._security_enabled

  def cluster(self, **kwargs):
    """Return cluster info.

    NOTE(review): kwargs are accepted for interface compatibility but are
    not forwarded -- 'cluster/info' takes no query arguments here.
    """
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/info', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def apps(self, **kwargs):
    """List applications; kwargs become RM filter params (states, user, ...)."""
    params = self._get_params()
    params.update(kwargs)
    return self._execute(self._root.get, 'cluster/apps', params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def app(self, app_id):
    """Return the details of one application."""
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def appattempts(self, app_id):
    """Return all attempts of an application."""
    params = self._get_params()
    return self._execute(self._root.get, 'cluster/apps/%(app_id)s/appattempts' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE})

  def appattempts_attempt(self, app_id, attempt_id):
    """Return a single application attempt, or raise PopupException if absent."""
    attempts = self.appattempts(app_id)
    for attempt in attempts['appAttempts']['appAttempt']:
      if attempt['id'] == attempt_id:
        return attempt
    raise PopupException('Application {} does not have application attempt with id {}'.format(app_id, attempt_id))

  def kill(self, app_id):
    """Transition *app_id* to the KILLED state."""
    data = {'state': 'KILLED'}
    token = None

    # Tokens are managed within the kill method but should be moved out when not alpha anymore or we support submitting an app.
    # The 'and False' deliberately disables the delegation-token path for now.
    if self.security_enabled and False:
      full_token = self.delegation_token()
      if 'token' not in full_token:
        raise PopupException(_('YARN did not return any token field.'), detail=smart_str(full_token))
      data['X-Hadoop-Delegation-Token'] = token = full_token.pop('token')
      LOG.debug('Received delegation token %s' % full_token)

    try:
      params = self._get_params()
      return self._execute(self._root.put, 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}, params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)
    finally:
      if token:
        self.cancel_token(token)

  def delegation_token(self):
    """Request a delegation token renewable by the current user."""
    params = self._get_params()
    data = {'renewer': self.username}
    return self._execute(self._root.post, 'cluster/delegation-token', params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE)

  def cancel_token(self, token):
    """Cancel a previously acquired delegation token."""
    params = self._get_params()
    headers = {'Hadoop-YARN-RM-Delegation-Token': token}
    # Fixed: the format string was missing its %s placeholder, which made
    # the '%' operator raise TypeError at runtime.
    LOG.debug('Canceling delegation token of %s' % self.username)
    return self._execute(self._root.delete, 'cluster/delegation-token', params=params, headers=headers)

  def _execute(self, function, *args, **kwargs):
    """Invoke *function*, wrapping any failure into a PopupException."""
    response = None
    try:
      response = function(*args, **kwargs)
    except Exception as e:  # 'except X, e' modernized; valid on Py2.6+ and Py3
      raise PopupException(_('YARN RM returned a failed response: %s') % e)
    return response
Exemplo n.º 58
0
class LivyClient(object):
    """Thin REST client for an Apache Livy server.

    Wraps the sessions, statements and batches endpoints. The effective
    user is tracked per thread through ``setuser``/``user``; security,
    CSRF and TLS verification come from the module configuration.
    """

    def __init__(self, livy_url):
        """Bind the client to *livy_url* and apply the configured auth/CSRF."""
        self._url = posixpath.join(livy_url)
        self._thread_local = threading.local()  # per-thread effective user
        self._security_enabled = SECURITY_ENABLED.get()
        self._csrf_enabled = CSRF_ENABLED.get()

        client = HttpClient(self._url, logger=LOG)
        if self._security_enabled:
            client.set_kerberos_auth()
        if self._csrf_enabled:
            client.set_headers({'X-Requested-By': 'hue'})
        client.set_verify(SSL_CERT_CA_VERIFY.get())

        self._client = client
        self._root = Resource(self._client)

    def __str__(self):
        """Short description used in logs."""
        return "LivyClient at %s" % (self._url, )

    @property
    def url(self):
        """Base URL this client talks to."""
        return self._url

    @property
    def security_enabled(self):
        """Whether Kerberos auth was enabled at construction time."""
        return self._security_enabled

    @property
    def csrf_enabled(self):
        """Whether the X-Requested-By CSRF header is sent."""
        return self._csrf_enabled

    @property
    def user(self):
        """Per-thread effective user name (set via setuser)."""
        return self._thread_local.user

    def setuser(self, user):
        """Remember *user* (an object with .username, or a plain name) for this thread."""
        self._thread_local.user = getattr(user, 'username', user)

    def get_status(self):
        """List all sessions (same endpoint as get_sessions)."""
        return self._root.get('sessions')

    def _log_lines(self, path, startFrom, size):
        """Shared fetch for session/batch logs; joins the lines with newlines."""
        params = {
            key: value
            for key, value in (('from', startFrom), ('size', size))
            if value is not None
        }
        return '\n'.join(self._root.get(path, params=params)['log'])

    def get_log(self, uuid, startFrom=None, size=None):
        """Return the log of session *uuid* as a single string."""
        return self._log_lines('sessions/%s/log' % uuid, startFrom, size)

    def create_session(self, **properties):
        """Create a session for the current user; returns the session JSON."""
        properties['proxyUser'] = self.user.split('@')[0]  # strip Kerberos realm
        if has_connectors():  # Only SQL supported via connectors currently
            properties['kind'] = 'sql'
        payload = json.dumps(properties)
        return self._root.post('sessions', data=payload, contenttype=_JSON_CONTENT_TYPE)

    def get_sessions(self):
        """List all sessions."""
        return self._root.get('sessions')

    def get_session(self, uuid):
        """Fetch one session by id."""
        return self._root.get('sessions/%s' % uuid)

    def get_statements(self, uuid):
        """List the statements of a session."""
        return self._root.get('sessions/%s/statements' % uuid)

    def submit_statement(self, uuid, statement):
        """Submit *statement* to session *uuid* for execution."""
        payload = json.dumps({'code': statement})
        return self._root.post('sessions/%s/statements' % uuid, data=payload, contenttype=_JSON_CONTENT_TYPE)

    def inspect(self, uuid, statement):
        """Ask the session to inspect *statement* without full execution."""
        payload = json.dumps({'code': statement})
        return self._root.post('sessions/%s/inspect' % uuid, data=payload, contenttype=_JSON_CONTENT_TYPE)

    def fetch_data(self, session, statement):
        """Fetch the result of one statement in a session."""
        path = 'sessions/%s/statements/%s' % (session, statement)
        return self._root.get(path)

    def cancel(self, session):
        """Interrupt the currently-running statement of *session*."""
        return self._root.post('sessions/%s/interrupt' % session)

    def close(self, uuid):
        """Delete session *uuid*."""
        return self._root.delete('sessions/%s' % uuid)

    def get_batches(self):
        """List all batches."""
        return self._root.get('batches')

    def submit_batch(self, properties):
        """Submit a batch job on behalf of the current user."""
        properties['proxyUser'] = self.user
        payload = json.dumps(properties)
        return self._root.post('batches', data=payload, contenttype=_JSON_CONTENT_TYPE)

    def get_batch(self, uuid):
        """Fetch one batch by id."""
        return self._root.get('batches/%s' % uuid)

    def get_batch_status(self, uuid):
        """Return just the state string of batch *uuid*."""
        return self._root.get('batches/%s/state' % uuid)['state']

    def get_batch_log(self, uuid, startFrom=None, size=None):
        """Return the log of batch *uuid* as a single string."""
        return self._log_lines('batches/%s/log' % uuid, startFrom, size)

    def close_batch(self, uuid):
        """Delete batch *uuid*."""
        return self._root.delete('batches/%s' % uuid)
Exemplo n.º 59
0
class HistoryServerApi(object):
    def __init__(self,
                 oozie_url,
                 security_enabled=False,
                 ssl_cert_ca_verify=False):
        """Root the client at <url>/ws/<_API_VERSION>/history.

        NOTE(review): the parameter is named oozie_url but this client
        targets the MapReduce history server REST API.
        """
        self._url = posixpath.join(oozie_url, 'ws/%s/history' % _API_VERSION)
        self._security_enabled = security_enabled
        self._thread_local = threading.local()  # To store user info

        client = HttpClient(self._url, logger=LOG)
        if security_enabled:
            client.set_kerberos_auth()
        client.set_verify(ssl_cert_ca_verify)

        self._client = client
        self._root = Resource(self._client)

    def __str__(self):
        return "HistoryServerApi at %s" % (self._url, )

    def _get_params(self):
        """Query params shared by all requests; handles impersonation."""
        params = {}
        # Impersonate via doAs whenever the effective user is not the default.
        if self.username != DEFAULT_USER.get():
            params['doAs'] = self.username
            if not self._security_enabled:
                # Insecure clusters identify the caller through user.name.
                params['user.name'] = DEFAULT_USER.get()
        return params

    @property
    def url(self):
        return self._url

    @property
    def user(self):
        return self.username  # Backward compatibility

    @property
    def username(self):
        try:
            return self._thread_local.user
        except AttributeError:
            return DEFAULT_USER.get()

    def setuser(self, user):
        curr = self.user
        self._thread_local.user = user
        return curr

    def job(self, user, job_id):
        """Fetch job details; *user* is unused here (kept for interface compatibility)."""
        path = 'mapreduce/jobs/%(job_id)s' % {'job_id': job_id}
        headers = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get(path, params=self._get_params(), headers=headers)

    def counters(self, job_id):
        """Fetch the counters of *job_id*."""
        path = 'mapreduce/jobs/%(job_id)s/counters' % {'job_id': job_id}
        headers = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get(path, params=self._get_params(), headers=headers)

    def conf(self, job_id):
        """Fetch the configuration of *job_id*."""
        path = 'mapreduce/jobs/%(job_id)s/conf' % {'job_id': job_id}
        headers = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get(path, params=self._get_params(), headers=headers)

    def job_attempts(self, job_id):
        """List the attempts made to run *job_id*."""
        path = 'mapreduce/jobs/%(job_id)s/jobattempts' % {'job_id': job_id}
        headers = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get(path, params=self._get_params(), headers=headers)

    def tasks(self, job_id):
        """List all tasks of *job_id*."""
        path = 'mapreduce/jobs/%(job_id)s/tasks' % {'job_id': job_id}
        headers = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get(path, params=self._get_params(), headers=headers)

    def task(self, job_id, task_id):
        """Fetch the details of one task of a job."""
        path = 'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s' % {'job_id': job_id, 'task_id': task_id}
        headers = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get(path, params=self._get_params(), headers=headers)

    def task_attempts(self, job_id, task_id):
        """List the attempts of one task of a job."""
        path = 'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts' % {'job_id': job_id, 'task_id': task_id}
        headers = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get(path, params=self._get_params(), headers=headers)

    def task_counters(self, job_id, task_id):
        """Fetch the counters of one task; accepts application_* ids and maps them to job_* ids."""
        job_id = job_id.replace('application', 'job')
        path = 'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/counters' % {'job_id': job_id, 'task_id': task_id}
        headers = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get(path, params=self._get_params(), headers=headers)

    def task_attempt(self, job_id, task_id, attempt_id):
        """Fetch the details of one attempt of one task of a job."""
        ids = {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id}
        path = 'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s' % ids
        headers = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get(path, params=self._get_params(), headers=headers)

    def task_attempt_counters(self, job_id, task_id, attempt_id):
        """Fetch the counters of one attempt of one task of a job."""
        ids = {'job_id': job_id, 'task_id': task_id, 'attempt_id': attempt_id}
        path = 'mapreduce/jobs/%(job_id)s/tasks/%(task_id)s/attempts/%(attempt_id)s/counters' % ids
        headers = {'Accept': _JSON_CONTENT_TYPE}
        return self._root.get(path, params=self._get_params(), headers=headers)