Example #1
def _list_query_history(user, querydict, page_size, prefix=""):
    """
  _list_query_history(user, querydict, page_size, prefix) -> (paginator, page, filter_params)

  A helper to gather the history page. It understands all the GET params in
  ``list_query_history``, by reading keys from the ``querydict`` with the
  given ``prefix``.
  """
    DEFAULT_SORT = ('-', 'date')  # Descending date

    SORT_ATTR_TRANSLATION = dict(
        date='submission_date',
        state='last_state',
        name='design__name',
        type='design__type',
    )

    db_queryset = models.QueryHistory.objects.select_related()

    # Filtering
    #
    # Queries without designs are the ones we submit on behalf of the user
    # (e.g. to view table data). Exclude those when returning query history.
    if querydict.get(prefix + 'auto_query', 'on') != 'on':
        db_queryset = db_queryset.exclude(design__isnull=False,
                                          design__is_auto=True)

    user_filter = querydict.get(prefix + 'user', user.username)
    if user_filter != ':all':
        db_queryset = db_queryset.filter(owner__username=user_filter)

    # Design id
    design_id = querydict.get(prefix + 'design_id')
    if design_id:
        if design_id.isdigit():
            db_queryset = db_queryset.filter(design__id=int(design_id))
        else:
            raise PopupException(
                _('list_query_history requires design_id parameter to be an integer: %s'
                  ) % design_id)

    # Search
    search_filter = querydict.get(prefix + 'search')
    if search_filter:
        db_queryset = db_queryset.filter(
            Q(design__name__icontains=search_filter)
            | Q(query__icontains=search_filter)
            | Q(owner__username__icontains=search_filter))

    # Design type
    d_type = querydict.get(prefix + 'type')
    if d_type:
        if d_type not in list(SavedQuery.TYPES_MAPPING.keys()):
            LOG.warn('Bad parameter to list_query_history: type=%s' %
                     (d_type, ))
        else:
            db_queryset = db_queryset.filter(
                design__type=SavedQuery.TYPES_MAPPING[d_type])

    # If recent query
    recent = querydict.get('recent')
    if recent:
        db_queryset = db_queryset.filter(is_cleared=False)

    # Ordering
    sort_key = querydict.get(prefix + 'sort')
    if sort_key:
        sort_dir, sort_attr = '', sort_key
        if sort_key[0] == '-':
            sort_dir, sort_attr = '-', sort_key[1:]

        if sort_attr not in SORT_ATTR_TRANSLATION:
            LOG.warn('Bad parameter to list_query_history: sort=%s' %
                     (sort_key, ))
            sort_dir, sort_attr = DEFAULT_SORT
    else:
        sort_dir, sort_attr = DEFAULT_SORT
    db_queryset = db_queryset.order_by(
        sort_dir + SORT_ATTR_TRANSLATION[sort_attr], '-id')

    # Get the total return count before slicing
    total_count = db_queryset.count()

    # Slicing (must be the last filter applied)
    pagenum = int(querydict.get(prefix + 'page', 1))
    if pagenum < 1:
        pagenum = 1
    db_queryset = db_queryset[page_size * (pagenum - 1):page_size * pagenum]
    paginator = Paginator(db_queryset, page_size, allow_empty_first_page=True)

    try:
        page = paginator.page(pagenum)
    except EmptyPage:
        page = None

    # We do slicing ourselves, rather than letting the Paginator handle it, in order to
    # update the last_state on the running queries
    if page:
        for history in page.object_list:
            _update_query_state(history.get_full_object())

    # We need to pass the parameters back to the template to generate links
    keys_to_copy = [
        prefix + key for key in ('user', 'type', 'sort', 'design_id',
                                 'auto_query', 'search')
    ]
    filter_params = copy_query_dict(querydict, keys_to_copy)

    return paginator, page, filter_params
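
A minimal usage sketch, for illustration only (the wrapper name, page size and prefix are assumptions; in Hue the real list_query_history view feeds the result into a template):

def _query_history_page_sketch(request):
    # Hypothetical caller of the helper above.
    paginator, page, filter_params = _list_query_history(
        request.user, request.GET, page_size=50, prefix='q-')
    # 'page' may be None if the requested page was empty; 'filter_params' carries the
    # copied filter keys so the template can rebuild pagination links.
    return paginator, page, filter_params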
Example #2
 def collection(self, name):
   try:
     collections = self.collections()
     return collections[name]
   except Exception as e:
     raise PopupException(e, title=_('Error while accessing Solr'))
Example #3
  def app(self, appid):
    try:
      job = NativeYarnApi(self.user).get_job(jobid=appid)
    except ApplicationNotRunning as e:
      if e.job.get('state', '').lower() == 'accepted':
        rm_api = resource_manager_api.get_resource_manager(self.user)
        job = Application(e.job, rm_api)
      else:
        raise e  # Job has not yet been accepted by RM
    except JobExpired as e:
      raise PopupException(_('Job %s has expired.') % appid, detail=_('Cannot be found on the History Server.'))
    except Exception as e:
      msg = 'Could not find job %s.'
      LOG.exception(msg % appid)
      raise PopupException(_(msg) % appid, detail=e)


    app = massage_job_for_json(job, user=self.user)

    common = {
        'id': app['id'],
        'name': app['name'],
        'type': app['applicationType'],
        'status': app['status'],
        'apiStatus': self._api_status(app['status']),
        'user': app['user'],
        'progress': app['progress'],
        'duration': app['durationMs'],
        'submitted': app['startTimeMs'],
        'canWrite': app['canKill'],
    }

    if app['applicationType'] == 'MR2' or app['applicationType'] == 'MAPREDUCE':
      common['type'] = 'MAPREDUCE'

      if app['desiredMaps'] is None or app['finishedMaps'] is None:
        app['mapsPercentComplete'] = 100
      if app['desiredReduces'] is None or app['finishedReduces'] is None:
        app['reducesPercentComplete'] = 100

      common['properties'] = {
          'maps_percent_complete': app['mapsPercentComplete'] or 0,
          'reduces_percent_complete': app['reducesPercentComplete'] or 0,
          'finishedMaps': app['finishedMaps'] or 0,
          'finishedReduces': app['finishedReduces'] or 0,
          'desiredMaps': app['desiredMaps'] or 0,
          'desiredReduces': app['desiredReduces'] or 0,
          'durationFormatted': app['durationFormatted'],
          'startTimeFormatted': app['startTimeFormatted'],
          'diagnostics': app['diagnostics'] if app['diagnostics'] else '',
          'tasks': [],
          'metadata': [],
          'counters': []
      }
    elif app['applicationType'] == 'SPARK':
      app['logs'] = job.logs_url if hasattr(job, 'logs_url') else ''
      app['trackingUrl'] = job.trackingUrl if hasattr(job, 'trackingUrl') else ''
      common['type'] = 'SPARK'
      common['properties'] = {
        'metadata': [{'name': name, 'value': value} for name, value in app.items() if name != "url" and name != "killUrl"],
        'executors': []
      }
      if hasattr(job, 'metrics'):
        common['metrics'] = job.metrics
    elif app['applicationType'] == 'YarnV2':
      common['applicationType'] = app.get('type')
      common['properties'] = {
        'startTime': job.startTime,
        'finishTime': job.finishTime,
        'elapsedTime': job.duration,
        'attempts': [],
        'diagnostics': job.diagnostics
      }

    return common
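
The shape of the returned dictionary, sketched as comments (keys come from the code above, the application id format from Example #12; the values shown are illustrative):

# common = self.app('application_1355791146953_0105')
# common['type']        -> 'MAPREDUCE', 'SPARK', or the raw applicationType
# common['apiStatus']   -> normalized status computed by self._api_status(...)
# common['properties']  -> type-specific details: map/reduce completion for MAPREDUCE,
#                          metadata/executors for SPARK, attempts/diagnostics for YarnV2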
Example #4
    return wraps(view_func)(decorate)


def get_job(request, job_id):
    try:
        job = get_api(request.user, request.jt).get_job(jobid=job_id)
    except ApplicationNotRunning as e:
        if e.job.get('state', '').lower() == 'accepted':
            rm_api = resource_manager_api.get_resource_manager(request.user)
            job = Application(e.job, rm_api)
        else:
            raise e  # Job has not yet been accepted by RM
    except JobExpired as e:
        raise PopupException(
            _('Job %s has expired.') % job_id,
            detail=_('Cannot be found on the History Server.'))
    except Exception as e:
        msg = 'Could not find job %s.'
        LOG.exception(msg % job_id)
        raise PopupException(_(msg) % job_id, detail=e)
    return job


def apps(request):
    return render(
        'apps.mako', request, {
            'is_embeddable':
            request.GET.get('is_embeddable', False),
            'hiveserver2_impersonation_enabled':
            hiveserver2_impersonation_enabled()
Example #5
    try:
        users = sync_ldap_users()
        groups = sync_ldap_groups()
    except ldap.LDAPError as e:
        LOG.error("LDAP Exception: %s" % e)
        raise PopupException(
            _('There was an error when communicating with LDAP'),
            detail=str(e))

    # Create home dirs for every user sync'd
    if is_ensuring_home_directory:
        for user in users:
            try:
                ensure_home_directory(fs, user.username)
            except (IOError, WebHdfsException) as e:
                raise PopupException(
                    _("The import may not be complete, sync again."), detail=e)


def ensure_home_directory(fs, username):
    """
  Adds a user's home directory if it doesn't already exist.

  Throws IOError, WebHdfsException.
  """
    home_dir = '/user/%s' % username
    fs.do_as_user(username, fs.create_home_dir, home_dir)


def _check_remove_last_super(user_obj):
    """Raise an error if we're removing the last superuser"""
    if not user_obj.is_superuser:
Example #6
 def popup_exception_view(request, *args, **kwargs):
   raise PopupException(exc_msg, title="earráid", detail=exc_msg)
Example #7
 def download(self, notebook, snippet, format):
     raise PopupException('Downloading is not supported yet')
Example #8
    def upload(self,
               data,
               data_type='queries',
               source_platform='generic',
               workload_id=None):
        if data_type in ('table_stats', 'cols_stats'):
            data_suffix = '.json'
            if data_type == 'table_stats':
                extra_parameters = {'fileType': 'TABLE_STATS'}
            else:
                extra_parameters = {'fileType': 'COLUMN_STATS'}
        else:
            data_suffix = '.csv'
            extra_parameters = {
                'colDelim': ',',
                'rowDelim': '\n',
                "headerFields": [{
                    "count": 0,
                    "name": "SQL_ID",
                    "coltype": "SQL_ID",
                    "use": True,
                    "tag": ""
                }, {
                    "count": 0,
                    "name": "ELAPSED_TIME",
                    "coltype": "NONE",
                    "use": True,
                    "tag": ""
                }, {
                    "count": 0,
                    "name": "SQL_FULLTEXT",
                    "coltype": "SQL_QUERY",
                    "use": True,
                    "tag": ""
                }, {
                    "count": 0,
                    "name": "DATABASE",
                    "coltype": "NONE",
                    "use": True,
                    "tag": "DATABASE"
                }],
            }

        f_queries_path = NamedTemporaryFile(suffix=data_suffix)
        f_queries_path.close()  # Reopened as real file below to work well with the command

        try:
            f_queries = open(f_queries_path.name, 'w+')

            try:
                # Queries
                if data_suffix == '.csv':
                    content_generator = OptimizerQueryDataAdapter(data)
                    queries_csv = export_csvxls.create_generator(
                        content_generator, 'csv')

                    for row in queries_csv:
                        f_queries.write(row)
                        LOG.debug(row)
                else:
                    # Table, column stats
                    f_queries.write(json.dumps(data))
                    LOG.debug(json.dumps(data))

            finally:
                f_queries.close()

            parameters = {
                'tenant': self._product_name,
                'fileLocation': f_queries.name,
                'sourcePlatform': source_platform,
            }
            parameters.update(extra_parameters)
            response = self._api.call_api('upload', parameters)
            status = json.loads(response)

            status['count'] = len(data)
            return status

        except RestException as e:
            raise PopupException(e, title=_('Error while accessing Optimizer'))
Example #9
def get_api(request, snippet):
    from notebook.connectors.oozie_batch import OozieApi

    if snippet.get('wasBatchExecuted') and not TASK_SERVER.ENABLED.get():
        return OozieApi(user=request.user, request=request)

    if snippet['type'] == 'report':
        snippet['type'] = 'impala'

    interpreter = get_interpreter(connector_type=snippet['type'],
                                  user=request.user)
    interface = interpreter['interface']

    if get_cluster_config(request.user)['has_computes']:
        # Via Catalog autocomplete API or Notebook create sessions.
        compute = json.loads(request.POST.get('cluster', '""'))
        if compute == '""' or compute == 'undefined':
            compute = None
        if not compute and snippet.get('compute'):  # Via notebook.ko.js
            interpreter['compute'] = snippet['compute']

    LOG.debug('Selected interpreter %s interface=%s compute=%s' %
              (interpreter['type'], interface, interpreter.get('compute')
               and interpreter['compute']['name']))

    if interface == 'hiveserver2':
        from notebook.connectors.hiveserver2 import HS2Api
        return HS2Api(user=request.user,
                      request=request,
                      interpreter=interpreter)
    elif interface == 'oozie':
        return OozieApi(user=request.user, request=request)
    elif interface == 'livy':
        from notebook.connectors.spark_shell import SparkApi
        return SparkApi(request.user)
    elif interface == 'livy-batch':
        from notebook.connectors.spark_batch import SparkBatchApi
        return SparkBatchApi(request.user)
    elif interface == 'text' or interface == 'markdown':
        from notebook.connectors.text import TextApi
        return TextApi(request.user)
    elif interface == 'rdbms':
        from notebook.connectors.rdbms import RdbmsApi
        return RdbmsApi(request.user,
                        interpreter=snippet['type'],
                        query_server=snippet.get('query_server'))
    elif interface == 'jdbc':
        if interpreter['options'] and interpreter['options'].get(
                'url', '').find('teradata') >= 0:
            from notebook.connectors.jdbc_teradata import JdbcApiTeradata
            return JdbcApiTeradata(request.user, interpreter=interpreter)
        if interpreter['options'] and interpreter['options'].get(
                'url', '').find('awsathena') >= 0:
            from notebook.connectors.jdbc_athena import JdbcApiAthena
            return JdbcApiAthena(request.user, interpreter=interpreter)
        elif interpreter['options'] and interpreter['options'].get(
                'url', '').find('presto') >= 0:
            from notebook.connectors.jdbc_presto import JdbcApiPresto
            return JdbcApiPresto(request.user, interpreter=interpreter)
        elif interpreter['options'] and interpreter['options'].get(
                'url', '').find('clickhouse') >= 0:
            from notebook.connectors.jdbc_clickhouse import JdbcApiClickhouse
            return JdbcApiClickhouse(request.user, interpreter=interpreter)
        elif interpreter['options'] and interpreter['options'].get(
                'url', '').find('vertica') >= 0:
            from notebook.connectors.jdbc_vertica import JdbcApiVertica
            return JdbcApiVertica(request.user, interpreter=interpreter)
        else:
            from notebook.connectors.jdbc import JdbcApi
            return JdbcApi(request.user, interpreter=interpreter)
    elif interface == 'teradata':
        from notebook.connectors.jdbc import JdbcApiTeradata
        return JdbcApiTeradata(request.user, interpreter=interpreter)
    elif interface == 'athena':
        from notebook.connectors.jdbc import JdbcApiAthena
        return JdbcApiAthena(request.user, interpreter=interpreter)
    elif interface == 'presto':
        from notebook.connectors.jdbc_presto import JdbcApiPresto
        return JdbcApiPresto(request.user, interpreter=interpreter)
    elif interface == 'sqlalchemy':
        from notebook.connectors.sql_alchemy import SqlAlchemyApi
        return SqlAlchemyApi(request.user, interpreter=interpreter)
    elif interface == 'solr':
        from notebook.connectors.solr import SolrApi
        return SolrApi(request.user, interpreter=interpreter)
    elif interface == 'hbase':
        from notebook.connectors.hbase import HBaseApi
        return HBaseApi(request.user)
    elif interface == 'kafka':
        from notebook.connectors.kafka import KafkaApi
        return KafkaApi(request.user)
    elif interface == 'pig':
        return OozieApi(user=request.user,
                        request=request)  # Backward compatibility until Hue 4
    else:
        raise PopupException(
            _('Notebook connector interface not recognized: %s') % interface)
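
A hedged sketch of how a caller uses the returned connector (mirrors execute_and_watch in Example #10; request, notebook and snippet are placeholders the caller already has):

api = get_api(request, snippet)  # snippet is a dict carrying at least a 'type' key
result = api.fetch_result(notebook, snippet, rows=100, start_over=True)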
Example #10
def execute_and_watch(request):
  notebook_id = request.GET.get('editor', request.GET.get('notebook'))
  snippet_id = int(request.GET['snippet'])
  action = request.GET['action']
  destination = request.GET['destination']

  notebook = Notebook(document=Document2.objects.get(id=notebook_id)).get_data()
  snippet = notebook['snippets'][snippet_id]
  editor_type = snippet['type']

  api = get_api(request, snippet)

  if action == 'save_as_table':
    sql, success_url = api.export_data_as_table(notebook, snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute', database=snippet['database'])
  elif action == 'insert_as_query':
    # TODO: checks/workarounds in case of non impersonation or Sentry
    # TODO: keep older simpler way in case of known not many rows?
    sql, success_url = api.export_large_data_to_hdfs(notebook, snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute', database=snippet['database'], on_success_url=success_url)
  elif action == 'index_query':
    if destination == '__hue__':
      destination = _get_snippet_name(notebook, unique=True, table_format=True)
      live_indexing = True
    else:
      live_indexing = False

    sql, success_url = api.export_data_as_table(notebook, snippet, destination, is_temporary=True, location='')
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute')

    sample = get_api(request, snippet).fetch_result(notebook, snippet, 0, start_over=True)

    from indexer.api3 import _index # Will be moved to the lib
    from indexer.file_format import HiveFormat
    from indexer.fields import Field

    file_format = {
        'name': 'col',
        'inputFormat': 'query',
        'format': {'quoteChar': '"', 'recordSeparator': '\n', 'type': 'csv', 'hasHeader': False, 'fieldSeparator': '\u0001'},
        "sample": '',
        "columns": [
            Field(col['name'].rsplit('.')[-1], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
            for col in sample['meta']
        ]
    }

    if live_indexing:
      file_format['inputFormat'] = 'hs2_handle'
      file_format['fetch_handle'] = lambda rows, start_over: get_api(request, snippet).fetch_result(notebook, snippet, rows=rows, start_over=start_over)

    job_handle = _index(request, file_format, destination, query=notebook['uuid'])

    if live_indexing:
      return redirect(reverse('search:browse', kwargs={'name': destination}))
    else:
      return redirect(reverse('oozie:list_oozie_workflow', kwargs={'job_id': job_handle['handle']['id']}))
  else:
    raise PopupException(_('Action %s is unknown') % action)

  return render('editor.mako', request, {
      'notebooks_json': json.dumps([editor.get_data()]),
      'options_json': json.dumps({
          'languages': [{"name": "%s SQL" % editor_type.title(), "type": editor_type}],
          'mode': 'editor',
          'editor_type': editor_type,
          'success_url': success_url
      }),
      'editor_type': editor_type,
  })
Example #11
  credentials = Credentials()

  try:
    credentials.fetch(api)
  except Exception as e:
    LOG.error(smart_str(e))

  if USE_NEW_EDITOR.get():
    workflows = [dict([('uuid', d.uuid), ('name', d.name)])
                      for d in Document2.objects.documents(request.user).search_documents(types=['oozie-workflow2'])]
  else:
    workflows = [dict([('uuid', d.content_object.uuid), ('name', d.content_object.name)])
                      for d in Document.objects.available_docs(Document2, request.user).filter(extra='workflow2')]

  if coordinator_id and not filter(lambda a: a['uuid'] == coordinator.data['properties']['workflow'], workflows):
    raise PopupException(_('You don\'t have access to the workflow of this coordinator.'))

  return render('editor2/coordinator_editor.mako', request, {
      'coordinator_json': coordinator.to_json_for_html(),
      'credentials_json': json.dumps(credentials.credentials.keys(), cls=JSONEncoderForHTML),
      'workflows_json': json.dumps(workflows, cls=JSONEncoderForHTML),
      'doc_uuid': doc.uuid if doc else '',
      'can_edit_json': json.dumps(doc is None or doc.doc.get().is_editable(request.user))
  })


@check_editor_access_permission
def new_coordinator(request):
  return edit_coordinator(request)

Example #12
class YarnApi(JobBrowserApi):
    """
  List all the jobs with Resource Manager API.
  Get running single job information with MapReduce API.
  Get finished single job information with History Server API.

  The trick is that we use appid when the job is running and jobid when it is finished.
  We also suppose that each app id has only one MR job id.
  e.g. job_1355791146953_0105, application_1355791146953_0105

  A better alternative might be to call the Resource Manager instead of relying on the type of job id.
  The perfect solution would be to have all this logic embedded
  """
    def __init__(self, user):
        self.user = user
        self.resource_manager_api = resource_manager_api.get_resource_manager()
        self.mapreduce_api = mapreduce_api.get_mapreduce_api()
        self.history_server_api = history_server_api.get_history_server_api()

    def get_job_link(self, job_id):
        return self.get_job(job_id)

    @rm_ha
    def get_jobs(self, user, **kwargs):
        state_filters = {
            'running': 'UNDEFINED',
            'completed': 'SUCCEEDED',
            'failed': 'FAILED',
            'killed': 'KILLED',
        }
        filters = {}

        if kwargs['username']:
            filters['user'] = kwargs['username']
        if kwargs['state'] and kwargs['state'] != 'all':
            filters['finalStatus'] = state_filters[kwargs['state']]

        json = self.resource_manager_api.apps(**filters)
        if type(json) == str and 'This is standby RM' in json:
            raise Exception(json)

        if json['apps']:
            jobs = [Application(app) for app in json['apps']['app']]
        else:
            return []

        if kwargs['text']:
            text = kwargs['text'].lower()
            jobs = filter(
                lambda job: text in job.name.lower() or text in job.id.lower()
                or text in job.user.lower() or text in job.queue.lower(), jobs)

        return self.filter_jobs(user, jobs)

    def filter_jobs(self, user, jobs, **kwargs):
        check_permission = not SHARE_JOBS.get() and not user.is_superuser

        return filter(
            lambda job: not check_permission or user.is_superuser or job.user
            == user.username, jobs)

    @rm_ha
    def get_job(self, jobid):
        try:
            # App id
            jobid = jobid.replace('job', 'application')
            job = self.resource_manager_api.app(jobid)['app']

            if job['state'] == 'ACCEPTED':
                raise ApplicationNotRunning(jobid, job)
            elif job['state'] == 'KILLED':
                return KilledYarnJob(self.resource_manager_api, job)

            if job.get('applicationType') == 'SPARK':
                job = SparkJob(job, self.resource_manager_api)
            elif job.get('applicationType') == 'MAPREDUCE':
                jobid = jobid.replace('application', 'job')

                if job['state'] in ('NEW', 'SUBMITTED', 'ACCEPTED', 'RUNNING'):
                    json = self.mapreduce_api.job(self.user, jobid)
                    job = YarnJob(self.mapreduce_api, json['job'])
                else:
                    json = self.history_server_api.job(self.user, jobid)
                    job = YarnJob(self.history_server_api, json['job'])
            else:
                job = Application(job, self.resource_manager_api)
        except ApplicationNotRunning as e:
            raise e
        except Exception as e:
            if 'NotFoundException' in str(e):
                raise JobExpired(jobid)
            else:
                raise PopupException('Job %s could not be found: %s' %
                                     (jobid, e),
                                     detail=e)
Example #13
 def check_request_permission(self, request):
   """Raise PopupException if request user doesn't have permission to modify workflow"""
   if not is_admin(request.user) and request.user.username != self.user:
     access_warn(request, _('Insufficient permission.'))
     raise PopupException(_("Permission denied. User %(username)s cannot modify user %(user)s's job.") %
                          dict(username=request.user.username, user=self.user))
Example #14
        if self.api.remove_collection(name):
            # Delete instance directory.
            try:
                root_node = '%s/%s' % (ZK_SOLR_CONFIG_NAMESPACE, name)
                with ZookeeperClient(hosts=get_solr_ensemble(),
                                     read_only=False) as zc:
                    zc.delete_path(root_node)
            except Exception as e:
                # Re-create collection so that we don't have an orphan config
                self.api.add_collection(name)
                raise PopupException(
                    _('Error in deleting Solr configurations.'), detail=e)
        else:
            raise PopupException(
                _('Could not remove collection. Check error logs for more info.'
                  ))

    def get_index_schema(self, index_name):
        """
    Returns a tuple of the unique key and schema fields for a given index
    """
        try:
            field_data = self.api.fields(index_name)
            fields = self._format_flags(field_data['schema']['fields'])
            uniquekey = self.api.uniquekey(index_name)
            return uniquekey, fields
        except Exception as e:
            LOG.exception(e.message)
            raise IndexControllerException(
                _("Error in getting schema information for index '%s'" %
Example #15
    try:
        if request.FILES.get('documents'):
            documents = request.FILES['documents'].read()
        else:
            documents = json.loads(request.POST.get('documents'))

        documents = json.loads(documents)
    except ValueError as e:
        raise PopupException(
            _('Failed to import documents, the file does not contain valid JSON.'
              ))

    # Validate documents
    if not _is_import_valid(documents):
        raise PopupException(
            _('Failed to import documents, the file does not contain the expected JSON schema for Hue documents.'
              ))

    docs = []

    uuids_map = dict((doc['fields']['uuid'], None) for doc in documents
                     if not is_reserved_directory(doc))

    for doc in documents:
        # Filter docs to import, ignoring reserved directories (home and Trash) and history docs
        if not is_reserved_directory(doc):
            # Remove any deprecated fields
            if 'tags' in doc['fields']:
                doc['fields'].pop('tags')

            # If doc is not owned by current user, make a copy of the document with current user as owner
Example #16
def import_documents(request):
    def is_reserved_directory(doc):
        return doc['fields']['type'] == 'directory' and doc['fields'][
            'name'] in (Document2.HOME_DIR, Document2.TRASH_DIR)

    try:
        if request.FILES.get('documents'):
            documents = request.FILES['documents'].read()
        else:
            documents = json.loads(request.POST.get('documents'))

        documents = json.loads(documents)
    except ValueError as e:
        raise PopupException(
            _('Failed to import documents, the file does not contain valid JSON.'
              ))

    # Validate documents
    if not _is_import_valid(documents):
        raise PopupException(
            _('Failed to import documents, the file does not contain the expected JSON schema for Hue documents.'
              ))

    docs = []

    uuids_map = dict((doc['fields']['uuid'], None) for doc in documents
                     if not is_reserved_directory(doc))

    for doc in documents:
        # Filter docs to import, ignoring reserved directories (home and Trash) and history docs
        if not is_reserved_directory(doc):
            # Remove any deprecated fields
            if 'tags' in doc['fields']:
                doc['fields'].pop('tags')

            # If doc is not owned by current user, make a copy of the document with current user as owner
            if doc['fields']['owner'][0] != request.user.username:
                doc = _copy_document_with_owner(doc, request.user, uuids_map)
            else:  # Update existing doc or create new
                doc = _create_or_update_document_with_owner(
                    doc, request.user, uuids_map)

            # For oozie docs replace dependent uuids with the newly created ones
            if doc['fields']['type'].startswith('oozie-'):
                doc = _update_imported_oozie_document(doc, uuids_map)

            # If the doc contains any history dependencies, ignore them
            # NOTE: this assumes that each dependency is exported as an array using the natural PK [uuid, version, is_history]
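            # e.g. a single dependency entry might look like ['8f2c...', 1, False] (illustrative values);
            # entries whose third element (is_history) is True are skipped by the filter below.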
            deps_minus_history = [
                dep for dep in doc['fields'].get('dependencies', [])
                if len(dep) >= 3 and not dep[2]
            ]
            doc['fields']['dependencies'] = deps_minus_history

            # Replace illegal characters
            if '/' in doc['fields']['name']:
                new_name = doc['fields']['name'].replace('/', '-')
                LOG.warn(
                    "Found illegal slash in document named: %s, renaming to: %s."
                    % (doc['fields']['name'], new_name))
                doc['fields']['name'] = new_name

            # Set last modified date to now
            doc['fields']['last_modified'] = datetime.now().replace(
                microsecond=0).isoformat()
            docs.append(doc)

    f = tempfile.NamedTemporaryFile(mode='w+', suffix='.json')
    f.write(json.dumps(docs))
    f.flush()

    stdout = string_io()
    try:
        with transaction.atomic():  # We wrap both commands to commit loaddata & sync
            management.call_command(
                'loaddata',
                f.name,
                verbosity=3,
                traceback=True,
                stdout=stdout,
                commit=False
            )  # We need to use commit=False because commit=True will close the connection and make Document.objects.sync fail.
            Document.objects.sync()

        if request.POST.get('redirect'):
            return redirect(request.POST.get('redirect'))
        else:
            return JsonResponse({
                'status': 0,
                'message': stdout.getvalue(),
                'count': len(documents),
                'created_count': len([doc for doc in documents if doc['pk'] is None]),
                'updated_count': len([doc for doc in documents if doc['pk'] is not None]),
                'username': request.user.username,
                'documents': [
                    dict([('name', doc['fields']['name']),
                          ('uuid', doc['fields']['uuid']),
                          ('type', doc['fields']['type']),
                          ('owner', doc['fields']['owner'][0])])
                    for doc in docs
                ]
            })
    except Exception as e:
        LOG.error('Failed to run loaddata command in import_documents:\n %s' %
                  stdout.getvalue())
        return JsonResponse({'status': -1, 'message': smart_str(e)})
    finally:
        stdout.close()
Example #17
def copy_document(request):
    uuid = json.loads(request.POST.get('uuid'), '""')

    if not uuid:
        raise PopupException(_('copy_document requires uuid'))

    # Document2 and Document model objects are linked and both are saved when saving
    document = Document2.objects.get_by_uuid(user=request.user, uuid=uuid)
    # Document model object
    document1 = document.doc.get()

    if document.type == 'directory':
        raise PopupException(_('Directory copy is not supported'))

    name = document.name + '-copy'

    # Make the copy of the Document2 model object
    copy_document = document.copy(name=name, owner=request.user)
    # Make the copy of Document model object too
    document1.copy(content_object=copy_document, name=name, owner=request.user)

    # Import workspace for all oozie jobs
    if document.type == 'oozie-workflow2' or document.type == 'oozie-bundle2' or document.type == 'oozie-coordinator2':
        from oozie.models2 import Workflow, Coordinator, Bundle, _import_workspace
        # Update the name field in the json 'data' field
        if document.type == 'oozie-workflow2':
            workflow = Workflow(document=document)
            workflow.update_name(name)
            workflow.update_uuid(copy_document.uuid)
            _import_workspace(request.fs, request.user, workflow)
            copy_document.update_data(
                {'workflow': workflow.get_data()['workflow']})
            copy_document.save()

        if document.type == 'oozie-bundle2' or document.type == 'oozie-coordinator2':
            if document.type == 'oozie-bundle2':
                bundle_or_coordinator = Bundle(document=document)
            else:
                bundle_or_coordinator = Coordinator(document=document)
            json_data = bundle_or_coordinator.get_data_for_json()
            json_data['name'] = name
            json_data['uuid'] = copy_document.uuid
            copy_document.update_data(json_data)
            copy_document.save()
            _import_workspace(request.fs, request.user, bundle_or_coordinator)
    elif document.type == 'search-dashboard':
        from dashboard.models import Collection2
        collection = Collection2(request.user, document=document)
        collection.data['collection']['label'] = name
        collection.data['collection']['uuid'] = copy_document.uuid
        copy_document.update_data(
            {'collection': collection.data['collection']})
        copy_document.save()
    # Keep the document and data in sync
    else:
        copy_data = copy_document.data_dict
        if 'name' in copy_data:
            copy_data['name'] = name
        if 'uuid' in copy_data:
            copy_data['uuid'] = copy_document.uuid
        copy_document.update_data(copy_data)
        copy_document.save()

    return JsonResponse({'status': 0, 'document': copy_document.to_dict()})
Example #18
        if api.remove_collection(name):
            # Delete instance directory.
            try:
                root_node = '%s/%s' % (ZK_SOLR_CONFIG_NAMESPACE, name)
                with ZookeeperClient(hosts=get_solr_ensemble(),
                                     read_only=False) as zc:
                    zc.delete_path(root_node)
            except Exception as e:
                # Re-create collection so that we don't have an orphan config
                api.add_collection(name)
                raise PopupException(
                    _('Error in deleting Solr configurations.'), detail=e)
        else:
            raise PopupException(
                _('Could not remove collection. Check error logs for more info.'
                  ))

    def update_collection(self, name, fields):
        """
    Only create new fields
    """
        api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
        # Create only new fields
        # Do not overwrite fields that already exist, since there is currently no way to do that.
        old_field_names = api.fields(name)['schema']['fields'].keys()
        new_fields = filter(lambda field: field['name'] not in old_field_names,
                            fields)
        new_fields_filtered = []
        for field in new_fields:
            new_field = {}
Example #19
File: views.py Project: ymc/hue
def index(request):
  try:
    overview = _get_global_overview()
  except Exception as e:
    raise PopupException(_('Could not correctly connect to Zookeeper.'), detail=e)
Example #20
class SavedQuery(models.Model):
    """
  Stores the queries that people have saved or submitted.

  Note that this used to be called QueryDesign. Any references to 'design'
  probably mean a SavedQuery.
  """
    DEFAULT_NEW_DESIGN_NAME = _('My saved query')
    AUTO_DESIGN_SUFFIX = _(' (new)')
    TYPES = QUERY_TYPES
    TYPES_MAPPING = {
        'beeswax': HQL,
        'hql': HQL,
        'impala': IMPALA,
        'rdbms': RDBMS,
        'spark': SPARK
    }

    type = models.IntegerField(null=False)
    owner = models.ForeignKey(User, db_index=True)
    # Data is a json of dictionary. See the beeswax.design module.
    data = models.TextField(max_length=65536)
    name = models.CharField(max_length=64)
    desc = models.TextField(max_length=1024)
    mtime = models.DateTimeField(auto_now=True)
    # An auto design is a place-holder for things users submit but have not saved.
    # We still want to store it as a design to allow users to save it later.
    is_auto = models.BooleanField(default=False, db_index=True)
    is_trashed = models.BooleanField(default=False,
                                     db_index=True,
                                     verbose_name=_t('Is trashed'),
                                     help_text=_t('If this query is trashed.'))

    doc = generic.GenericRelation(Document, related_name='hql_doc')

    class Meta:
        ordering = ['-mtime']

    def get_design(self):
        try:
            return HQLdesign.loads(self.data)
        except ValueError:
            # data is empty
            pass

    def clone(self):
        """clone() -> A new SavedQuery with a deep copy of the same data"""
        design = SavedQuery(type=self.type, owner=self.owner)
        design.data = copy.deepcopy(self.data)
        design.name = copy.deepcopy(self.name)
        design.desc = copy.deepcopy(self.desc)
        design.is_auto = copy.deepcopy(self.is_auto)
        return design

    @classmethod
    def create_empty(cls, app_name, owner, data):
        query_type = SavedQuery.TYPES_MAPPING[app_name]
        design = SavedQuery(owner=owner, type=query_type)
        design.name = SavedQuery.DEFAULT_NEW_DESIGN_NAME
        design.desc = ''
        design.data = data
        design.is_auto = True
        design.save()
        return design

    @staticmethod
    def get(id, owner=None, type=None):
        """
    get(id, owner=None, type=None) -> SavedQuery object

    Checks that the owner and type match (when given).
    May raise PopupException (type/owner mismatch).
    May raise SavedQuery.DoesNotExist.
    """
        try:
            design = SavedQuery.objects.get(id=id)
        except SavedQuery.DoesNotExist as err:
            msg = _('Cannot retrieve query id %(id)s.') % {'id': id}
            raise err

        if owner is not None and design.owner != owner:
            msg = _('Query id %(id)s does not belong to user %(user)s.') % {
                'id': id,
                'user': owner
            }
            LOG.error(msg)
            raise PopupException(msg)

        if type is not None and design.type != type:
            msg = _('Type mismatch for design id %(id)s (owner %(owner)s) - Expected %(expected_type)s, got %(real_type)s.') % \
                  {'id': id, 'owner': owner, 'expected_type': design.type, 'real_type': type}
            LOG.error(msg)
            raise PopupException(msg)

        return design
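
A hedged usage sketch of the lookup described in the get() docstring (the wrapper function and design_id are hypothetical):

def _load_design_sketch(design_id, user):
    # May raise SavedQuery.DoesNotExist or PopupException, as documented above.
    return SavedQuery.get(design_id, owner=user, type=SavedQuery.TYPES_MAPPING['hql'])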
Example #21
    def process_view(self, request, view_func, view_args, view_kwargs):
        """
    We also perform access logging in ``process_view()`` since we have the view function,
    which tells us the log level. The downside is that we don't have the status code,
     which isn't useful for status logging anyway.
    """
        access_log_level = getattr(view_func, 'access_log_level', None)
        # First, skip views not requiring login

        # If the view has "opted out" of login required, skip
        if hasattr(view_func, "login_notrequired"):
            log_page_hit(request,
                         view_func,
                         level=access_log_level or logging.DEBUG)
            return None

        # There are certain django views which are also opt-out, but
        # it would be evil to go add attributes to them
        if view_func in DJANGO_VIEW_AUTH_WHITELIST:
            log_page_hit(request,
                         view_func,
                         level=access_log_level or logging.DEBUG)
            return None

        # If user is logged in, check that he has permissions to access the
        # app.
        if request.user.is_active and request.user.is_authenticated():
            AppSpecificMiddleware.augment_request_with_app(request, view_func)

            # Until we get Django 1.3 and resolve returning the URL name, we just do a match of the name of the view
            try:
                access_view = 'access_view:%s:%s' % (
                    request._desktop_app, resolve(request.path)[0].__name__)
            except Exception as e:
                access_log(request,
                           'error checking view perm: %s',
                           e,
                           level=access_log_level)
                access_view = ''

            # Accessing an app can access an underlying other app.
            # e.g. impala or spark uses code from beeswax and so accessing impala shows up as beeswax here.
            # Here we trust the URL to be the real app we need to check the perms.
            app_accessed = request._desktop_app
            ui_app_accessed = get_app_name(request)
            if app_accessed != ui_app_accessed and ui_app_accessed not in (
                    'logs', 'accounts', 'login'):
                app_accessed = ui_app_accessed

            if app_accessed and \
                app_accessed not in ("desktop", "home", "about") and \
                not (request.user.has_hue_permission(action="access", app=app_accessed) or
                     request.user.has_hue_permission(action=access_view, app=app_accessed)):
                access_log(request,
                           'permission denied',
                           level=access_log_level)
                return PopupException(
                    _("You do not have permission to access the %(app_name)s application.") % {
                        'app_name': app_accessed.capitalize()
                    },
                    error_code=401).response(request)
            else:
                log_page_hit(request, view_func, level=access_log_level)
                return None
Example #22
 def cores(self):
     try:
         return self._root.get('admin/cores', params={'wt':
                                                      'json'})['status']
     except RestException as e:
         raise PopupException('Error while accessing Solr: %s' % e)
Example #23
 def cores(self):
     try:
         params = self._get_params() + (('wt', 'json'), )
         return self._root.get('admin/cores', params=params)['status']
     except RestException as e:
         raise PopupException(e, title=_('Error while accessing Solr'))
Example #24
    if USE_NEW_EDITOR.get():
        scheduled_uuid = coordinator.data['properties'][
            'workflow'] or coordinator.data['properties']['document']
        if scheduled_uuid:
            try:
                document = Document2.objects.get(uuid=scheduled_uuid)
            except Document2.DoesNotExist as e:
                document = None
                coordinator.data['properties']['workflow'] = ''
                LOG.warn("Workflow with uuid %s doesn't exist: %s" %
                         (scheduled_uuid, e))

            if document and document.is_trashed:
                raise PopupException(
                    _('Your workflow %s has been trashed!') %
                    (document.name if document.name else ''))

            if document and not document.can_read(request.user):
                raise PopupException(
                    _('You don\'t have access to the workflow or document of this coordinator.'
                      ))
    else:
        workflows = [
            dict([('uuid', d.content_object.uuid),
                  ('name', d.content_object.name)])
            for d in Document.objects.available_docs(
                Document2, request.user).filter(extra='workflow2')
        ]

        if coordinator_id and not filter(
Example #25
def edit_user(request, username=None):
    """
  edit_user(request, username = None) -> reply

  @type request:        HttpRequest
  @param request:       The request object
  @type username:       string
  @param username:      Default to None, when creating a new user
  """
    if request.user.username != username and not request.user.is_superuser:
        raise PopupException(
            _("You must be a superuser to add or edit another user."),
            error_code=401)

    if username is not None:
        instance = User.objects.get(username=username)
    else:
        instance = None

    if request.user.is_superuser:
        form_class = SuperUserChangeForm
    else:
        form_class = UserChangeForm

    if request.method == 'POST':
        form = form_class(request.POST, instance=instance)
        if form.is_valid():  # All validation rules pass
            if instance is None:
                instance = form.save()
                get_profile(instance)
            else:
                if username != form.instance.username:
                    raise PopupException(_("You cannot change a username."),
                                         error_code=401)
                if request.user.username == username and not form.instance.is_active:
                    raise PopupException(
                        _("You cannot make yourself inactive."),
                        error_code=401)

                global __users_lock
                __users_lock.acquire()
                try:
                    # form.instance (and instance) now carry the new data
                    orig = User.objects.get(username=username)
                    if orig.is_superuser:
                        if not form.instance.is_superuser or not form.instance.is_active:
                            _check_remove_last_super(orig)
                    else:
                        if form.instance.is_superuser and not request.user.is_superuser:
                            raise PopupException(
                                _("You cannot make yourself a superuser."),
                                error_code=401)

                    # All ok
                    form.save()
                    request.info(_('User information updated'))
                finally:
                    __users_lock.release()

            # Ensure home directory is created, if necessary.
            if form.cleaned_data['ensure_home_directory']:
                try:
                    ensure_home_directory(request.fs, instance.username)
                except (IOError, WebHdfsException) as e:
                    request.error(
                        _('Cannot make home directory for user %s.' %
                          instance.username))
            if request.user.is_superuser:
                return redirect(reverse(list_users))
            else:
                return redirect(
                    reverse(edit_user, kwargs={'username': username}))
Example #26
def _submit_coordinator(request, coordinator, mapping):
    try:
        wf = coordinator.workflow
        if IS_MULTICLUSTER_ONLY.get() and has_multi_cluster():
            mapping['auto-cluster'] = {
                u'additionalClusterResourceTags': [],
                u'automaticTerminationCondition': u'EMPTY_JOB_QUEUE',  # u'NONE',
                u'cdhVersion': u'CDH514',
                u'clouderaManagerPassword': u'guest',
                u'clouderaManagerUsername': u'guest',
                u'clusterName': u'analytics4',  # Add time variable
                u'computeWorkersConfiguration': {
                    u'bidUSDPerHr': 0,
                    u'groupSize': 0,
                    u'useSpot': False
                },
                u'environmentName': u'crn:altus:environments:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:environment:analytics/236ebdda-18bd-428a-9d2b-cd6973d42946',
                u'instanceBootstrapScript': u'',
                u'instanceType': u'm4.xlarge',
                u'jobSubmissionGroupName': u'',
                u'jobs': [
                    {
                        u'failureAction': u'INTERRUPT_JOB_QUEUE',
                        u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
                        u'sparkJob': {
                            u'applicationArguments': ['5'],
                            u'jars': [u's3a://datawarehouse-customer360/ETL/spark-examples.jar'],
                            u'mainClass': u'org.apache.spark.examples.SparkPi'
                        }
                    },
                    #         {
                    #           u'failureAction': u'INTERRUPT_JOB_QUEUE',
                    #           u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
                    #           u'sparkJob': {
                    #             u'applicationArguments': ['10'],
                    #             u'jars': [u's3a://datawarehouse-customer360/ETL/spark-examples.jar'],
                    #             u'mainClass': u'org.apache.spark.examples.SparkPi'
                    #           }
                    #         },
                    #         {
                    #           u'failureAction': u'INTERRUPT_JOB_QUEUE',
                    #           u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
                    #           u'sparkJob': {
                    #             u'applicationArguments': [u'filesystems3.conf'],
                    #             u'jars': [u's3a://datawarehouse-customer360/ETL/envelope-0.6.0-SNAPSHOT-c6.jar'],
                    #             u'mainClass': u'com.cloudera.labs.envelope.EnvelopeMain',
                    #             u'sparkArguments': u'--archives=s3a://datawarehouse-customer360/ETL/filesystems3.conf'
                    #           }
                    #         }
                ],
                u'namespaceName': u'crn:altus:sdx:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:namespace:analytics/7ea35fe5-dbc9-4b17-92b1-97a1ab32e410',
                u'publicKey': DEFAULT_PUBLIC_KEY.get(),
                u'serviceType': u'SPARK',
                u'workersConfiguration': {},
                u'workersGroupSize': u'3'
            }
        wf_dir = Submission(
            request.user,
            wf,
            request.fs,
            request.jt,
            mapping,
            local_tz=coordinator.data['properties']['timezone']).deploy()

        properties = {'wf_application_path': request.fs.get_hdfs_path(wf_dir)}
        properties.update(mapping)

        submission = Submission(request.user,
                                coordinator,
                                request.fs,
                                request.jt,
                                properties=properties)
        job_id = submission.run()

        return job_id
    except RestException as ex:
        LOG.exception('Error submitting coordinator')
        raise PopupException(_("Error submitting coordinator %s") %
                             (coordinator, ),
                             detail=ex._headers.get('oozie-error-message', ex),
                             error_code=200)
Example #27
def save_results_hdfs_file(request, query_history_id):
  """
  Save the results of a query to an HDFS file.

  Do not rerun the query.
  """
  response = {'status': 0, 'message': ''}

  query_history = authorized_get_query_history(request, query_history_id, must_exist=True)
  server_id, state = _get_query_handle_and_state(query_history)
  query_history.save_state(state)
  error_msg, log = None, None

  if request.method != 'POST':
    response['message'] = _('A POST request is required.')
  else:
    if not query_history.is_success():
      response['message'] = _('This query is %(state)s. Results unavailable.') % {'state': state}
      response['status'] = -1
      return JsonResponse(response)

    db = dbms.get(request.user, query_history.get_query_server_config())

    form = beeswax.forms.SaveResultsFileForm({
      'target_file': request.POST.get('path'),
      'overwrite': request.POST.get('overwrite', False),
    })

    if form.is_valid():
      target_file = form.cleaned_data['target_file']
      overwrite = form.cleaned_data['overwrite']

      try:
        handle, state = _get_query_handle_and_state(query_history)
      except Exception as ex:
        response['message'] = _('Cannot find query handle and state: %s') % str(query_history)
        response['status'] = -2
        return JsonResponse(response)

      try:
        if overwrite and request.fs.exists(target_file):
          if request.fs.isfile(target_file):
            request.fs.do_as_user(request.user.username, request.fs.rmtree, target_file)
          else:
            raise PopupException(_("The target path is a directory"))

        upload(target_file, handle, request.user, db, request.fs)

        response['type'] = 'hdfs-file'
        response['id'] = query_history.id
        response['query'] = query_history.query
        response['path'] = target_file
        response['success_url'] = '/filebrowser/view=%s' % target_file
        response['watch_url'] = reverse(get_app_name(request) + ':api_watch_query_refresh_json', kwargs={'id': query_history.id})
      except Exception as ex:
        error_msg, log = expand_exception(ex, db)
        response['message'] = _('The result could not be saved: %s.') % error_msg
        response['status'] = -3
    else:
      response['status'] = 1
      response['errors'] = form.errors

  return JsonResponse(response)
Example #28
def describe_partitions(request, database, table):
  cluster = json.loads(request.POST.get('cluster', '{}'))

  db = _get_db(user=request.user, cluster=cluster)
  table_obj = db.get_table(database, table)

  if not table_obj.partition_keys:
    raise PopupException(_("Table '%(table)s' is not partitioned.") % {'table': table})

  reverse_sort = request.GET.get("sort", "desc").lower() == "desc"

  if request.method == "POST":
    partition_filters = {}
    for part in table_obj.partition_keys:
      if request.GET.get(part.name):
        partition_filters[part.name] = request.GET.get(part.name)
    partition_spec = ','.join(["%s='%s'" % (k, v) for k, v in list(partition_filters.items())])
  else:
    partition_spec = ''

  try:
    partitions = db.get_partitions(database, table_obj, partition_spec, reverse_sort=reverse_sort)
  except:
    LOG.exception('Table partitions could not be retrieved')
    partitions = []
  massaged_partitions = [_massage_partition(database, table_obj, partition) for partition in partitions]

  if request.method == "POST" or request.GET.get('format', 'html') == 'json':
    return JsonResponse({
      'partition_keys_json': [partition.name for partition in table_obj.partition_keys],
      'partition_values_json': massaged_partitions,
    })
  else:
    apps_list = _get_apps(request.user, '')
    return render("metastore.mako", request, {
      'apps': apps_list,
      'breadcrumbs': [{
            'name': database,
            'url': reverse('metastore:show_tables', kwargs={'database': database})
          }, {
            'name': table,
            'url': reverse('metastore:describe_table', kwargs={'database': database, 'table': table})
          }, {
            'name': 'partitions',
            'url': reverse('metastore:describe_partitions', kwargs={'database': database, 'table': table})
          },
        ],
        'database': database,
        'table': table_obj,
        'partitions': partitions,
        'partition_keys_json': json.dumps([partition.name for partition in table_obj.partition_keys]),
        'partition_values_json': json.dumps(massaged_partitions),
        'request': request,
        'has_write_access': has_write_access(request.user),
        'is_optimizer_enabled': has_optimizer(),
        'is_navigator_enabled': has_catalog(request.user),
        'optimizer_url': get_optimizer_url(),
        'navigator_url': get_catalog_url(),
        'is_embeddable': request.GET.get('is_embeddable', False),
        'source_type': _get_servername(db),
    })
Example #29
  def generate_config(self, source, destination):
    configs = []

    if source['channelSourceType'] == 'directory':
      agent_source = '''
  tier1.sources.source1.type = exec
  tier1.sources.source1.command = tail -F %(directory)s
  tier1.sources.source1.channels = channel1
      ''' % {
       'directory': source['channelSourcePath']
    }
    elif source['channelSourceType'] == 'kafka':
      agent_source = '''
  tier1.sources.source1.type = org.apache.flume.source.kafka.KafkaSource
  tier1.sources.source1.channels = channel1
  tier1.sources.source1.batchSize = 5000
  tier1.sources.source1.batchDurationMillis = 2000
  tier1.sources.source1.kafka.bootstrap.servers = localhost:9092
  tier1.sources.source1.kafka.topics = test1, test2
  tier1.sources.source1.kafka.consumer.group.id = custom.g.id
      ''' % {
       'directory': source['channelSourcePath']
    }
    else:
      raise PopupException(_('Input format not recognized: %(channelSourceType)s') % source)

    if destination['ouputFormat'] == 'file':
      agent_sink = '''
  a1.channels = c1
  a1.sinks = k1
  a1.sinks.k1.type = hdfs
  a1.sinks.k1.channel = c1
  a1.sinks.k1.hdfs.path = /flume/events/%y-%m-%d/%H%M/%S
  a1.sinks.k1.hdfs.filePrefix = events-
  a1.sinks.k1.hdfs.round = true
  a1.sinks.k1.hdfs.roundValue = 10
  a1.sinks.k1.hdfs.roundUnit = minute'''
    elif destination['ouputFormat'] == 'table':
      agent_sink = '''
  a1.channels = c1
  a1.channels.c1.type = memory
  a1.sinks = k1
  a1.sinks.k1.type = hive
  a1.sinks.k1.channel = c1
  a1.sinks.k1.hive.metastore = thrift://127.0.0.1:9083
  a1.sinks.k1.hive.database = logsdb
  a1.sinks.k1.hive.table = weblogs
  a1.sinks.k1.hive.partition = asia,%{country},%y-%m-%d-%H-%M
  a1.sinks.k1.useLocalTimeStamp = false
  a1.sinks.k1.round = true
  a1.sinks.k1.roundValue = 10
  a1.sinks.k1.roundUnit = minute
  a1.sinks.k1.serializer = DELIMITED
  a1.sinks.k1.serializer.delimiter = "\t"
  a1.sinks.k1.serializer.serdeSeparator = '\t'
  a1.sinks.k1.serializer.fieldnames =id,,msg'''
    elif destination['ouputFormat'] == 'kafka':
      manager = ManagerApi()
      agent_sink = '''
      tier1.sinks.sink1.type = org.apache.flume.sink.kafka.KafkaSink
tier1.sinks.sink1.topic = hueAccessLogs
tier1.sinks.sink1.brokerList = %(brokers)s
tier1.sinks.sink1.channel = channel1
tier1.sinks.sink1.batchSize = 20''' % {
      'brokers': manager.get_kafka_brokers()
    }

    elif destination['ouputFormat'] == 'index':
      # Morphline file
      configs.append(self.generate_morphline_config(destination))
      # Flume config
      agent_sink = '''
  tier1.sinks.sink1.type          = org.apache.flume.sink.solr.morphline.MorphlineSolrSink
  tier1.sinks.sink1.morphlineFile = morphlines.conf
  tier1.sinks.sink1.morphlineId = hue_accesslogs_no_geo
  tier1.sinks.sink1.channel       = channel1'''
    else:
      raise PopupException(_('Output format not recognized: %(ouputFormat)s') % destination)

    # TODO: use agent id: input + output and do not override all the configs
    # TODO: use Kafka channel if possible
    flume_config = '''tier1.sources = source1
  tier1.channels = channel1
  tier1.sinks = sink1

  %(sources)s

  tier1.channels.channel1.type = memory
  tier1.channels.channel1.capacity = 10000
  tier1.channels.channel1.transactionCapacity = 1000

  %(sinks)s''' % {
    'sources': agent_source,
    'sinks': agent_sink,
  }

    configs.append(('agent_config_file', flume_config))

    return configs
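
A hedged sketch of the inputs this method expects (key names are taken from the branches above; the paths and values are placeholders):

# configs = self.generate_config(
#     source={'channelSourceType': 'directory', 'channelSourcePath': '/var/log/hue/access.log'},
#     destination={'ouputFormat': 'kafka'})  # 'ouputFormat' is the key actually read above
# -> a list of (name, content) tuples, e.g. ('agent_config_file', flume_config)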
Example #30
 def _execute(self, function, *args, **kwargs):
   response = None
   try:
     response = function(*args, **kwargs)
   except Exception as e:
     raise PopupException(_('YARN RM returned a failed response: %s') % e)