示例#1
0
文件: views.py 项目: anutron/hue
def view_results(request, id, first_row=0):
  """
  Returns the view for the results of the QueryHistory with the given id.

  The query results MUST be ready.
  To display query results, one should always go through the watch_query view.

  If ``first_row`` is 0, restarts (if necessary) the query read.  Otherwise, just
  spits out a warning if first_row doesn't match the server's conception.
  Multiple readers will produce a confusing interaction here, and that's known.

  It understands the ``context`` GET parameter. (See watch_query().)
  """
  # Coerce types; manage arguments
  id = int(id)
  first_row = long(first_row)
  # first_row == 0 means the caller wants to (re)read from the beginning.
  start_over = (first_row == 0)

  # Retrieve models from database
  query_history = models.QueryHistory.objects.get(id=id)
  handle = QueryHandle(id=query_history.server_id, log_context=query_history.log_context)
  context = _parse_query_context(request.GET.get('context'))

  # Retrieve query results
  try:
    results = db_utils.db_client().fetch(handle, start_over)
    assert results.ready, 'Trying to display result that is not yet ready. Query id %s' % (id,)
    # We display the "Download" button only when we know
    # that there are results:
    downloadable = (first_row > 0 or len(results.data) > 0)
    fetch_error = False
  except BeeswaxException, ex:
    # Keep the expanded message and server log around; the fetch-error branch
    # of the results page renders them later in this view.
    fetch_error = True
    error_message, log = expand_exception(ex)
示例#2
0
文件: views.py 项目: anutron/hue
def explain_directly(request, query_str, query_msg, design):
  """Run an EXPLAIN for the given query message and render the result page."""
  result = db_utils.db_client().explain(query_msg)
  template_data = {
    'query': query_str,
    'explanation': result.textual,
    'query_context': ("design", design),
  }
  return render('explain.mako', request, template_data)
示例#3
0
def watch_query(request, id):
  """
  Wait for the query to finish and (by default) display the results of query id.

  Optional GET params:

    on_success_url
      If given, it will be displayed when the query is successfully finished.
      Otherwise, it will display the view query results page by default.

    context
      A string of "name:data" that describes the context
      that generated this query result. It may be:
        - "table":"<table_name>"
        - "design":<design_id>

  All other GET params will be passed to on_success_url (if present).
  """
  id = int(id)
  query_history = authorized_get_history(request, id, must_exist=True)

  context_param = request.GET.get('context', '')

  # Default success destination: the results view for this query.
  results_url = urlresolvers.reverse(
      view_results, kwargs=dict(id=str(id), first_row=0, last_result_len=0))
  on_success_url = request.GET.get('on_success_url') or results_url

  # Poll the server state and persist it.
  server_id, state = _get_server_id_and_state(query_history)
  query_history.save_state(state)

  # Dispatch on the query's current state.
  if state == QueryHistory.STATE.expired:
    raise PopupException(_("The result of this query has expired."))
  elif state == QueryHistory.STATE.available:
    return format_preserving_redirect(request, on_success_url, request.GET)
  elif state == QueryHistory.STATE.failed:
    # When we fetch, Beeswax server will throw us a BeeswaxException, which has the
    # log we want to display.
    return format_preserving_redirect(request, results_url, request.GET)

  # Still running: show the wait page with the current server log.
  log = db_utils.db_client(query_history.get_query_server()).get_log(server_id)
  context = _parse_query_context(context_param)
  template_data = {
    'query': query_history,
    'fwd_params': request.GET.urlencode(),
    'log': log,
    'hadoop_jobs': _parse_out_hadoop_jobs(log),
    'query_context': context,
  }
  return render('watch_wait.mako', request, template_data)
示例#4
0
def data_generator(query_model, formatter):
    """
  data_generator(query_model, formatter) -> generator object

  Return a generator object for a csv. The first line is the column names.

  This is similar to export_csvxls.generator, but has
  one or two extra complexities.
  """
    global _DATA_WAIT_SLEEP
    is_first_row = True
    next_row = 0
    results = None
    handle = QueryHandle(query_model.server_id, query_model.log_context)

    yield formatter.init_doc()

    while True:
        # Make sure that we have the next batch of ready results
        while results is None or not results.ready:
            results = db_utils.db_client().fetch(handle,
                                                 start_over=is_first_row)
            if not results.ready:
                time.sleep(_DATA_WAIT_SLEEP)

        # Someone is reading the results concurrently. Abort.
        # But unfortunately, this current generator will produce incomplete data.
        if next_row != results.start_row:
            msg = 'Error: Potentially incomplete results as an error occur during data retrieval.'
            yield formatter.format_row([msg])
            err = ('Detected another client retrieving results for %s. '
                   'Expect next row being %s and got %s. Aborting' %
                   (query_model.server_id, next_row, results.start_row))
            LOG.error(err)
            raise RuntimeError(err)

        if is_first_row:
            is_first_row = False
            yield formatter.format_header(results.columns)
        else:
            for i, row in enumerate(results.data):
                # TODO(bc): Hive seems to always return tab delimited row data.
                # What if a cell has a tab?
                row = row.split('\t')
                try:
                    yield formatter.format_row(row)
                except TooBigToDownloadException, ex:
                    LOG.error(ex)
                    # Exceeded limit. Stop.
                    results.has_more = False
                    break

            if results.has_more:
                next_row += len(results.data)
                results = None
            else:
                yield formatter.fini_doc()
                break
def data_generator(query_model, formatter):
  """
  data_generator(query_model, formatter) -> generator object

  Return a generator object for a csv. The first line is the column names.

  This is similar to export_csvxls.generator, but has
  one or two extra complexities.
  """
  global _DATA_WAIT_SLEEP
  is_first_row = True
  # Index of the next row we expect from the server; used to detect a
  # concurrent reader advancing the cursor underneath us.
  next_row = 0
  results = None
  handle = QueryHandle(query_model.server_id, query_model.log_context)

  yield formatter.init_doc()

  while True:
    # Make sure that we have the next batch of ready results
    while results is None or not results.ready:
      # fetch_size=-1: presumably "let the server choose the batch size" -- confirm.
      results = db_utils.db_client(query_model.get_query_server()).fetch(handle, start_over=is_first_row, fetch_size=-1)
      if not results.ready:
        time.sleep(_DATA_WAIT_SLEEP)

    # Someone is reading the results concurrently. Abort.
    # But unfortunately, this current generator will produce incomplete data.
    if next_row != results.start_row:
      msg = _('Error: Potentially incomplete results as an error occurred during data retrieval.')
      yield formatter.format_row([msg])
      err = (_('Detected another client retrieving results for %(server_id)s. '
             'Expected next row to be %(row)s and got %(start_row)s. Aborting') %
             {'server_id': query_model.server_id, 'row': next_row, 'start_row': results.start_row})
      LOG.error(err)
      raise RuntimeError(err)

    if is_first_row:
      is_first_row = False
      # First pass emits only the header; the same batch's rows are emitted
      # on the next pass (results is not reset here).
      yield formatter.format_header(results.columns)
    else:
      for i, row in enumerate(results.data):
        # TODO(bc): Hive seems to always return tab delimited row data.
        # What if a cell has a tab?
        row = row.split('\t')
        try:
          yield formatter.format_row(row)
        except TooBigToDownloadException, ex:
          LOG.error(ex)
          # Exceeded limit. Stop.
          results.has_more = False
          break

      if results.has_more:
        next_row += len(results.data)
        results = None
      else:
        yield formatter.fini_doc()
        break
示例#6
0
文件: views.py 项目: anutron/hue
def expand_exception(exc):
  """expand_exception(exc) -> (error msg, log message)

  Best-effort expansion of a Beeswax exception: fetch the server-side log
  for the exception's log context and choose a displayable error message.
  Falls back to placeholder strings when the log cannot be retrieved or the
  exception carries no message.
  """
  try:
    log = db_utils.db_client().get_log(exc.log_context)
  except Exception:
    # Always show something, even if server has died on the job.
    # Catch Exception (not a bare except) so SystemExit/KeyboardInterrupt
    # still propagate.
    log = "Could not retrieve log."
  if not exc.message:
    error_message = "Unknown exception."
  else:
    error_message = exc.message
  return error_message, log
示例#7
0
文件: views.py 项目: kthguru/hue
def expand_exception(exc):
  """expand_exception(exc) -> (error msg, log message)

  Best-effort expansion of a Beeswax exception: fetch the server-side log
  for the exception's log context and choose a displayable, unicode-safe
  error message. Falls back to placeholder strings when the log cannot be
  retrieved or the exception carries no message.
  """
  try:
    log = db_utils.db_client().get_log(exc.log_context)
  except Exception:
    # Always show something, even if server has died on the job.
    # Catch Exception (not a bare except) so SystemExit/KeyboardInterrupt
    # still propagate.
    log = _("Could not retrieve log.")
  if not exc.message:
    error_message = _("Unknown exception.")
  else:
    error_message = force_unicode(exc.message, strings_only=True, errors='replace')
  return error_message, log
示例#8
0
def configuration(request):
  """
  Display Hive configuration variables.

  On a valid POST, fetches the default configuration of the selected query
  server (optionally including the Hadoop configuration). On GET -- or when
  the POSTed form is invalid -- shows an empty configuration table.
  """
  # Fix: config_values was previously unbound (NameError at render) when the
  # POSTed form failed validation.
  config_values = {}
  if request.method == 'POST':
    server_form = QueryServerForm(request.POST)
    if server_form.is_valid():
      query_server = db_utils.get_query_server(server_form.cleaned_data["server"])
      config_values = db_utils.db_client(query_server).get_default_configuration(
                          bool(request.REQUEST.get("include_hadoop", False)))
  else:
    server_form = QueryServerForm()

  return render("configuration.mako", request, {'config_values': config_values,
                                                'server_form': server_form})
示例#9
0
def view_results(request, id, first_row=0, last_result_len=0):
    """
    Returns the view for the results of the QueryHistory with the given id.

    The query results MUST be ready.
    To display query results, one should always go through the watch_query view.

    If ``first_row`` is 0, restarts (if necessary) the query read.  Otherwise, just
    spits out a warning if first_row doesn't match the server's conception.
    Multiple readers will produce a confusing interaction here, and that's known.

    ``last_result_len`` is presumably the row count of the previous fetch,
    used later in this view to decide whether more results remain -- confirm.

    It understands the ``context`` GET parameter. (See watch_query().)
    """
    # Coerce types; manage arguments
    id = int(id)
    first_row = long(first_row)
    # first_row == 0 means the caller wants to (re)read from the beginning.
    start_over = (first_row == 0)

    query_history = authorized_get_history(request, id, must_exist=True)

    handle = QueryHandle(id=query_history.server_id,
                         log_context=query_history.log_context)
    context = _parse_query_context(request.GET.get('context'))

    # Retrieve query results
    try:
        # Third positional arg is the fetch size; -1 presumably means
        # "server default" (see data_generator's fetch_size=-1) -- confirm.
        results = db_utils.db_client(query_history.get_query_server()).fetch(
            handle, start_over, -1)
        assert results.ready, _(
            'Trying to display result that is not yet ready. Query id %(id)s'
        ) % {
            'id': id
        }
        # We display the "Download" button only when we know
        # that there are results:
        downloadable = (first_row > 0 or len(results.data) > 0)
        fetch_error = False
    except BeeswaxException, ex:
        # Keep the expanded message and server log around; the fetch-error
        # branch of the results page renders them later in this view.
        fetch_error = True
        error_message, log = expand_exception(ex)
示例#10
0
文件: views.py 项目: anutron/hue
def save_results(request, id):
  """
  Save the results of a query to an HDFS directory (or, via CTAS, a new table).

  Only the user who submitted the query may do this, and the query state must
  be 'available'. POST performs the save; on errors, error_msg/log are set for
  rendering further down this view.
  """
  id = int(id)
  query_history = models.QueryHistory.objects.get(id=id)
  # Only the query owner may save its results.
  if query_history.owner != request.user:
    raise PopupException('This action is only available to the user who submitted the query.')
  # Refresh and persist the query state before checking availability.
  _, state = _get_server_id_and_state(query_history)
  query_history.save_state(state)
  error_msg, log = None, None

  if request.method == 'POST':
    # Make sure the result is available.
    # Note that we may still hit errors during the actual save
    if state != models.QueryHistory.STATE.available:
      if state in (models.QueryHistory.STATE.failed, models.QueryHistory.STATE.expired):
        msg = 'This query has %s. Results unavailable.' % (state,)
      else:
        msg = 'The result of this query is not available yet.'
      raise PopupException(msg)

    form = beeswax.forms.SaveResultsForm(request.POST)

    # Cancel goes back to results
    if request.POST.get('cancel'):
      return format_preserving_redirect(request, '/beeswax/watch/%s' % (id,))
    if form.is_valid():
      # Do save
      # 1. Get the results metadata
      assert request.POST.get('save')
      handle = QueryHandle(id=query_history.server_id, log_context=query_history.log_context)
      try:
        result_meta = db_utils.db_client().get_results_metadata(handle)
      except QueryNotFoundException, ex:
        LOG.exception(ex)
        raise PopupException('Cannot find query.')
      if result_meta.table_dir:
        # Keep only the path component of the table_dir URL.
        result_meta.table_dir = request.fs.urlsplit(result_meta.table_dir)[2]

      # 2. Check for partitioned tables
      if result_meta.table_dir is None:
        raise PopupException(
                  'Saving results from a partitioned table is not supported. '
                  'You may copy from the HDFS location manually.')

      # 3. Actual saving of results
      try:
        if form.cleaned_data['save_target'] == form.SAVE_TYPE_DIR:
          # To dir
          if result_meta.in_tablename:
            raise PopupException(
                      'Saving results from a table to a directory is not supported. '
                      'You may copy from the HDFS location manually.')
          target_dir = form.cleaned_data['target_dir']
          request.fs.rename_star(result_meta.table_dir, target_dir)
          LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, target_dir))
          # The result files have been moved away, so mark the results expired.
          query_history.save_state(models.QueryHistory.STATE.expired)
          fb_url = location_to_url(request, target_dir, strict=False)
          popup = PopupWithJframe('Query results stored in %s' % (target_dir,),
                                  launch_app_name='FileBrowser',
                                  launch_app_url=fb_url)
          return render_injected(list_query_history(request), popup)
        elif form.cleaned_data['save_target'] == form.SAVE_TYPE_TBL:
          # To new table
          try:
            return _save_results_ctas(request,
                                      query_history,
                                      form.cleaned_data['target_table'],
                                      result_meta)
          except BeeswaxException, bex:
            LOG.exception(bex)
            error_msg, log = expand_exception(bex)
      except IOError, ex:
        LOG.exception(ex)
        error_msg = str(ex)
示例#11
0
文件: views.py 项目: anutron/hue
def configuration(request):
  """Render the Hive default-configuration page."""
  include_hadoop = bool(request.REQUEST.get("include_hadoop", False))
  values = db_utils.db_client().get_default_configuration(include_hadoop)
  return render("configuration.mako", request, {'config_values': values})
示例#12
0
文件: views.py 项目: anutron/hue
    # (Continuation of view_results.) Remember the failure for rendering below.
    fetch_error = True
    error_message, log = expand_exception(ex)

  # Handle errors
  if fetch_error:
    # Render the results page in its error form: show the message and the
    # server log instead of rows, and disable saving.
    return render('watch_results.mako', request, {
      'query': query_history,
      'error': True,
      'error_message': error_message,
      'log': log,
      'hadoop_jobs': _parse_out_hadoop_jobs(log),
      'query_context': context,
      'can_save': False,
    })

  log = db_utils.db_client().get_log(query_history.server_id)
  download_urls = {}
  if downloadable:
    # One download URL per supported export format.
    for format in common.DL_FORMATS:
      download_urls[format] = urlresolvers.reverse(
                                    download, kwargs=dict(id=str(id), format=format))

  save_form = beeswax.forms.SaveResultsForm()

  # Display the results
  return render('watch_results.mako', request, {
    'error': False,
    'query': query_history,
    # Materialize, for easier testability.
    'results': list(parse_results(results.data)),
    'has_more': results.has_more,
示例#13
0
def watch_query(request, id):
    """
    Wait for the query to finish and (by default) display the results of query id.
    It understands the optional GET params:

      on_success_url
        If given, it will be displayed when the query is successfully finished.
        Otherwise, it will display the view query results page by default.

      context
        A string of "name:data" that describes the context
        that generated this query result. It may be:
          - "table":"<table_name>"
          - "design":<design_id>

    All other GET params will be passed to on_success_url (if present).
    """
    # Coerce types; manage arguments
    id = int(id)

    query_history = authorized_get_history(request, id, must_exist=True)

    # GET param: context.
    context_param = request.GET.get('context', '')

    # GET param: on_success_url. Default to view_results
    results_url = urlresolvers.reverse(view_results,
                                       kwargs=dict(id=str(id),
                                                   first_row=0,
                                                   last_result_len=0))
    on_success_url = request.GET.get('on_success_url')
    if not on_success_url:
        on_success_url = results_url

    # Get the server_id
    server_id, state = _get_server_id_and_state(query_history)
    # Persist the freshly polled state so later views see it.
    query_history.save_state(state)

    # Query finished?
    if state == QueryHistory.STATE.expired:
        raise PopupException(_("The result of this query has expired."))
    elif state == QueryHistory.STATE.available:
        return format_preserving_redirect(request, on_success_url, request.GET)
    elif state == QueryHistory.STATE.failed:
        # When we fetch, Beeswax server will throw us a BeeswaxException, which has the
        # log we want to display.
        return format_preserving_redirect(request, results_url, request.GET)

    # Still running
    log = db_utils.db_client(
        query_history.get_query_server()).get_log(server_id)

    # Keep waiting
    # - Translate context into something more meaningful (type, data)
    context = _parse_query_context(context_param)
    return render(
        'watch_wait.mako', request, {
            'query': query_history,
            'fwd_params': request.GET.urlencode(),
            'log': log,
            'hadoop_jobs': _parse_out_hadoop_jobs(log),
            'query_context': context,
        })
示例#14
0
        # (Continuation of view_results.) Remember the failure for rendering below.
        error_message, log = expand_exception(ex)

    # Handle errors
    if fetch_error:
        # Render the results page in its error form: show the message and the
        # server log instead of rows, and disable saving.
        return render(
            'watch_results.mako', request, {
                'query': query_history,
                'error': True,
                'error_message': error_message,
                'log': log,
                'hadoop_jobs': _parse_out_hadoop_jobs(log),
                'query_context': context,
                'can_save': False,
            })

    log = db_utils.db_client(query_history.get_query_server()).get_log(
        query_history.server_id)
    download_urls = {}
    if downloadable:
        # One download URL per supported export format.
        for format in common.DL_FORMATS:
            download_urls[format] = urlresolvers.reverse(download,
                                                         kwargs=dict(
                                                             id=str(id),
                                                             format=format))

    save_form = SaveResultsForm()
    # Heuristic: a batch whose length differs from the previous one, or an
    # empty batch, means the result set is exhausted.
    has_more = True
    last_result_len = long(last_result_len)
    if (last_result_len != 0 and len(results.data) != last_result_len) or len(
            results.data) == 0:
        has_more = False
    # Display the results
示例#15
0
文件: views.py 项目: kthguru/hue
def save_results(request, id):
  """
  Save the results of a query to an HDFS directory (or, via CTAS, a new table).

  Only the user who submitted the query may do this, and the query state must
  be 'available'. POST performs the save; on errors, error_msg/log are set for
  rendering further down this view.
  """
  id = int(id)
  query_history = models.QueryHistory.objects.get(id=id)
  # Only the query owner may save its results.
  if query_history.owner != request.user:
    raise PopupException(_('This action is only available to the user who submitted the query.'))
  # Refresh and persist the query state before checking availability.
  server_id, state = _get_server_id_and_state(query_history)
  query_history.save_state(state)
  error_msg, log = None, None

  if request.method == 'POST':
    # Make sure the result is available.
    # Note that we may still hit errors during the actual save
    if state != models.QueryHistory.STATE.available:
      if state in (models.QueryHistory.STATE.failed, models.QueryHistory.STATE.expired):
        msg = _('This query has %(state)s. Results unavailable.') % {'state': state}
      else:
        msg = _('The result of this query is not available yet.')
      raise PopupException(msg)

    form = beeswax.forms.SaveResultsForm(request.POST)

    # Cancel goes back to results
    if request.POST.get('cancel'):
      return format_preserving_redirect(request, '/beeswax/watch/%s' % (id,))
    if form.is_valid():
      # Do save
      # 1. Get the results metadata
      assert request.POST.get('save')
      handle = QueryHandle(id=query_history.server_id, log_context=query_history.log_context)
      try:
        result_meta = db_utils.db_client().get_results_metadata(handle)
      except QueryNotFoundException, ex:
        LOG.exception(ex)
        raise PopupException(_('Cannot find query.'))
      if result_meta.table_dir:
        # Keep only the path component of the table_dir URL.
        result_meta.table_dir = request.fs.urlsplit(result_meta.table_dir)[2]

      # 2. Check for partitioned tables
      if result_meta.table_dir is None:
        raise PopupException(_('Saving results from a partitioned table is not supported. You may copy from the HDFS location manually.'))

      # 3. Actual saving of results
      try:
        if form.cleaned_data['save_target'] == form.SAVE_TYPE_DIR:
          # To dir
          if result_meta.in_tablename:
            raise PopupException(_('Saving results from a table to a directory is not supported. You may copy from the HDFS location manually.'))
          target_dir = form.cleaned_data['target_dir']
          request.fs.rename_star(result_meta.table_dir, target_dir)
          LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, target_dir))
          # The result files have been moved away, so mark the results expired.
          query_history.save_state(models.QueryHistory.STATE.expired)
          return HttpResponse(urlresolvers.reverse('filebrowser.views.view', kwargs={'path': target_dir}))
        elif form.cleaned_data['save_target'] == form.SAVE_TYPE_TBL:
          # To new table
          try:
            return _save_results_ctas(request,
                                      query_history,
                                      form.cleaned_data['target_table'],
                                      result_meta)
          except BeeswaxException, bex:
            LOG.exception(bex)
            error_msg, log = expand_exception(bex)
      except WebHdfsException, ex:
        raise PopupException(_('The table could not be saved.'), detail=ex)
      except IOError, ex:
        LOG.exception(ex)
        error_msg = str(ex)