示例#1
0
def import_wizard(request, database='default'):
  """
  Help users define a table based on a file they want to import to Hive.

  A non-POST request renders the file selection page. A POST whose file form
  validates is dispatched on the single submit/cancel button found in
  ``request.POST``: it renders the delimiter preview, renders the column
  definition page, or passes the query rendered from
  ``create_table_statement.mako`` to ``_submit_create_and_load``. A POST with
  an invalid file form, or one cancelling the delimiter step, falls through to
  the file selection page.

  Limitations:
    - Rows are delimited (no serde).
    - No detection for map and array types.
    - No detection for the presence of column header in the first row.
    - No partition table.
    - Does not work with binary data.

  :param request: the incoming request; ``method``, ``POST``, ``user`` and
                  ``fs`` are read from it.
  :param database: name of the target database, forwarded to the templates,
                   to the reversed wizard URL and to the final submission.
  :return: whatever ``render`` or ``_submit_create_and_load`` returns.
  :raises PopupException: if the POST does not carry exactly one of the
                          recognised submit/cancel buttons.
  """
  encoding = i18n.get_site_encoding()
  app_name = get_app_name(request)

  if request.method == 'POST':
    #
    # General processing logic:
    # - We have 3 steps. Each requires the previous.
    #   * Step 1      : Table name and file location
    #   * Step 2a     : Display sample with auto chosen delim
    #   * Step 2b     : Display sample with user chosen delim (if user chooses one)
    #   * Step 3      : Display sample, and define columns
    # - Each step is represented by a different form. The form of an earlier step
    #   should be present when submitting to a later step.
    # - To preserve the data from the earlier steps, we send the forms back as
    #   hidden fields. This way, when users revisit a previous step, the data would
    #   be there as well.
    #
    delim_is_auto = False
    fields_list, n_cols = [[]], 0
    s3_col_formset = None

    db = dbms.get(request.user)
    s1_file_form = CreateByImportFileForm(request.POST, db=db)

    if s1_file_form.is_valid():
      do_s2_auto_delim = request.POST.get('submit_file')        # Step 1 -> 2
      do_s2_user_delim = request.POST.get('submit_preview')     # Step 2 -> 2
      do_s3_column_def = request.POST.get('submit_delim')       # Step 2 -> 3
      do_hive_create = request.POST.get('submit_create')        # Step 3 -> execute

      cancel_s2_user_delim = request.POST.get('cancel_delim')   # Step 2 -> 1
      cancel_s3_column_def = request.POST.get('cancel_create')  # Step 3 -> 2

      # Exactly one of these should be set. Counting with a generator instead of
      # len(filter(...)) keeps this working on Python 3, where filter() returns
      # an iterator that has no len().
      actions = (do_s2_auto_delim, do_s2_user_delim, do_s3_column_def,
                 do_hive_create, cancel_s2_user_delim, cancel_s3_column_def)
      if sum(1 for action in actions if action) != 1:
        raise PopupException(_('Invalid form submission'))

      if not do_s2_auto_delim:
        # We should have a valid delim form
        s2_delim_form = CreateByImportDelimForm(request.POST)
        if not s2_delim_form.is_valid():
          # Go back to picking delimiter
          do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False
      if do_hive_create:
        # We should have a valid columns formset
        s3_col_formset = ColumnTypeFormSet(prefix='cols', data=request.POST)
        if not s3_col_formset.is_valid():
          # Go back to define columns
          do_s3_column_def, do_hive_create = True, False

      #
      # Go to step 2: We've just picked the file. Preview it.
      #
      if do_s2_auto_delim:
        delim_is_auto = True
        fields_list, n_cols, s2_delim_form = _delim_preview(
            request.fs, s1_file_form, encoding,
            [reader.TYPE for reader in FILE_READERS], DELIMITERS)

      if (do_s2_user_delim or do_s3_column_def or cancel_s3_column_def) and s2_delim_form.is_valid():
        # Delimit based on input
        fields_list, n_cols, s2_delim_form = _delim_preview(
            request.fs, s1_file_form, encoding,
            (s2_delim_form.cleaned_data['file_type'],),
            (s2_delim_form.cleaned_data['delimiter'],))

      if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def:
        delimiter_data = s2_delim_form['delimiter'].data
        return render('choose_delimiter.mako', request, {
          'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
          # Fall back to the second delimiter value when the first has no readable name.
          'delim_readable': DELIMITER_READABLE.get(delimiter_data[0], delimiter_data[1]),
          'initial': delim_is_auto,
          'file_form': s1_file_form,
          'delim_form': s2_delim_form,
          'fields_list': fields_list,
          'delimiter_choices': TERMINATOR_CHOICES,
          'n_cols': n_cols,
          'database': database,
        })

      #
      # Go to step 3: Define column.
      #
      if do_s3_column_def:
        if s3_col_formset is None:
          # No submitted formset to redisplay: propose one string column per
          # previewed field.
          columns = [
            dict(column_name='col_%s' % (i,), column_type='string')
            for i in range(n_cols)
          ]
          s3_col_formset = ColumnTypeFormSet(prefix='cols', initial=columns)
        return render('define_columns.mako', request, {
          'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
          'file_form': s1_file_form,
          'delim_form': s2_delim_form,
          'column_formset': s3_col_formset,
          'fields_list': fields_list,
          'n_cols': n_cols,
          'database': database,
        })

      #
      # Final: Execute
      #
      if do_hive_create:
        delim = s2_delim_form.cleaned_data['delimiter']
        table_name = s1_file_form.cleaned_data['name']
        proposed_query = django_mako.render_to_string("create_table_statement.mako", {
            'table': dict(name=table_name,
                          comment=s1_file_form.cleaned_data['comment'],
                          row_format='Delimited',
                          field_terminator=delim),
            'columns': [f.cleaned_data for f in s3_col_formset.forms],
            'partition_columns': [],
            'database': database,
          }
        )

        do_load_data = s1_file_form.cleaned_data.get('do_import')
        path = s1_file_form.cleaned_data['path']
        return _submit_create_and_load(request, proposed_query, table_name, path, do_load_data, database=database)
  else:
    s1_file_form = CreateByImportFileForm()

  # Step 1 (also reached for an invalid file form or a cancelled delimiter step).
  return render('choose_file.mako', request, {
    'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
    'file_form': s1_file_form,
    'database': database,
  })
示例#2
0
def import_wizard(request, database='default'):
    """
    Help users define a table based on a file they want to import to Hive.

    A non-POST request renders the file selection page. A POST whose file form
    validates is dispatched on the single submit/cancel button found in
    ``request.POST``: it renders the delimiter preview, renders the column
    definition page, or passes the query rendered from
    ``create_table_statement.mako`` to ``_submit_create_and_load``. A POST with
    an invalid file form, or one cancelling the delimiter step, falls through
    to the file selection page.

    Limitations:
      - Rows are delimited (no serde).
      - No detection for map and array types.
      - No detection for the presence of column header in the first row.
      - No partition table.
      - Does not work with binary data.

    :param request: the incoming request; ``method``, ``POST``, ``GET``,
                    ``user`` and ``fs`` are read from it.
    :param database: name of the target database, forwarded to the templates,
                     to the reversed wizard URL and to the final submission.
    :return: whatever ``render`` or ``_submit_create_and_load`` returns.
    :raises PopupException: if the POST does not carry exactly one recognised
        submit/cancel button, if the path does not match the selected load
        mode or cannot be checked, if rendering the column definition page
        fails, or if the create/load submission raises QueryServerException.
    """
    encoding = i18n.get_site_encoding()
    app_name = get_app_name(request)

    db = dbms.get(request.user)
    # The loop variable must not be called ``db``: on Python 2 list
    # comprehension variables leak into the enclosing scope, which would
    # rebind the handle above to a database name before it reaches the form.
    databases = [{
        'name': db_name,
        'url': reverse('beeswax:import_wizard', kwargs={'database': db_name}),
    } for db_name in db.get_databases()]

    if request.method == 'POST':
        #
        # General processing logic:
        # - We have 3 steps. Each requires the previous.
        #   * Step 1      : Table name and file location
        #   * Step 2a     : Display sample with auto chosen delim
        #   * Step 2b     : Display sample with user chosen delim (if user chooses one)
        #   * Step 3      : Display sample, and define columns
        # - Each step is represented by a different form. The form of an earlier step
        #   should be present when submitting to a later step.
        # - To preserve the data from the earlier steps, we send the forms back as
        #   hidden fields. This way, when users revisit a previous step, the data would
        #   be there as well.
        #
        delim_is_auto = False
        fields_list, n_cols = [[]], 0
        s3_col_formset = None
        s1_file_form = CreateByImportFileForm(request.POST, db=db)

        if s1_file_form.is_valid():
            do_s2_auto_delim = request.POST.get('submit_file')        # Step 1 -> 2
            do_s2_user_delim = request.POST.get('submit_preview')     # Step 2 -> 2
            do_s3_column_def = request.POST.get('submit_delim')       # Step 2 -> 3
            do_hive_create = request.POST.get('submit_create')        # Step 3 -> execute

            cancel_s2_user_delim = request.POST.get('cancel_delim')   # Step 2 -> 1
            cancel_s3_column_def = request.POST.get('cancel_create')  # Step 3 -> 2

            # Exactly one of these should be set
            actions = (do_s2_auto_delim, do_s2_user_delim, do_s3_column_def,
                       do_hive_create, cancel_s2_user_delim,
                       cancel_s3_column_def)
            if sum(1 for action in actions if action) != 1:
                raise PopupException(_('Invalid form submission'))

            if not do_s2_auto_delim:
                # We should have a valid delim form
                s2_delim_form = CreateByImportDelimForm(request.POST)
                if not s2_delim_form.is_valid():
                    # Go back to picking delimiter
                    do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False
            if do_hive_create:
                # We should have a valid columns formset
                s3_col_formset = ColumnTypeFormSet(prefix='cols',
                                                   data=request.POST)
                if not s3_col_formset.is_valid():
                    # Go back to define columns
                    do_s3_column_def, do_hive_create = True, False

            load_data = s1_file_form.cleaned_data.get('load_data',
                                                      'IMPORT').upper()
            path = s1_file_form.cleaned_data['path']

            #
            # Go to step 2: We've just picked the file. Preview it.
            #
            if do_s2_auto_delim:
                # The path must be a file when importing and a directory when
                # creating an external table.
                try:
                    if load_data == 'IMPORT':
                        if not request.fs.isfile(path):
                            raise PopupException(
                                _('Path location must refer to a file if "Import Data" is selected.'))
                    elif load_data == 'EXTERNAL':
                        if not request.fs.isdir(path):
                            raise PopupException(
                                _('Path location must refer to a directory if "Create External Table" is selected.'))
                except (IOError, S3FileSystemException) as e:
                    raise PopupException(
                        _('Path location "%s" is invalid: %s') % (path, e))

                delim_is_auto = True
                fields_list, n_cols, s2_delim_form = _delim_preview(
                    request.fs, s1_file_form, encoding,
                    [reader.TYPE for reader in FILE_READERS], DELIMITERS)

            if (do_s2_user_delim or do_s3_column_def
                    or cancel_s3_column_def) and s2_delim_form.is_valid():
                # Delimit based on input
                fields_list, n_cols, s2_delim_form = _delim_preview(
                    request.fs, s1_file_form, encoding,
                    (s2_delim_form.cleaned_data['file_type'], ),
                    (s2_delim_form.cleaned_data['delimiter'], ))

            if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def:
                apps_list = _get_apps(request.user, '')
                delimiter_data = s2_delim_form['delimiter'].data
                return render('import_wizard_choose_delimiter.mako', request, {
                    'apps': apps_list,
                    'action': reverse(app_name + ':import_wizard',
                                      kwargs={'database': database}),
                    # Fall back to the second delimiter value when the first
                    # has no readable name.
                    'delim_readable': DELIMITER_READABLE.get(
                        delimiter_data[0], delimiter_data[1]),
                    'initial': delim_is_auto,
                    'file_form': s1_file_form,
                    'delim_form': s2_delim_form,
                    'fields_list': fields_list,
                    'delimiter_choices': TERMINATOR_CHOICES,
                    'n_cols': n_cols,
                    'database': database,
                    'databases': databases,
                })

            #
            # Go to step 3: Define column.
            #
            if do_s3_column_def:
                if s3_col_formset is None:
                    # No submitted formset to redisplay: propose one string
                    # column per previewed field.
                    columns = [{
                        'column_name': 'col_%s' % (i, ),
                        'column_type': 'string',
                    } for i in range(n_cols)]
                    s3_col_formset = ColumnTypeFormSet(prefix='cols',
                                                       initial=columns)
                try:
                    # Shallow copy so that cleaning the first row for the JSON
                    # payload leaves ``fields_list`` itself untouched.
                    fields_list_for_json = list(fields_list)
                    if fields_list_for_json:
                        # Cleaning headers: strip every non-word character.
                        fields_list_for_json[0] = [
                            re.sub(r'[^\w]', '', a)
                            for a in fields_list_for_json[0]
                        ]
                    apps_list = _get_apps(request.user, '')
                    return render('import_wizard_define_columns.mako', request, {
                        'apps': apps_list,
                        'action': reverse(app_name + ':import_wizard',
                                          kwargs={'database': database}),
                        'file_form': s1_file_form,
                        'delim_form': s2_delim_form,
                        'column_formset': s3_col_formset,
                        'fields_list': fields_list,
                        'fields_list_json': json.dumps(fields_list_for_json),
                        'n_cols': n_cols,
                        'database': database,
                        'databases': databases,
                    })
                except Exception as e:
                    raise PopupException(
                        _("The selected delimiter is creating an un-even number of columns. Please make sure you don't have empty columns."),
                        detail=e)

            #
            # Final: Execute
            #
            if do_hive_create:
                delim = s2_delim_form.cleaned_data['delimiter']
                table_name = s1_file_form.cleaned_data['name']
                # NOTE(review): the flag is read from the query string even
                # though this is the POST branch - confirm the form action
                # carries ``removeHeader``.
                skip_header = request.GET.get('removeHeader', 'off').lower() == 'on'

                proposed_query = django_mako.render_to_string(
                    "create_table_statement.mako", {
                        'table': {
                            'name': table_name,
                            'comment': s1_file_form.cleaned_data['comment'],
                            'row_format': 'Delimited',
                            'field_terminator': delim,
                            'file_format': 'TextFile',
                            'load_data': load_data,
                            'path': path,
                            'skip_header': skip_header,
                        },
                        'columns': [f.cleaned_data for f in s3_col_formset.forms],
                        'partition_columns': [],
                        'database': database,
                        'databases': databases,
                    })
                try:
                    return _submit_create_and_load(request,
                                                   proposed_query,
                                                   table_name,
                                                   path,
                                                   load_data,
                                                   database=database)
                except QueryServerException as e:
                    raise PopupException(_('The table could not be created.'),
                                         detail=e.message)
    else:
        s1_file_form = CreateByImportFileForm()

    # Step 1 (also reached for an invalid file form or a cancelled delimiter step).
    return render('import_wizard_choose_file.mako', request, {
        'action': reverse(app_name + ':import_wizard',
                          kwargs={'database': database}),
        'file_form': s1_file_form,
        'database': database,
        'databases': databases,
    })