Example #1
def importer_submit(request):
  source = json.loads(request.POST.get('source', '{}'))
  outputFormat = json.loads(request.POST.get('destination', '{}'))['outputFormat']
  destination = json.loads(request.POST.get('destination', '{}'))
  destination['ouputFormat'] = outputFormat # Workaround a very weird bug: the rest of the function reads the misspelled 'ouputFormat' key, so it is populated explicitly
  start_time = json.loads(request.POST.get('start_time', '-1'))

  if source['inputFormat'] == 'file':
    if source['path']:
      source['path'] = request.fs.netnormpath(source['path'])
  if destination['ouputFormat'] in ('database', 'table'):
    if destination['nonDefaultLocation']:
      destination['nonDefaultLocation'] = request.fs.netnormpath(destination['nonDefaultLocation'])

  if destination['ouputFormat'] == 'index':
    source['columns'] = destination['columns']
    index_name = destination["name"]

    if destination['indexerRunJob']:
      _convert_format(source["format"], inverse=True)
      job_handle = _index(request, source, index_name, start_time=start_time, lib_path=destination['indexerJobLibPath'])
    else:
      client = SolrClient(request.user)
      job_handle = _create_index(request.user, request.fs, client, source, destination, index_name)
  elif destination['ouputFormat'] == 'database':
    job_handle = _create_database(request, source, destination, start_time)
  elif source['inputFormat'] == 'rdbms':
    if destination['outputFormat'] in ('file', 'table', 'hbase'):
      job_handle = run_sqoop(request, source, destination, start_time)
    # NOTE: any other outputFormat leaves job_handle unbound, so the return below raises UnboundLocalError
  else:
    job_handle = _create_table(request, source, destination, start_time)

  return JsonResponse(job_handle)
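A hedged sketch of the POST body this handler expects, built only from the keys the code above reads ('source', 'destination', 'start_time' and the fields accessed on them). The URL, port and use of the requests library are assumptions for illustration:

import json
import requests  # hypothetical client; any HTTP library works

payload = {
    'source': json.dumps({'inputFormat': 'file', 'path': '/user/demo/data.csv'}),
    'destination': json.dumps({
        'outputFormat': 'table',     # copied by the handler into the misspelled 'ouputFormat' key
        'name': 'default.demo',
        'nonDefaultLocation': None,  # falsy, so netnormpath() is skipped
    }),
    'start_time': json.dumps(-1),
}
# Hypothetical endpoint; a real Hue instance also needs session auth and a CSRF token.
response = requests.post('http://localhost:8888/indexer/api/importer/submit', data=payload)
print(response.json())  # the JsonResponse(job_handle) body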
Example #2
def importer_submit(request):
  source = json.loads(request.POST.get('source', '{}'))
  outputFormat = json.loads(request.POST.get('destination', '{}'))['outputFormat']
  destination = json.loads(request.POST.get('destination', '{}'))
  destination['ouputFormat'] = outputFormat # Workaround a very weird bug: the rest of the function reads the misspelled 'ouputFormat' key
  start_time = json.loads(request.POST.get('start_time', '-1'))

  if source['inputFormat'] == 'file':
    if source['path']:
      path = urllib.unquote(source['path'])  # Python 2 API; urllib.parse.unquote on Python 3
      source['path'] = request.fs.netnormpath(path)
      parent_path = request.fs.parent_path(path)
      stats = request.fs.stats(parent_path)
      split = urlparse(path)
      # Only for HDFS: importing data into a non-external table at the default location,
      # when "others" lack full access to the parent dir (last octal digit of the mode != 7)
      if split.scheme in ('', 'hdfs') and destination['importData'] and destination['useDefaultLocation'] and oct(stats["mode"])[-1] != '7':
        user_scratch_dir = request.fs.get_home_dir() + '/.scratchdir'
        request.fs.do_as_user(request.user, request.fs.mkdir, user_scratch_dir, 0o777)  # 0o777 works on Python 2 and 3; the old 00777 literal is Python 2 only
        request.fs.do_as_user(request.user, request.fs.rename, source['path'], user_scratch_dir)
        source['path'] = user_scratch_dir + '/' + source['path'].split('/')[-1]

  if destination['ouputFormat'] in ('database', 'table'):
    if destination['nonDefaultLocation']:
      destination['nonDefaultLocation'] = request.fs.netnormpath(destination['nonDefaultLocation'])

  if source['inputFormat'] == 'stream':
    job_handle = _envelope_job(request, source, destination, start_time=start_time, lib_path=destination['indexerJobLibPath'])
  elif destination['ouputFormat'] == 'index':
    source['columns'] = destination['columns']
    index_name = destination["name"]

    if destination['indexerRunJob']:
      _convert_format(source["format"], inverse=True)
      job_handle = _large_indexing(request, source, index_name, start_time=start_time, lib_path=destination['indexerJobLibPath'])
    else:
      client = SolrClient(request.user)
      job_handle = _small_indexing(request.user, request.fs, client, source, destination, index_name)
  elif destination['ouputFormat'] == 'database':
    job_handle = _create_database(request, source, destination, start_time)
  elif source['inputFormat'] == 'altus':
    # BDR copy or DistCP + DDL + Sentry DDL copy
    pass
  elif source['inputFormat'] == 'rdbms':
    if destination['outputFormat'] in ('file', 'table', 'hbase'):
      job_handle = run_sqoop(request, source, destination, start_time)
  else:
    job_handle = _create_table(request, source, destination, start_time)

  request.audit = {
    'operation': 'EXPORT',
    'operationText': 'User %(username)s exported %(inputFormat)s to %(ouputFormat)s: %(name)s' % {
        'username': request.user.username,
        'inputFormat': source['inputFormat'],
        'ouputFormat': destination['ouputFormat'],
        'name': destination['name'],
    },
    'allowed': True
  }

  return JsonResponse(job_handle)
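What the parent-directory check in the file branch above is testing, in isolation: oct() renders the permission bits, and the last octal digit is the "others" class, so only '7' means world rwx. The mode value below is illustrative:

mode = 0o40755               # a typical directory: drwxr-xr-x
print(oct(mode)[-1])         # '5' -> "others" cannot write
print(oct(mode)[-1] != '7')  # True -> the handler creates a 0o777 .scratchdir and moves the file there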
Example #3
def importer_submit(request):
    source = json.loads(request.POST.get('source', '{}'))
    outputFormat = json.loads(request.POST.get('destination',
                                               '{}'))['outputFormat']
    destination = json.loads(request.POST.get('destination', '{}'))
    destination['ouputFormat'] = outputFormat  # Workaround a very weird bug: the rest of the function reads the misspelled 'ouputFormat' key
    start_time = json.loads(request.POST.get('start_time', '-1'))

    if source['inputFormat'] == 'file':
        if source['path']:
            path = urllib.unquote(source['path'])
            source['path'] = request.fs.netnormpath(path)
    if destination['ouputFormat'] in ('database', 'table'):
        if destination['nonDefaultLocation']:
            destination['nonDefaultLocation'] = request.fs.netnormpath(
                destination['nonDefaultLocation'])

    if destination['ouputFormat'] == 'index':
        source['columns'] = destination['columns']
        index_name = destination["name"]

        if destination['indexerRunJob']:
            _convert_format(source["format"], inverse=True)
            job_handle = _large_indexing(
                request,
                source,
                index_name,
                start_time=start_time,
                lib_path=destination['indexerJobLibPath'])
        else:
            client = SolrClient(request.user)
            job_handle = _small_indexing(request.user, request.fs, client,
                                         source, destination, index_name)
    elif destination['ouputFormat'] == 'database':
        job_handle = _create_database(request, source, destination, start_time)
    elif source['inputFormat'] == 'rdbms':
        if destination['outputFormat'] in ('file', 'table', 'hbase'):
            job_handle = run_sqoop(request, source, destination, start_time)
    else:
        job_handle = _create_table(request, source, destination, start_time)

    request.audit = {
        'operation': 'EXPORT',
        'operationText':
        'User %(username)s exported %(inputFormat)s to %(ouputFormat)s: %(name)s'
        % {
            'username': request.user.username,
            'inputFormat': source['inputFormat'],
            'ouputFormat': destination['ouputFormat'],
            'name': destination['name'],
        },
        'allowed': True
    }

    return JsonResponse(job_handle)
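For the 'index' branch above, a hedged example of the destination fields the handler actually reads; the column and format shapes are assumptions, since this code only forwards them:

import json

destination = {
    'outputFormat': 'index',
    'name': 'logs_demo',                                 # becomes index_name
    'columns': [{'name': 'message', 'type': 'string'}],  # shape assumed; copied into source['columns']
    'indexerRunJob': False,                              # False -> _small_indexing() through SolrClient
    'indexerJobLibPath': None,
}
source = {
    'inputFormat': 'file',
    'path': '/user/demo/logs.csv',
    'format': {'type': 'csv'},  # shape assumed; only read when indexerRunJob is true
}
body = {
    'source': json.dumps(source),
    'destination': json.dumps(destination),
    'start_time': json.dumps(-1),
}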
Example #4
File: api3.py Project: hkj123/hue
def importer_submit(request):
    source = json.loads(request.POST.get('source', '{}'))
    outputFormat = json.loads(request.POST.get('destination',
                                               '{}'))['outputFormat']
    destination = json.loads(request.POST.get('destination', '{}'))
    destination['ouputFormat'] = outputFormat  # Workaround a very weird bug: the rest of the function reads the misspelled 'ouputFormat' key
    start_time = json.loads(request.POST.get('start_time', '-1'))

    if source['inputFormat'] == 'file':
        if source['path']:
            path = urllib_unquote(source['path'])
            source['path'] = request.fs.netnormpath(path)

    if destination['ouputFormat'] in ('database', 'table'):
        if destination['nonDefaultLocation']:
            destination['nonDefaultLocation'] = request.fs.netnormpath(destination['nonDefaultLocation'])

    if destination['ouputFormat'] == 'index':
        source['columns'] = destination['columns']
        index_name = destination["name"]

        if destination['indexerRunJob'] or source['inputFormat'] == 'stream':
            _convert_format(source["format"], inverse=True)
            job_handle = _large_indexing(
                request,
                source,
                index_name,
                start_time=start_time,
                lib_path=destination['indexerJobLibPath'],
                destination=destination)
        else:
            client = SolrClient(request.user)
            job_handle = _small_indexing(request.user, request.fs, client,
                                         source, destination, index_name)
    elif source['inputFormat'] in (
            'stream', 'connector') or destination['ouputFormat'] == 'stream':
        args = {
            'source': source,
            'destination': destination,
            'start_time': start_time,
            'dry_run': request.POST.get('show_command')
        }
        api = FlinkIndexer(request.user, request.fs)

        job_nb = api.create_table_from_kafka(**args)

        if request.POST.get('show_command'):
            job_handle = {'status': 0, 'commands': job_nb}
        else:
            job_handle = job_nb.execute(request, batch=False)
    elif source['inputFormat'] == 'altus':
        # BDR copy or DistCP + DDL + Sentry DDL copy
        pass
    elif source['inputFormat'] == 'rdbms':
        if destination['outputFormat'] in ('database', 'file', 'table',
                                           'hbase'):
            job_handle = run_sqoop(request, source, destination, start_time)
    elif destination['ouputFormat'] == 'database':
        job_handle = _create_database(request, source, destination, start_time)
    elif destination['ouputFormat'] == 'big-table':
        args = {
            'source': source,
            'destination': destination,
            'start_time': start_time,
            'dry_run': request.POST.get('show_command')
        }
        api = PhoenixIndexer(request.user, request.fs)

        job_nb = api.create_table_from_file(**args)

        if request.POST.get('show_command'):
            job_handle = {'status': 0, 'commands': job_nb}
        else:
            job_handle = job_nb.execute(request, batch=False)
    else:
        job_handle = _create_table(request, source, destination, start_time)

    request.audit = {
        'operation': 'EXPORT',
        'operationText':
        'User %(username)s exported %(inputFormat)s to %(ouputFormat)s: %(name)s'
        % {
            'username': request.user.username,
            'inputFormat': source['inputFormat'],
            'ouputFormat': destination['ouputFormat'],
            'name': destination['name'],
        },
        'allowed': True
    }

    return JsonResponse(job_handle)
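The Flink and Phoenix branches above share one dry-run pattern; a minimal restatement of its shape (the function name is illustrative, only the 'show_command' flag and the execute() call come from the code):

def submit_or_preview(request, job):
    # When the client posts 'show_command', return the generated commands
    # instead of running the job; otherwise execute it immediately.
    if request.POST.get('show_command'):
        return {'status': 0, 'commands': job}
    return job.execute(request, batch=False)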