Example #1
File: api3.py  Project: zhengwei2020/hue
def _envelope_job(request,
                  file_format,
                  destination,
                  start_time=None,
                  lib_path=None):
    collection_name = destination['name']
    indexer = EnvelopeIndexer(request.user, request.fs)
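    # EnvelopeIndexer, dbms, ManagerApi, MorphlineIndexer, SQLIndexer and SOLR_URL
    # are provided by other Hue modules; their imports are omitted in this snippet.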

    lib_path = None  # TODO: make this an optional input field
    input_path = None
    properties = {}  # guard: some branches below (e.g. the 'table' input) never assign properties

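    # Build the Envelope input properties for the selected source:
    # Hive table, HDFS file, Flume/Kafka stream, or a connector (Flume channels or sfdc).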
    if file_format['inputFormat'] == 'table':
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'],
                                      table_name=file_format['tableName'])
        input_path = table_metadata.path_location
    elif file_format['inputFormat'] == 'file':
        input_path = file_format["path"]
        properties = {'input_path': input_path, 'format': 'csv'}
    elif file_format['inputFormat'] == 'stream' and file_format[
            'streamSelection'] == 'flume':
        pass
    elif file_format['inputFormat'] == 'stream':
        if file_format['streamSelection'] == 'kafka':
            manager = ManagerApi()
            properties = {
                "brokers": manager.get_kafka_brokers(),
                "topics": file_format['kafkaSelectedTopics'],
                "kafkaFieldType": file_format['kafkaFieldType'],
                "kafkaFieldDelimiter": file_format['kafkaFieldDelimiter'],
            }

            if file_format.get(
                    'kafkaSelectedTopics') == 'NavigatorAuditEvents':
                schema_fields = MorphlineIndexer.get_kept_field_list(
                    file_format['sampleCols'])
                properties.update({
                    "kafkaFieldNames": ', '.join([_field['name'] for _field in schema_fields]),
                    "kafkaFieldTypes": ', '.join([_field['type'] for _field in schema_fields])
                })
            else:
                properties.update({
                    "kafkaFieldNames": file_format['kafkaFieldNames'],
                    "kafkaFieldTypes": file_format['kafkaFieldTypes']
                })

            if True:  # windowing is currently always disabled
                properties['window'] = ''
            else:  # For "KafkaSQL"
                properties['window'] = '''
            window {
                enabled = true
                milliseconds = 60000
            }'''
    elif file_format['inputFormat'] == 'connector':
        if file_format['streamSelection'] == 'flume':
            properties = {
                'streamSelection': file_format['streamSelection'],
                'channelSourceHosts': file_format['channelSourceHosts'],
                'channelSourceSelectedHosts': file_format['channelSourceSelectedHosts'],
                'channelSourcePath': file_format['channelSourcePath'],
            }
        else:
            # sfdc
            properties = {
                'streamSelection': file_format['streamSelection'],
                'streamUsername': file_format['streamUsername'],
                'streamPassword': file_format['streamPassword'],
                'streamToken': file_format['streamToken'],
                'streamEndpointUrl': file_format['streamEndpointUrl'],
                'streamObject': file_format['streamObject'],
            }

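    # Map the chosen destination onto Envelope output properties
    # (Hive/Kudu table, Kafka stream, file, or Solr index).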
    if destination['outputFormat'] == 'table':
        if destination['isTargetExisting']:  # Todo: check if format matches
            pass
        else:
            destination['importData'] = False  # Avoid LOAD DATA
            if destination['tableFormat'] == 'kudu':
                # Kudu names should be all lowercase
                properties['kafkaFieldNames'] = properties['kafkaFieldNames'].lower()
            # Create table
            if not request.POST.get('show_command'):
                SQLIndexer(user=request.user,
                           fs=request.fs).create_table_from_a_file(
                               file_format, destination).execute(request)

        if destination['tableFormat'] == 'kudu':
            manager = ManagerApi()
            properties["output_table"] = "impala::%s" % collection_name
            properties["kudu_master"] = manager.get_kudu_master()
        else:
            properties['output_table'] = collection_name
    elif destination['outputFormat'] == 'stream':
        manager = ManagerApi()
        properties['brokers'] = manager.get_kafka_brokers()
        properties['topics'] = file_format['kafkaSelectedTopics']
        properties['kafkaFieldDelimiter'] = file_format['kafkaFieldDelimiter']
    elif destination['outputFormat'] == 'file':
        properties['path'] = file_format["path"]
        if file_format['inputFormat'] == 'stream':
            properties['format'] = 'csv'
        else:
            properties['format'] = file_format['tableFormat']  # or csv
    elif destination['outputFormat'] == 'index':
        properties['collectionName'] = collection_name
        properties['connection'] = SOLR_URL.get()

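    # Settings applied to every generated config.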
    properties["app_name"] = 'Data Ingest'
    properties["inputFormat"] = file_format['inputFormat']
    properties["ouputFormat"] = destination['ouputFormat']  # the misspelled 'ouputFormat' key matches the name supplied by the caller
    properties["streamSelection"] = file_format["streamSelection"]

    configs = indexer.generate_config(properties)

    if request.POST.get('show_command'):
        return {'status': 0, 'commands': configs['envelope.conf']}
    else:
        return indexer.run(request,
                           collection_name,
                           configs,
                           input_path,
                           start_time=start_time,
                           lib_path=lib_path)
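
A minimal sketch of the two payload dictionaries this helper reads, using a hypothetical file-to-table ingest. The key names below are taken from the lookups in the code above (note that both 'outputFormat' and the misspelled 'ouputFormat' are read, and 'streamSelection' is read unconditionally near the end); the paths and table names are made up. The request argument would be a Django request from a Hue view, so the actual call is shown only as a comment.

# Hypothetical payloads for _envelope_job(); key names mirror the reads above.
file_format = {
    'inputFormat': 'file',
    'path': '/user/demo/data.csv',
    'streamSelection': '',            # read unconditionally when the common properties are set
}
destination = {
    'name': 'default.demo_table',
    'outputFormat': 'table',
    'ouputFormat': 'table',           # the misspelled key is read verbatim as well
    'isTargetExisting': True,
    'tableFormat': 'text',
}
# Inside a Hue view, with a Django request exposing .user, .fs and .POST:
# result = _envelope_job(request, file_format, destination)
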
Example #2
def _envelope_job(request, file_format, destination, start_time=None, lib_path=None):
  collection_name = destination['name']
  indexer = EnvelopeIndexer(request.user, request.fs)

  lib_path = '/tmp/envelope-0.5.0.jar'  # hard-coded for now; overrides the lib_path argument
  input_path = None
  properties = {}  # guard: the 'table' input branch below never assigns properties

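  # Build the Envelope input properties for the selected source
  # (Hive table, HDFS file, or sfdc/Kafka stream).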
  if file_format['inputFormat'] == 'table':
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])
    input_path = table_metadata.path_location
  elif file_format['inputFormat'] == 'file':
    input_path = '${nameNode}%s' % file_format["path"]
    properties = {
      'format': 'json'
    }
  elif file_format['inputFormat'] == 'stream':
    if file_format['streamSelection'] == 'sfdc':
      properties = {
        'streamSelection': file_format['streamSelection'],
        'streamUsername': file_format['streamUsername'],
        'streamPassword': file_format['streamPassword'],
        'streamToken': file_format['streamToken'],
        'streamEndpointUrl': file_format['streamEndpointUrl'],
        'streamObject': file_format['streamObject'],
      }
    elif file_format['streamSelection'] == 'kafka':
      manager = ManagerApi()
      properties = {
        "brokers": manager.get_kafka_brokers(),
        "output_table": "impala::%s" % collection_name,
        "topics": file_format['kafkaSelectedTopics'],
        "kafkaFieldType": file_format['kafkaFieldType'],
        "kafkaFieldDelimiter": file_format['kafkaFieldDelimiter'],
        "kafkaFieldNames": file_format['kafkaFieldNames'],
        "kafkaFieldTypes": file_format['kafkaFieldTypes']
      }

    if destination['outputFormat'] == 'table':
      if destination['isTargetExisting']:
        # Todo: check if format matches
        pass
      else:
        sql = SQLIndexer(user=request.user, fs=request.fs).create_table_from_a_file(file_format, destination).get_str()
        print(sql)
      if destination['tableFormat'] == 'kudu':
        manager = ManagerApi()
        properties["output_table"] = "impala::%s" % collection_name
        properties["kudu_master"] = manager.get_kudu_master()
      else:
        properties['output_table'] = collection_name
    elif destination['outputFormat'] == 'file':
      properties['path'] = file_format["path"]
      properties['format'] = file_format['tableFormat'] # or csv
    elif destination['outputFormat'] == 'index':
      properties['collectionName'] = collection_name
      properties['connection'] = SOLR_URL.get()
      if destination['isTargetExisting']:
        # Todo: check if format matches
        pass
      else:
        client = SolrClient(request.user)
        kwargs = {}
        _create_solr_collection(request.user, request.fs, client, destination, collection_name, kwargs)

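  # Settings applied to every generated config.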
  properties["app_name"] = 'Data Ingest'
  properties["inputFormat"] = file_format['inputFormat']
  properties["ouputFormat"] = destination['ouputFormat']
  properties["streamSelection"] = file_format["streamSelection"]

  envelope = indexer.generate_config(properties)

  return indexer.run(request, collection_name, envelope, input_path, start_time=start_time, lib_path=lib_path)
Example #3
def _envelope_job(request,
                  file_format,
                  destination,
                  start_time=None,
                  lib_path=None):
    collection_name = destination['name']
    indexer = EnvelopeIndexer(request.user, request.fs)

    lib_path = None  # TODO: make this an optional input field
    input_path = None
    properties = {}  # guard: some branches below never assign properties

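    # Build the Envelope input properties for the selected source
    # (Hive table, HDFS file, or Flume/Kafka/sfdc stream).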
    if file_format['inputFormat'] == 'table':
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'],
                                      table_name=file_format['tableName'])
        input_path = table_metadata.path_location
    elif file_format['inputFormat'] == 'file':
        input_path = file_format["path"]
        properties = {'input_path': input_path, 'format': 'csv'}
    elif file_format['inputFormat'] == 'stream' and file_format[
            'streamSelection'] == 'flume':
        pass
    elif file_format['inputFormat'] in ('stream', 'sfdc'):
        if file_format['inputFormat'] == 'sfdc':
            properties = {
                'streamSelection': file_format['streamSelection'],
                'streamUsername': file_format['streamUsername'],
                'streamPassword': file_format['streamPassword'],
                'streamToken': file_format['streamToken'],
                'streamEndpointUrl': file_format['streamEndpointUrl'],
                'streamObject': file_format['streamObject'],
            }
        elif file_format['streamSelection'] == 'kafka':
            manager = ManagerApi()
            properties = {
                "brokers": manager.get_kafka_brokers(),
                "topics": file_format['kafkaSelectedTopics'],
                "kafkaFieldType": file_format['kafkaFieldType'],
                "kafkaFieldDelimiter": file_format['kafkaFieldDelimiter'],
                "kafkaFieldNames": file_format['kafkaFieldNames'],
                "kafkaFieldTypes": file_format['kafkaFieldTypes']
            }

            if True:  # windowing is currently always disabled
                properties['window'] = ''
            else:  # For "KafkaSQL"
                properties['window'] = '''
            window {
                enabled = true
                milliseconds = 60000
            }'''

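        # In this variant the table/file/index output handling is nested
        # inside the stream/sfdc input branch.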
        if destination['outputFormat'] == 'table':
            if destination['isTargetExisting']:
                # Todo: check if format matches
                pass
            else:
                sql = SQLIndexer(user=request.user,
                                 fs=request.fs).create_table_from_a_file(
                                     file_format, destination).get_str()
                print(sql)
            if destination['tableFormat'] == 'kudu':
                manager = ManagerApi()
                properties["output_table"] = "impala::%s" % collection_name
                properties["kudu_master"] = manager.get_kudu_master()
            else:
                properties['output_table'] = collection_name
        elif destination['outputFormat'] == 'file':
            properties['path'] = file_format["path"]
            if file_format['inputFormat'] == 'stream':
                properties['format'] = 'csv'
            else:
                properties['format'] = file_format['tableFormat']  # or csv
        elif destination['outputFormat'] == 'index':
            properties['collectionName'] = collection_name
            properties['connection'] = SOLR_URL.get()


            # Not needed anymore
            #   if destination['isTargetExisting']:
            #     # Todo: check if format matches
            #     pass
            #   else:
            #     client = SolrClient(request.user)
            #     kwargs = {}
            #     _create_solr_collection(request.user, request.fs, client, destination, collection_name, kwargs)

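    # Stream output is handled at this level so it applies regardless of the input format.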
    if destination['outputFormat'] == 'stream':
        manager = ManagerApi()
        properties['brokers'] = manager.get_kafka_brokers()
        properties['topics'] = file_format['kafkaSelectedTopics']
        properties['kafkaFieldDelimiter'] = file_format['kafkaFieldDelimiter']

    properties["app_name"] = 'Data Ingest'
    properties["inputFormat"] = file_format['inputFormat']
    properties["ouputFormat"] = destination['ouputFormat']
    properties["streamSelection"] = file_format["streamSelection"]

    envelope = indexer.generate_config(properties)

    return indexer.run(request,
                       collection_name,
                       envelope,
                       input_path,
                       start_time=start_time,
                       lib_path=lib_path)