def autocomplete(self, snippet, database=None, table=None, column=None, nested=None):
  """Return mock autocomplete metadata for the editor.

  Drill-down is driven by which arguments are provided: with no ``database``
  it lists databases; with a ``database`` but no ``table`` it lists Kafka
  topics as table names; otherwise it returns a canned column description of
  the ``web_logs`` sample table.  ``snippet``, ``column`` and ``nested`` are
  accepted for interface compatibility but are unused here.

  On any failure the response carries ``code=500`` and an ``error`` string
  instead of raising.
  """
  response = {}
  try:
    if database is None:
      response['databases'] = ['default']
    elif table is None:
      # Kafka topics stand in for table names when browsing a stream source.
      response['tables_meta'] = get_topics()
    else:
      # Static sample metadata for the 'web_logs' demo table.
      response = {
        u'status': 0,
        u'comment': u'test test test 22',
        u'hdfs_link': u'/filebrowser/view=/user/hive/warehouse/web_logs',
        u'extended_columns': [
          {u'comment': u'', u'type': u'bigint', u'name': u'_version_'},
          {u'comment': u'The app', u'type': u'string', u'name': u'app'},
          {u'comment': u'test test test 22', u'type': u'smallint', u'name': u'bytes'},
          {u'comment': u'The citi', u'type': u'string', u'name': u'city'},
          {u'comment': u'', u'type': u'string', u'name': u'client_ip'},
          {u'comment': u'', u'type': u'tinyint', u'name': u'code'},
          {u'comment': u'', u'type': u'string', u'name': u'country_code'},
          {u'comment': u'', u'type': u'string', u'name': u'country_code3'},
          {u'comment': u'', u'type': u'string', u'name': u'country_name'},
          {u'comment': u'', u'type': u'string', u'name': u'device_family'},
          {u'comment': u'', u'type': u'string', u'name': u'extension'},
          {u'comment': u'', u'type': u'float', u'name': u'latitude'},
          {u'comment': u'', u'type': u'float', u'name': u'longitude'},
          {u'comment': u'', u'type': u'string', u'name': u'method'},
          {u'comment': u'', u'type': u'string', u'name': u'os_family'},
          {u'comment': u'', u'type': u'string', u'name': u'os_major'},
          {u'comment': u'', u'type': u'string', u'name': u'protocol'},
          {u'comment': u'', u'type': u'string', u'name': u'record'},
          {u'comment': u'', u'type': u'string', u'name': u'referer'},
          {u'comment': u'', u'type': u'bigint', u'name': u'region_code'},
          {u'comment': u'', u'type': u'string', u'name': u'request'},
          {u'comment': u'', u'type': u'string', u'name': u'subapp'},
          {u'comment': u'', u'type': u'string', u'name': u'time'},
          {u'comment': u'', u'type': u'string', u'name': u'url'},
          {u'comment': u'', u'type': u'string', u'name': u'user_agent'},
          {u'comment': u'', u'type': u'string', u'name': u'user_agent_family'},
          {u'comment': u'', u'type': u'string', u'name': u'user_agent_major'},
          {u'comment': u'', u'type': u'string', u'name': u'id'},
          {u'comment': u'', u'type': u'string', u'name': u'date'}
        ],
        u'support_updates': False,
        u'partition_keys': [
          {u'type': u'string', u'name': u'date'}
        ],
        u'columns': [
          u'_version_', u'app', u'bytes', u'city', u'client_ip', u'code',
          u'country_code', u'country_code3', u'country_name', u'device_family',
          u'extension', u'latitude', u'longitude', u'method', u'os_family',
          u'os_major', u'protocol', u'record', u'referer', u'region_code',
          u'request', u'subapp', u'time', u'url', u'user_agent',
          u'user_agent_family', u'user_agent_major', u'id', u'date'
        ],
        u'is_view': False
      }
  except Exception as e:
    # Logger.warn() is a deprecated alias; warning() is the supported spelling.
    LOG.warning('Autocomplete data fetching error: %s' % e)
    response['code'] = 500
    # BUG FIX: Exception.message was removed in Python 3 and would raise
    # AttributeError inside the handler; str(e) is portable.
    response['error'] = str(e)
  return response
def guess_format(request):
  """Guess the record format for the importer wizard.

  Reads a JSON ``fileFormat`` spec from the POST body and returns a
  JsonResponse describing the detected (for files) or assumed (for the other
  sources) format.  Supported ``inputFormat`` values: ``file``, ``table``,
  ``query``, ``rdbms``, ``stream`` (kafka / flume) and ``connector`` (sfdc).

  Raises PopupException for unknown input formats, unknown stream/connector
  selections, missing files and unsupported Hive table storage formats.
  """
  file_format = json.loads(request.POST.get('fileFormat', '{}'))

  if file_format['inputFormat'] == 'file':
    path = urllib_unquote(file_format["path"])
    indexer = MorphlineIndexer(request.user, request.fs)
    if not request.fs.isfile(path):
      raise PopupException(_('Path %(path)s is not a file') % file_format)
    # NOTE(review): the stream is handed to the indexer and never closed
    # here — presumably guess_format() owns/consumes it; confirm.
    stream = request.fs.open(path)
    format_ = indexer.guess_format({"file": {"stream": stream, "name": path}})
    _convert_format(format_)
  elif file_format['inputFormat'] == 'table':
    db = dbms.get(request.user)
    try:
      table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])
    except Exception as e:
      raise PopupException(e.message if hasattr(e, 'message') and e.message else e)
    # Flatten storage detail rows into a key/value dict: rows either embed
    # 'key=value' inside data_type, or use (data_type, comment) as the pair.
    storage = {}
    for delim in table_metadata.storage_details:
      if delim['data_type']:
        if '=' in delim['data_type']:
          key, val = delim['data_type'].split('=', 1)
          storage[key] = val
        else:
          storage[delim['data_type']] = delim['comment']
    table_format = table_metadata.details['properties']['format']
    if table_format == 'text':
      format_ = {
        "quoteChar": "\"",
        "recordSeparator": '\\n',
        "type": "csv",
        "hasHeader": False,
        "fieldSeparator": storage.get('field.delim', ',')
      }
    elif table_format == 'parquet':
      format_ = {
        "type": "parquet",
        "hasHeader": False,
      }
    else:
      raise PopupException('Hive table format %s is not supported.' % table_format)
  elif file_format['inputFormat'] == 'query':
    # Query results use Hive's default ^A (\u0001) column delimiter.
    format_ = {
      "quoteChar": "\"",
      "recordSeparator": "\\n",
      "type": "csv",
      "hasHeader": False,
      "fieldSeparator": "\u0001"
    }
  elif file_format['inputFormat'] == 'rdbms':
    format_ = {"type": "csv"}
  elif file_format['inputFormat'] == 'stream':
    if file_format['streamSelection'] == 'kafka':
      format_ = {
        "type": "json",
        'topics': get_topics(request.user)
      }
    elif file_format['streamSelection'] == 'flume':
      format_ = {
        "type": "csv",
        "fieldSeparator": ",",
        "hasHeader": True,
        "quoteChar": "\"",
        "recordSeparator": "\\n"
      }
    else:
      # BUG FIX: an unknown streamSelection used to fall through and crash
      # with UnboundLocalError on format_; fail with an explicit error.
      raise PopupException(
          _('Input format %(inputFormat)s stream not recognized: %(streamSelection)s') % file_format)
  elif file_format['inputFormat'] == 'connector':
    if file_format['connectorSelection'] == 'sfdc':
      sf = Salesforce(
          username=file_format['streamUsername'],
          password=file_format['streamPassword'],
          security_token=file_format['streamToken']
      )
      format_ = {
        "type": "csv",
        "fieldSeparator": ",",
        "hasHeader": True,
        "quoteChar": "\"",
        "recordSeparator": "\\n",
        'objects': [
          sobject['name'] for sobject in sf.restful('sobjects/')['sobjects'] if sobject['queryable']
        ]
      }
    else:
      # BUG FIX: '$(connectorSelection)s' used a '$' instead of '%', so the
      # selection name was never interpolated into the message.
      raise PopupException(
          _('Input format %(inputFormat)s connector not recognized: %(connectorSelection)s') % file_format)
  else:
    raise PopupException(_('Input format not recognized: %(inputFormat)s') % file_format)

  format_['status'] = 0
  return JsonResponse(format_)
storage[key] = val else: storage[delim['data_type']] = delim['comment'] if table_metadata.details['properties']['format'] == 'text': format_ = {"quoteChar": "\"", "recordSeparator": '\\n', "type": "csv", "hasHeader": False, "fieldSeparator": storage.get('field.delim', ',')} elif table_metadata.details['properties']['format'] == 'parquet': format_ = {"type": "parquet", "hasHeader": False,} else: raise PopupException('Hive table format %s is not supported.' % table_metadata.details['properties']['format']) elif file_format['inputFormat'] == 'query': format_ = {"quoteChar": "\"", "recordSeparator": "\\n", "type": "csv", "hasHeader": False, "fieldSeparator": "\u0001"} elif file_format['inputFormat'] == 'rdbms': format_ = RdbmsIndexer(request.user, file_format['rdbmsType']).guess_format() elif file_format['inputFormat'] == 'stream': if file_format['streamSelection'] == 'kafka': format_ = {"type": "csv", "fieldSeparator": ",", "hasHeader": True, "quoteChar": "\"", "recordSeparator": "\\n", 'topics': get_topics()} elif file_format['streamSelection'] == 'sfdc': sf = Salesforce( username=file_format['streamUsername'], password=file_format['streamPassword'], security_token=file_format['streamToken'] ) format_ = {"type": "csv", "fieldSeparator": ",", "hasHeader": True, "quoteChar": "\"", "recordSeparator": "\\n", 'objects': [sobject['name'] for sobject in sf.restful('sobjects/')['sobjects'] if sobject['queryable']]} format_['status'] = 0 return JsonResponse(format_) def guess_field_types(request): file_format = json.loads(request.POST.get('fileFormat', '{}'))
"type": "csv", "hasHeader": False, "fieldSeparator": "\u0001" } elif file_format['inputFormat'] == 'rdbms': format_ = RdbmsIndexer(request.user, file_format['rdbmsType']).guess_format() elif file_format['inputFormat'] == 'stream': if file_format['streamSelection'] == 'kafka': format_ = { "type": "csv", "fieldSeparator": ",", "hasHeader": True, "quoteChar": "\"", "recordSeparator": "\\n", 'topics': get_topics() } elif file_format['streamSelection'] == 'sfdc': sf = Salesforce(username=file_format['streamUsername'], password=file_format['streamPassword'], security_token=file_format['streamToken']) format_ = { "type": "csv", "fieldSeparator": ",", "hasHeader": True, "quoteChar": "\"", "recordSeparator":
def guess_format(request):
  """Guess the record format for the importer wizard (Excel-aware variant).

  Reads a JSON ``fileFormat`` spec from the POST body.  Excel files
  (``.xls``/``.xlsx``) from the ``file`` source are converted to CSV on the
  filesystem via pandas before format detection; Python 2 deployments get an
  explicit unsupported-importer response.  Supported ``inputFormat`` values:
  ``localfile``, ``file``, ``table``, ``query``, ``rdbms``, ``stream``
  (kafka / flume) and ``connector`` (sfdc).

  Raises PopupException for unknown input formats, unknown stream/connector
  selections, missing files and unsupported Hive table storage formats.
  """
  file_format = json.loads(request.POST.get('fileFormat', '{}'))
  # NOTE(review): 'file_type' and 'path' are read unconditionally, so every
  # caller is expected to supply them even for non-file inputs — confirm.
  file_type = file_format['file_type']
  path = urllib_unquote(file_format["path"])

  if sys.version_info[0] < 3 and (file_type == 'excel' or path[-3:] == 'xls' or path[-4:] == 'xlsx'):
    return JsonResponse({
      'status': -1,
      'message': 'Python2 based Hue does not support Excel file importer'
    })

  if file_format['inputFormat'] == 'localfile':
    if file_type == 'excel':
      format_ = {"type": "excel", "hasHeader": True}
    else:
      format_ = {
        "quoteChar": "\"",
        "recordSeparator": '\\n',
        "type": "csv",
        "hasHeader": True,
        "fieldSeparator": ","
      }
  elif file_format['inputFormat'] == 'file':
    if path[-3:] == 'xls' or path[-4:] == 'xlsx':
      # Convert the spreadsheet to CSV next to the original so the morphline
      # indexer (CSV-oriented) can sniff it; up to 1 GiB is read in memory.
      file_obj = request.fs.open(path)
      if path[-3:] == 'xls':
        df = pd.read_excel(file_obj.read(1024 * 1024 * 1024), engine='xlrd')
      else:
        df = pd.read_excel(file_obj.read(1024 * 1024 * 1024), engine='openpyxl')
      _csv_data = df.to_csv(index=False)
      path = excel_to_csv_file_name_change(path)
      request.fs.create(path, overwrite=True, data=_csv_data)

    indexer = MorphlineIndexer(request.user, request.fs)
    if not request.fs.isfile(path):
      raise PopupException(_('Path %(path)s is not a file') % file_format)
    stream = request.fs.open(path)
    format_ = indexer.guess_format({"file": {"stream": stream, "name": path}})
    _convert_format(format_)

    # Report 'excel' to the UI when the original upload was a spreadsheet,
    # even though detection ran against the converted CSV.
    if file_format["path"][-3:] == 'xls' or file_format["path"][-4:] == 'xlsx':
      format_ = {
        "quoteChar": "\"",
        "recordSeparator": '\\n',
        "type": "excel",
        "hasHeader": True,
        "fieldSeparator": ","
      }
  elif file_format['inputFormat'] == 'table':
    db = dbms.get(request.user)
    try:
      table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])
    except Exception as e:
      raise PopupException(e.message if hasattr(e, 'message') and e.message else e)
    # Flatten storage detail rows into a key/value dict: rows either embed
    # 'key=value' inside data_type, or use (data_type, comment) as the pair.
    storage = {}
    for delim in table_metadata.storage_details:
      if delim['data_type']:
        if '=' in delim['data_type']:
          key, val = delim['data_type'].split('=', 1)
          storage[key] = val
        else:
          storage[delim['data_type']] = delim['comment']
    table_format = table_metadata.details['properties']['format']
    if table_format == 'text':
      format_ = {
        "quoteChar": "\"",
        "recordSeparator": '\\n',
        "type": "csv",
        "hasHeader": False,
        "fieldSeparator": storage.get('field.delim', ',')
      }
    elif table_format == 'parquet':
      format_ = {
        "type": "parquet",
        "hasHeader": False,
      }
    else:
      raise PopupException('Hive table format %s is not supported.' % table_format)
  elif file_format['inputFormat'] == 'query':
    # Query results use Hive's default ^A (\u0001) column delimiter.
    format_ = {
      "quoteChar": "\"",
      "recordSeparator": "\\n",
      "type": "csv",
      "hasHeader": False,
      "fieldSeparator": "\u0001"
    }
  elif file_format['inputFormat'] == 'rdbms':
    format_ = {"type": "csv"}
  elif file_format['inputFormat'] == 'stream':
    if file_format['streamSelection'] == 'kafka':
      format_ = {
        "type": "json",
        'topics': get_topics(request.user)
      }
    elif file_format['streamSelection'] == 'flume':
      format_ = {
        "type": "csv",
        "fieldSeparator": ",",
        "hasHeader": True,
        "quoteChar": "\"",
        "recordSeparator": "\\n"
      }
    else:
      # BUG FIX: an unknown streamSelection used to fall through and crash
      # with UnboundLocalError on format_; fail with an explicit error.
      raise PopupException(
          _('Input format %(inputFormat)s stream not recognized: %(streamSelection)s') % file_format)
  elif file_format['inputFormat'] == 'connector':
    if file_format['connectorSelection'] == 'sfdc':
      sf = Salesforce(
          username=file_format['streamUsername'],
          password=file_format['streamPassword'],
          security_token=file_format['streamToken']
      )
      format_ = {
        "type": "csv",
        "fieldSeparator": ",",
        "hasHeader": True,
        "quoteChar": "\"",
        "recordSeparator": "\\n",
        'objects': [
          sobject['name'] for sobject in sf.restful('sobjects/')['sobjects'] if sobject['queryable']
        ]
      }
    else:
      # BUG FIX: '$(connectorSelection)s' used a '$' instead of '%', so the
      # selection name was never interpolated into the message.
      raise PopupException(
          _('Input format %(inputFormat)s connector not recognized: %(connectorSelection)s') % file_format)
  else:
    raise PopupException(_('Input format not recognized: %(inputFormat)s') % file_format)

  format_['status'] = 0
  return JsonResponse(format_)