def admin_collection_template(request, collection_id):
  """
  Show or save the result template of a Hue search collection.

  POST: persist the posted template settings and return empty JSON.
  GET: run a minimal 5-row Solr query so the template editor has sample docs.
  """
  hue_collection = Collection.objects.get(id=collection_id)
  solr_collection = SolrApi(SOLR_URL.get(), request.user).collection_or_core(hue_collection)
  sample_data = {}

  if request.method == 'POST':
    hue_collection.result.update_from_post(request.POST)
    hue_collection.result.save()
    return HttpResponse(json.dumps({}), mimetype="application/json")

  # Preview query: no filters, no facets, first 5 documents only.
  solr_query = {}
  solr_query['collection'] = hue_collection.name
  solr_query['q'] = ''
  solr_query['fq'] = ''
  solr_query['rows'] = 5
  solr_query['start'] = 0
  solr_query['facets'] = 0

  try:
    response = SolrApi(SOLR_URL.get(), request.user).query(solr_query, hue_collection)
    sample_data = json.dumps(response["response"]["docs"])
  except PopupException, e:
    message = e
    try:
      message = json.loads(e.message.message)['error']['msg'] # Try to get the core error
    except:
      pass
    request.error(_('No preview available, some facets are invalid: %s') % message)
    LOG.exception(e)
  # NOTE(review): the render/return of the template page is not visible in this chunk.
def create_collection(self, name, fields, unique_key_field='id', df='text'):
  """
  Create solr collection or core and instance dir.
  Create schema.xml file so that we can set UniqueKey field.

  SolrCloud mode shells out to solrctl to create the instance directory;
  non-cloud mode builds the core instance directory locally.
  Raises PopupException on any failure; temporary config dirs are always removed.
  """
  if self.is_solr_cloud_mode():
    # solrcloud mode

    # Need to remove path afterwards
    tmp_path, solr_config_path = utils.copy_configs(fields, unique_key_field, df, True)

    # Create instance directory.
    solrctl_path = get_solrctl_path()

    process = subprocess.Popen([solrctl_path, "instancedir", "--create", name, solr_config_path],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               env={'SOLR_ZK_ENSEMBLE': conf.SOLR_ZK_ENSEMBLE.get()})
    status = process.wait()

    # Don't want directories laying around
    shutil.rmtree(tmp_path)

    if status != 0:
      LOG.error("Could not create instance directory.\nOutput: %s\nError: %s" % process.communicate())
      raise PopupException(_('Could not create instance directory. '
                             'Check if solr_zk_ensemble and solrctl_path are correct in Hue config [indexer].'))

    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    if not api.create_collection(name):
      # Delete instance directory if we couldn't create a collection.
      process = subprocess.Popen([solrctl_path, "instancedir", "--delete", name],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env={'SOLR_ZK_ENSEMBLE': conf.SOLR_ZK_ENSEMBLE.get()})
      if process.wait() != 0:
        # Fixed log typo: "Cloud not" -> "Could not".
        LOG.error("Could not delete collection.\nOutput: %s\nError: %s" % process.communicate())
      raise PopupException(_('Could not create collection. Check error logs for more info.'))
  else:
    # Non-solrcloud mode
    # Create instance directory locally.
    instancedir = os.path.join(conf.CORE_INSTANCE_DIR.get(), name)
    if os.path.exists(instancedir):
      raise PopupException(_("Instance directory %s already exists! Please remove it from the file system.") % instancedir)

    tmp_path, solr_config_path = utils.copy_configs(fields, unique_key_field, df, False)
    shutil.move(solr_config_path, instancedir)
    shutil.rmtree(tmp_path)

    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    if not api.create_core(name, instancedir):
      # Delete instance directory if we couldn't create a collection.
      shutil.rmtree(instancedir)
      raise PopupException(_('Could not create collection. Check error logs for more info.'))
def fields_data(self, user):
  """Return this collection's static plus dynamic fields as sorted {'name', 'type'} dicts."""
  static_response = SolrApi(SOLR_URL.get(), user).fields(self.name)
  merged = static_response['schema']['fields']

  dynamic_response = SolrApi(SOLR_URL.get(), user).fields(self.name, dynamic=True)
  merged.update(dynamic_response['fields'])

  field_dicts = [
    {'name': str(field_name), 'type': str(attrs.get('type', ''))}
    for field_name, attrs in merged.iteritems()
  ]
  return sorted(field_dicts)
def get_fields(self, collection_or_core_name):
  """
  Return (uniquekey, fields) for a collection or core.

  Both Solr lookups are wrapped so that any failure is logged and surfaced
  to the user as a PopupException.
  """
  try:
    field_data = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get()).fields(collection_or_core_name)
    fields = self._format_flags(field_data['schema']['fields'])
  except:
    LOG.exception(_('Could not fetch fields for collection %s.') % collection_or_core_name)
    raise PopupException(_('Could not fetch fields for collection %s. See logs for more info.') % collection_or_core_name)

  try:
    uniquekey = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get()).uniquekey(collection_or_core_name)
  except:
    LOG.exception(_('Could not fetch unique key for collection %s.') % collection_or_core_name)
    raise PopupException(_('Could not fetch unique key for collection %s. See logs for more info.') % collection_or_core_name)

  return uniquekey, fields
def create_collection(self, name, fields, unique_key_field='id', df='text'):
  """
  Create solr collection or core and instance dir.
  Create schema.xml file so that we can set UniqueKey field.

  SolrCloud variant: the generated config is copied into ZooKeeper directly
  instead of shelling out to solrctl.
  """
  if self.is_solr_cloud_mode():
    # solrcloud mode

    # Need to remove path afterwards
    tmp_path, solr_config_path = copy_configs(fields, unique_key_field, df, True)

    zc = ZookeeperClient(hosts=get_solr_ensemble(), read_only=False)
    root_node = '%s/%s' % (ZK_SOLR_CONFIG_NAMESPACE, name)
    config_root_path = '%s/%s' % (solr_config_path, 'conf')
    try:
      zc.copy_path(root_node, config_root_path)
    except Exception, e:
      # Roll back the partially-copied config before surfacing the error.
      zc.delete_path(root_node)
      raise PopupException(_('Error in copying Solr configurations.'), detail=e)

    # Don't want directories laying around
    shutil.rmtree(tmp_path)

    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    if not api.create_collection(name):
      # Delete instance directory if we couldn't create a collection.
      try:
        zc.delete_path(root_node)
      except Exception, e:
        raise PopupException(_('Error in deleting Solr configurations.'), detail=e)
  # NOTE(review): the non-cloud branch is not visible in this chunk.
def get_terms(request):
  """
  Return the top terms of a field for the analysis popup.

  Reads POST params 'collection' and 'analysis' (JSON); queries the Solr
  terms component limited to 25 terms with the requested prefix.
  """
  result = {"status": -1, "message": "Error"}

  try:
    collection = json.loads(request.POST.get("collection", "{}"))
    analysis = json.loads(request.POST.get("analysis", "{}"))

    field = analysis["name"]
    properties = {
      "terms.limit": 25,
      "terms.prefix": analysis["terms"]["prefix"]
      # lower
      # limit
      # mincount
      # maxcount
    }

    result["terms"] = SolrApi(SOLR_URL.get(), request.user).terms(collection["name"], field, properties)
    result["terms"] = pairwise2(field, [], result["terms"]["terms"][field])
    result["status"] = 0
    result["message"] = ""
  except Exception, e:
    result["message"] = force_unicode(e)
    # The Solr terms component rejects some field types with this message.
    if "not currently supported" in result["message"]:
      result["status"] = 1
      result["message"] = _("This field does not support stats")
  # NOTE(review): the JSON response return is not visible in this chunk.
def _fetch_collections(request):
  """
  Navigator-style listing of Solr entities for the assist panel.

  'path' (GET param) selects the level: empty -> top-level "databases",
  'collections'/'configs'/'admin' -> their entries, 'item/name' -> one entity.
  Raises PopupException for unknown paths.
  """
  from libsolr.api import SolrApi
  from search.conf import SOLR_URL

  path = request.GET['path']
  item = None
  name = None

  if path:
    item = path
  if '/' in path:
    # Split on the first '/' only, so a name containing '/' cannot raise
    # "too many values to unpack" (ValueError).
    item, name = path.split('/', 1)

  api = SolrApi(SOLR_URL.get(), request.user)

  if not item:
    return {"databases": ["collections", "configs", "admin"]}
  elif item and name:
    return {"authorizable_link": "/indexer/#edit/%s" % name, "extended_columns": [], "columns": [], "partition_keys": []}
  elif item == 'collections':
    return {"tables_meta": [{"comment": None, "type": "Table", "name": col} for col in api.collections2()]}
  elif item == 'configs':
    return {"tables_meta": [{"comment": None, "type": "Table", "name": conf} for conf in api.configs()]}
  elif item == 'admin':
    return {"tables_meta": [{"comment": None, "type": "Table", "name": 'collections'}, {"comment": None, "type": "Table", "name": "cores"}]}
  else:
    raise PopupException(_('Authorizable %s could not be retrieved') % path)
def test_query(self):
  """Smoke test: an empty query against the demo collection must not raise."""
  raw_collection = Collection2(user=self.user, name='log_analytics_demo')
  collection = json.loads(raw_collection.get_json(self.user))

  query = {'qs': [{'q': ''}], 'fqs': [], 'start': 0}
  SolrApi(SOLR_URL.get(), self.user).query(collection['collection'], query)
def update_document(request): result = {'status': -1, 'message': 'Error'} if not can_edit_index(request.user): result['message'] = _('Permission to edit the document denied') return JsonResponse(result) try: collection = json.loads(request.POST.get('collection', '{}')) document = json.loads(request.POST.get('document', '{}')) doc_id = request.POST.get('id') if document['hasChanged']: edits = { "id": doc_id, } version = None # If there is a version, use it to avoid potential concurrent update conflicts for field in document['details']: if field['hasChanged']: edits[field['key']] = {"set": field['value']} if field['key'] == '_version_': version = field['value'] if SolrApi(SOLR_URL.get(), request.user).update(collection['name'], json.dumps([edits]), content_type='json', version=version): result['status'] = 0 result['message'] = _('Document successfully updated.') else: result['status'] = 0 result['message'] = _('Document has no modifications to change.') except Exception, e: result['message'] = force_unicode(e)
def get_all_indexes(self, show_all=False):
  """
  Return the names of known indexes: collections plus aliases, and cores
  when show_all is set or nothing else was found.

  Failures of the individual lookups are tolerated but now logged instead of
  silently swallowed (consistent with the sibling implementation).
  """
  indexes = []

  try:
    indexes = self.get_solr_collection().keys()
  except:
    # Assumes a module-level LOG exists, as in the sibling version of this
    # method — TODO confirm for this module.
    LOG.exception('failed to get indexes')

  try:
    indexes += SolrApi(SOLR_URL.get(), self.user).aliases().keys()
  except:
    LOG.exception('failed to get index aliases')

  if show_all or not indexes:
    return indexes + SolrApi(SOLR_URL.get(), self.user).cores().keys()
  else:
    return indexes
def get_all_indexes(self, show_all=False):
  """
  Return the names of known indexes: collections plus aliases, and cores
  when show_all is set or nothing else was found.

  Lookup failures are tolerated (logged) so one broken source does not hide
  the others.
  """
  indexes = []

  try:
    indexes = self.get_solr_collections().keys()
  except:
    LOG.exception('failed to get indexes')

  try:
    indexes += SolrApi(SOLR_URL.get(), self.user).aliases().keys()
  except:
    LOG.exception('failed to get index aliases')

  if show_all or not indexes:
    return indexes + SolrApi(SOLR_URL.get(), self.user).cores().keys()
  else:
    return indexes
def update_data_from_hive(self, db, collection_or_core_name, database, table, columns, indexing_strategy='upload'): """ Add hdfs path contents to index """ # Run a custom hive query and post data to collection from beeswax.server import dbms import tablib api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get()) if indexing_strategy == 'upload': table = db.get_table(database, table) hql = "SELECT %s FROM `%s.%s` %s" % (','.join(columns), database, table.name, db._get_browse_limit_clause(table)) query = dbms.hql_query(hql) try: handle = db.execute_and_wait(query) if handle: result = db.fetch(handle, rows=100) db.close(handle) dataset = tablib.Dataset() dataset.append(columns) for row in result.rows(): dataset.append(row) if not api.update(collection_or_core_name, dataset.csv, content_type='csv'): raise PopupException(_('Could not update index. Check error logs for more info.')) else: raise PopupException(_('Could not update index. Could not fetch any data from Hive.')) except Exception, e: raise PopupException(_('Could not update index.'), detail=e)
def update_data_from_hdfs(self, fs, collection_or_core_name, fields, path, data_type='separated', indexing_strategy='upload', **kwargs):
  """
  Add hdfs path contents to index

  Reads the file at `path`, converts it to JSON documents according to
  `data_type` ('log' or 'separated'), and posts them to the collection.
  Rejects files larger than MAX_UPLOAD_SIZE and unknown strategies/types.
  """
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  if indexing_strategy == 'upload':
    stats = fs.stats(path)
    if stats.size > MAX_UPLOAD_SIZE:
      raise PopupException(_('File size is too large to handle!'))
    else:
      # Get fields for filtering
      unique_key, fields = self.get_fields(collection_or_core_name)
      fields = [{'name': field, 'type': fields[field]['type']} for field in fields]

      fh = fs.open(path)
      # NOTE(review): fh is not closed if one of the raising branches fires —
      # consider a try/finally; left as-is here.
      if data_type == 'log':
        # Transform to JSON then update
        data = json.dumps([value for value in field_values_from_log(fh, fields)])
        content_type = 'json'
      elif data_type == 'separated':
        data = json.dumps([value for value in field_values_from_separated_file(fh, kwargs.get('separator', ','), kwargs.get('quote_character', '"'), fields)], indent=2)
        content_type = 'json'
      else:
        raise PopupException(_('Could not update index. Unknown type %s') % data_type)
      fh.close()

      if not api.update(collection_or_core_name, data, content_type=content_type):
        raise PopupException(_('Could not update index. Check error logs for more info.'))
  else:
    raise PopupException(_('Could not update index. Indexing strategy %s not supported.') % indexing_strategy)
def get_all_indexes(self):
  """Return collection names plus core names; collection lookup failures are ignored."""
  indexes = []
  try:
    indexes = self.get_solr_collection().keys()
  except:
    pass

  core_names = SolrApi(SOLR_URL.get(), self.user).cores().keys()
  return indexes + core_names
def _create_facet(collection, user, facet_id, facet_label, facet_field, widget_type):
  """
  Build the facet definition dict for a new dashboard widget.

  Facet type is 'range' when the field supports range faceting, 'query' for
  hit widgets, otherwise a plain 'field' facet. Map widgets get wider limits.
  """
  properties = {
    'sort': 'desc',
    'canRange': False,
    'stacked': False,
    'limit': 10,
    'mincount': 0,
    'isDate': False,
    'andUp': False,  # Not used yet
  }

  solr_api = SolrApi(SOLR_URL.get(), user)
  range_properties = _new_range_facet(solr_api, collection, facet_field, widget_type)

  if range_properties:
    facet_type = 'range'
    properties.update(range_properties)
  elif widget_type == 'hit-widget':
    facet_type = 'query'
  else:
    facet_type = 'field'

  if widget_type == 'map-widget':
    properties['scope'] = 'world'
    properties['mincount'] = 1
    properties['limit'] = 100

  return {
    'id': facet_id,
    'label': facet_label,
    'field': facet_field,
    'type': facet_type,
    'widgetType': widget_type,
    'properties': properties
  }
def get_terms(request):
  """
  Return the top terms of a field for the analysis popup.

  Same contract as the double-quoted sibling implementation: POST params
  'collection' and 'analysis', fixed limit of 25 terms.
  """
  result = {'status': -1, 'message': 'Error'}

  try:
    collection = json.loads(request.POST.get('collection', '{}'))
    analysis = json.loads(request.POST.get('analysis', '{}'))

    field = analysis['name']
    properties = {
      'terms.limit': 25,
      'terms.prefix': analysis['terms']['prefix']
      # lower
      # limit
      # mincount
      # maxcount
    }

    result['terms'] = SolrApi(SOLR_URL.get(), request.user).terms(collection['name'], field, properties)
    result['terms'] = pairwise2(field, [], result['terms']['terms'][field])
    result['status'] = 0
    result['message'] = ''
  except Exception, e:
    result['message'] = force_unicode(e)
    # The Solr terms component rejects some field types with this message.
    if 'not currently supported' in result['message']:
      result['status'] = 1
      result['message'] = _('This field does not support stats')
  # NOTE(review): the JSON response return is not visible in this chunk.
def zkensemble():
  """
  Try to guess the value if no values are specified.

  Preference order: the zookeeper app's configured cluster, then the Solr
  host from the search app (assuming ZK on port 2181), then localhost:2181.
  """
  from django.conf import settings

  if 'zookeeper' in settings.INSTALLED_APPS:
    try:
      # Backward compatibility until Hue 4
      from zookeeper.conf import CLUSTERS
      clusters = CLUSTERS.get()
      # Only trust the zookeeper app when it differs from the bare default.
      if clusters['default'].HOST_PORTS.get() != 'localhost:2181':
        return '%s' % clusters['default'].HOST_PORTS.get()
    except:
      LOG.warn('Could not get zookeeper ensemble from the zookeeper app')

  if 'search' in settings.INSTALLED_APPS:
    try:
      from search.conf import SOLR_URL
      parsed = urlparse(SOLR_URL.get())
      return "%s:2181" % (parsed.hostname or 'localhost')
    except:
      LOG.warn('Could not get zookeeper ensemble from the search app')

  return "localhost:2181"
def index(request):
  """
  Search page view (legacy variant): builds the Solr query dict by hand from
  the validated QueryForm fields and runs it against the chosen collection.

  Redirects to admin/no-collection pages when nothing is enabled.
  """
  hue_collections = Collection.objects.filter(enabled=True)

  if not hue_collections:
    if request.user.is_superuser:
      return admin_collections(request, True)
    else:
      return no_collections(request)

  # Remember the user's last collection via cookie; 0 means "none".
  initial_collection = request.COOKIES.get('hueSearchLastCollection', 0)
  search_form = QueryForm(request.GET, initial_collection=initial_collection)
  response = {}
  error = {}
  solr_query = {}
  hue_collection = None

  if search_form.is_valid():
    collection_id = search_form.cleaned_data['collection']
    solr_query['q'] = search_form.cleaned_data['query'].encode('utf8')
    solr_query['fq'] = search_form.cleaned_data['fq']
    if search_form.cleaned_data['sort']:
      solr_query['sort'] = search_form.cleaned_data['sort']
    solr_query['rows'] = search_form.cleaned_data['rows'] or 15
    solr_query['start'] = search_form.cleaned_data['start'] or 0
    solr_query['facets'] = search_form.cleaned_data['facets'] or 1

    try:
      hue_collection = Collection.objects.get(id=collection_id)
      solr_query['collection'] = hue_collection.name
      response = SolrApi(SOLR_URL.get(), request.user).query(solr_query, hue_collection)
    except Exception, e:
      error['message'] = unicode(str(e), "utf8")
  # NOTE(review): the render/return of the page is not visible in this chunk.
def index(request):
  """
  Search page view (newer variant): delegates query building to
  QueryForm.solr_query_dict and augments the result with paging info.
  """
  hue_collections = Collection.objects.filter(enabled=True)

  if not hue_collections:
    if request.user.is_superuser:
      return admin_collections(request, True)
    else:
      return no_collections(request)

  init_collection = initial_collection(request, hue_collections)

  search_form = QueryForm(request.GET, initial_collection=init_collection)
  response = {}
  error = {}
  solr_query = {}

  if search_form.is_valid():
    try:
      collection_id = search_form.cleaned_data['collection']
      hue_collection = Collection.objects.get(id=collection_id)

      solr_query = search_form.solr_query_dict
      response = SolrApi(SOLR_URL.get(), request.user).query(solr_query, hue_collection)

      # Derived display values: total page count and Solr query time (ms).
      solr_query['total_pages'] = int(math.ceil((float(response['response']['numFound']) / float(solr_query['rows']))))
      solr_query['search_time'] = response['responseHeader']['QTime']
    except Exception, e:
      error['title'] = force_unicode(e.title) if hasattr(e, 'title') else ''
      error['message'] = force_unicode(str(e))
  # NOTE(review): the render/return of the page is not visible in this chunk.
def _create_solr_cloud_collection(self, name, fields, unique_key_field, df):
  """
  Create a SolrCloud collection: upload the generated config to ZooKeeper,
  then ask Solr to create the collection, rolling back the ZK config on failure.
  """
  with ZookeeperClient(hosts=get_solr_ensemble(), read_only=False) as zc:
    root_node = '%s/%s' % (ZK_SOLR_CONFIG_NAMESPACE, name)

    tmp_path, solr_config_path = copy_configs(fields, unique_key_field, df, True)
    try:
      config_root_path = '%s/%s' % (solr_config_path, 'conf')
      try:
        zc.copy_path(root_node, config_root_path)
      except Exception, e:
        zc.delete_path(root_node)
        raise PopupException(_('Error in copying Solr configurations.'), detail=e)
    finally:
      # Don't want directories laying around
      shutil.rmtree(tmp_path)

    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    if not api.create_collection(name):
      # Delete instance directory if we couldn't create a collection.
      try:
        zc.delete_path(root_node)
      except Exception, e:
        raise PopupException(_('Error in deleting Solr configurations.'), detail=e)
      raise PopupException(_('Could not create collection. Check error logs for more info.'))
def get_collections(self):
  """
  Gather collections, aliases and cores from Solr, each entry tagged with
  'isCoreOnly' (and aliases with 'isAlias' plus their target collections).

  Any Solr/ZooKeeper failure is logged and the partial (possibly empty)
  dicts are kept.
  """
  solr_collections = {}
  solr_aliases = {}
  solr_cores = {}

  try:
    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

    if self.is_solr_cloud_mode():
      solr_collections = api.collections()
      for name in solr_collections:
        solr_collections[name]['isCoreOnly'] = False

      solr_aliases = api.aliases()
      for name in solr_aliases:
        solr_aliases[name] = {
          'isCoreOnly': False,
          'isAlias': True,
          'collections': solr_aliases[name]
        }

    solr_cores = api.cores()
    for name in solr_cores:
      solr_cores[name]['isCoreOnly'] = True
  except Exception, e:
    LOG.warn('No Zookeeper servlet running on Solr server: %s' % e)
  # NOTE(review): no return visible in this chunk; presumably the three dicts
  # are merged/returned below.
def zkensemble():
  """
  ZooKeeper Ensemble
  """
  from search.conf import SOLR_URL

  hostname = urlparse(SOLR_URL.get()).hostname
  return "%s:2181/solr" % (hostname or 'localhost')
def admin_collection_schema(request, collection_id):
  """Return the collection's Solr schema.xml as JSON {'solr_schema': ...}."""
  hue_collection = Collection.objects.get(id=collection_id)
  raw_schema = SolrApi(SOLR_URL.get(), request.user).schema(hue_collection.name)

  payload = json.dumps({'solr_schema': raw_schema.decode('utf-8')})
  return HttpResponse(payload, mimetype="application/json")
def get_new_collections(self):
  """
  Return Solr collections not yet registered as Hue Collection objects.

  On any failure (e.g. no ZooKeeper servlet) falls back to an empty list.
  """
  try:
    # NOTE(review): SolrApi is constructed without a user argument here,
    # unlike the other call sites — confirm this SolrApi signature allows it.
    solr_collections = SolrApi(SOLR_URL.get()).collections()
    for name in Collection.objects.values_list('name', flat=True):
      # Drop collections Hue already knows about.
      solr_collections.pop(name, None)
  except Exception, e:
    LOG.warn('No Zookeeper servlet running on Solr server: %s' % e)
    solr_collections = []
  # NOTE(review): no return visible; presumably returns solr_collections below.
def get_new_cores(self):
  """
  Return Solr cores not yet registered as Hue Collection objects.

  On any failure falls back to an empty list.
  """
  try:
    # NOTE(review): SolrApi is constructed without a user argument here,
    # unlike the other call sites — confirm this SolrApi signature allows it.
    solr_cores = SolrApi(SOLR_URL.get()).cores()
    for name in Collection.objects.values_list('name', flat=True):
      # Drop cores Hue already knows about.
      solr_cores.pop(name, None)
  except Exception, e:
    solr_cores = []
    LOG.warn('No Single core setup on Solr server: %s' % e)
  # NOTE(review): no return visible; presumably returns solr_cores below.
def is_solr_cloud_mode(self):
  """Detect whether Solr runs in SolrCloud mode; memoized on the instance."""
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  if not hasattr(self, '_solr_cloud_mode'):
    # The collections admin endpoint only exists in SolrCloud mode.
    try:
      api.collections()
      self._solr_cloud_mode = True
    except:
      self._solr_cloud_mode = False

  return self._solr_cloud_mode
def get_autocomplete(self):
  """
  Collect collection and config names for autocompletion.

  Failures (e.g. no ZooKeeper servlet) are logged and leave the dict partial.
  """
  autocomplete = {}
  try:
    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    autocomplete['collections'] = api.collections2()
    autocomplete['configs'] = api.configs()
  except Exception, e:
    LOG.warn('No Zookeeper servlet running on Solr server: %s' % e)
  # NOTE(review): no return visible; presumably returns autocomplete below.
def fields_data(self, user, name):
  """
  Fetch the schema fields of `name` via the /luke handler, falling back to
  the Schema API (reshaped to look like a luke response) when /luke fails.
  """
  api = SolrApi(SOLR_URL.get(), user)
  try:
    schema_fields = api.fields(name)
    schema_fields = schema_fields['schema']['fields']
  except Exception, e:
    LOG.warn('/luke call did not succeed: %s' % e)
    fields = api.schema_fields(name)
    schema_fields = Collection2._make_luke_from_schema_fields(fields)
  # NOTE(review): the transformation/return of schema_fields is not visible
  # in this chunk.
def is_solr_cloud_mode(self):
  """
  Detect whether Solr runs in SolrCloud mode (collections endpoint responds);
  the result is memoized on the instance as _solr_cloud_mode.
  """
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  if not hasattr(self, '_solr_cloud_mode'):
    try:
      api.collections()
      setattr(self, '_solr_cloud_mode', True)
    except Exception, e:
      LOG.info('Non SolrCloud server: %s' % e)
      setattr(self, '_solr_cloud_mode', False)
  # NOTE(review): the return of the cached flag is not visible in this chunk.
def admin_collection_solr_properties(request, collection_id):
  """Render the Solr-properties panel of a collection and return it as JSON content."""
  hue_collection = Collection.objects.get(id=collection_id)
  solr_collection = SolrApi(SOLR_URL.get(), request.user).collection_or_core(hue_collection)

  context = {
    'solr_collection': solr_collection,
    'hue_collection': hue_collection,
  }
  content = render('admin_collection_properties_solr_properties.mako', request, context, force_template=True).content

  return HttpResponse(json.dumps({'content': content}), mimetype="application/json")
def update_document(request):
  """
  Apply the edited fields of a document as a Solr atomic update.

  Requires index edit permission. Fields flagged hasChanged become
  {"set": value} operations; _version_ is excluded from the edits and passed
  separately for optimistic concurrency. Solr REST errors are unwrapped to
  their 'msg' when possible.
  """
  result = {'status': -1, 'message': 'Error'}

  if not can_edit_index(request.user):
    result['message'] = _('Permission to edit the document denied')
    return JsonResponse(result)

  try:
    collection = json.loads(request.POST.get('collection', '{}'))
    document = json.loads(request.POST.get('document', '{}'))
    doc_id = request.POST.get('id')

    if document['hasChanged']:
      edits = {
        "id": doc_id,
      }
      version = None  # If there is a version, use it to avoid potential concurrent update conflicts

      for field in document['details']:
        if field['hasChanged'] and field['key'] != '_version_':
          edits[field['key']] = {"set": field['value']}
        if field['key'] == '_version_':
          version = field['value']

      result['update'] = SolrApi(SOLR_URL.get(), request.user).update(
          collection['name'], json.dumps([edits]), content_type='json', version=version)
      result['message'] = _('Document successfully updated.')
      result['status'] = 0
    else:
      result['status'] = 0
      result['message'] = _('Document has no modifications to change.')
  except RestException, e:
    try:
      result['message'] = json.loads(e.message)['error']['msg']
    except:
      LOG.exception('Failed to parse json response')
      result['message'] = force_unicode(e)
  # NOTE(review): the JSON response return is not visible in this chunk.
def search(request): response = {} collection = json.loads(request.POST.get('collection', '{}')) query = json.loads(request.POST.get('query', '{}')) query['download'] = 'download' in request.POST if collection: try: response = SolrApi(SOLR_URL.get(), request.user).query(collection, query) response = augment_solr_response(response, collection, query) except RestException, e: try: response['error'] = json.loads(e.message)['error']['msg'] except: LOG.exception('failed to parse json response') response['error'] = force_unicode(e) except Exception, e: raise PopupException(e, title=_('Error while accessing Solr')) response['error'] = force_unicode(e)
def index_fields_dynamic(request):
  """
  Return the dynamic fields of an index (those with a 'dynamicBase' in the
  luke output), both as field definitions and as grid-layout header fields.
  """
  result = {'status': -1, 'message': 'Error'}

  try:
    name = request.POST['name']

    dynamic_fields = SolrApi(SOLR_URL.get(), request.user).luke(name)

    result['message'] = ''
    result['fields'] = [
      Collection2._make_field(name, properties)
      for name, properties in dynamic_fields['fields'].iteritems() if 'dynamicBase' in properties
    ]
    result['gridlayout_header_fields'] = [
      Collection2._make_gridlayout_header_field({'name': name}, True)
      for name, properties in dynamic_fields['fields'].iteritems() if 'dynamicBase' in properties
    ]
    result['status'] = 0
  except Exception, e:
    result['message'] = force_unicode(e)
  # NOTE(review): the JSON response return is not visible in this chunk.
def get_range_facet(request):
  """
  Compute range-facet properties: gap guessing on 'select', zooming otherwise.

  Reads POST params 'collection', 'facet' (JSON) and 'action'.
  """
  result = {'status': -1, 'message': ''}

  try:
    collection = json.loads(request.POST.get('collection', '{}'))  # Perms
    facet = json.loads(request.POST.get('facet', '{}'))
    action = request.POST.get('action', 'select')

    solr_api = SolrApi(SOLR_URL.get(), request.user)

    if action == 'select':
      properties = _guess_gap(solr_api, collection, facet, facet['properties']['start'], facet['properties']['end'])
    else:
      properties = _zoom_range_facet(solr_api, collection, facet)

    result['properties'] = properties
    result['status'] = 0
  except Exception, e:
    result['message'] = unicode(str(e), "utf8")
  # NOTE(review): the JSON response return is not visible in this chunk.
def update_collection(self, name, fields):
  """
  Only create new fields

  Fields already present in the schema are skipped: Solr offers no way to
  overwrite an existing field definition here. Only whitelisted attributes
  (ALLOWED_FIELD_ATTRIBUTES) are forwarded.
  """
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  existing_names = list(api.fields(name)['schema']['fields'].keys())

  new_fields_filtered = [
    dict((attr, field[attr]) for attr in ALLOWED_FIELD_ATTRIBUTES if attr in field)
    for field in fields
    if field['name'] not in existing_names
  ]

  api.add_fields(name, new_fields_filtered)
def _create_solr_cloud_collection(self, name, fields, unique_key_field, df):
  """
  Create a SolrCloud collection (SolrClient-aware variant): copy the generated
  config to ZooKeeper, create the collection, and roll back the ZK config on
  any failure. Temporary config dirs are always removed.
  """
  client = SolrClient(self.user)

  with ZookeeperClient(hosts=client.get_zookeeper_host(), read_only=False) as zc:
    root_node = '%s/%s' % (ZK_SOLR_CONFIG_NAMESPACE, name)

    # Config generation depends on the Solr version and storage backend.
    tmp_path, solr_config_path = copy_configs(
        fields=fields,
        unique_key_field=unique_key_field,
        df=df,
        solr_cloud_mode=client.is_solr_cloud_mode(),
        is_solr_six_or_more=client.is_solr_six_or_more(),
        is_solr_hdfs_mode=client.is_solr_with_hdfs())

    try:
      config_root_path = '%s/%s' % (solr_config_path, 'conf')
      try:
        zc.copy_path(root_node, config_root_path)
      except Exception, e:
        zc.delete_path(root_node)
        raise PopupException(_('Error in copying Solr configurations: %s') % e)
    finally:
      # Don't want directories laying around
      shutil.rmtree(tmp_path)

    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    if not api.create_collection(name):
      # Delete instance directory if we couldn't create a collection.
      try:
        zc.delete_path(root_node)
      except Exception, e:
        raise PopupException(_('Error in deleting Solr configurations.'), detail=e)
      raise PopupException(_('Could not create collection. Check error logs for more info.'))
def get_range_facet(request):
  """
  Compute range-facet properties: gap guessing on 'select', zoom-out otherwise.

  Reads POST params 'collection', 'facet' (JSON) and 'action'; returns a
  JsonResponse with 'properties' on success.
  """
  result = {'status': -1, 'message': ''}

  try:
    collection = json.loads(request.POST.get('collection', '{}'))
    facet = json.loads(request.POST.get('facet', '{}'))
    action = request.POST.get('action', 'select')

    solr_api = SolrApi(SOLR_URL.get(), request.user)

    if action == 'select':
      properties = _guess_gap(solr_api, collection, facet, facet['properties']['start'], facet['properties']['end'])
    else:
      properties = _zoom_range_facet(solr_api, collection, facet) # Zoom out

    result['properties'] = properties
    result['status'] = 0
  except Exception as e:
    result['message'] = force_unicode(e)

  return JsonResponse(result)
def get_stats(request):
  """
  Return Solr stats for one field of a collection, optionally faceted.

  Reads POST params 'collection', 'query' and 'analysis' (JSON). Status 1
  flags field types the stats component does not support.
  """
  result = {'status': -1, 'message': 'Error'}

  try:
    collection = json.loads(request.POST.get('collection', '{}'))
    query = json.loads(request.POST.get('query', '{}'))
    analysis = json.loads(request.POST.get('analysis', '{}'))

    field = analysis['name']
    facet = analysis['stats']['facet']

    result['stats'] = SolrApi(SOLR_URL.get(), request.user).stats(collection['name'], [field], query, facet)
    result['status'] = 0
    result['message'] = ''
  except Exception, e:
    result['message'] = unicode(str(e), "utf8")
    # The Solr stats component rejects some field types with this message.
    if 'not currently supported' in result['message']:
      result['status'] = 1
      result['message'] = _('This field does not support stats')
  # NOTE(review): the JSON response return is not visible in this chunk.
def admin_collection_properties(request, collection_id):
  """
  Show and save the properties form of a Hue collection.

  On a valid POST, is_core_only is recomputed from Solr before saving, then
  the view redirects back to itself; form errors are flashed to the request.
  """
  hue_collection = Collection.objects.get(id=collection_id)
  solr_collection = SolrApi(SOLR_URL.get(), request.user).collection_or_core(hue_collection)

  if request.method == 'POST':
    collection_form = CollectionForm(request.POST, instance=hue_collection)
    if collection_form.is_valid():
      searcher = SearchController(request.user)
      hue_collection = collection_form.save(commit=False)
      # A name not listed as a collection must be a plain core.
      hue_collection.is_core_only = not searcher.is_collection(hue_collection.name)
      hue_collection.save()
      return redirect(reverse('search:admin_collection_properties', kwargs={'collection_id': hue_collection.id}))
    else:
      request.error(_('Errors on the form: %s.') % collection_form.errors)
  else:
    collection_form = CollectionForm(instance=hue_collection)

  return render('admin_collection_properties.mako', request, {
    'solr_collection': solr_collection,
    'hue_collection': hue_collection,
    'collection_form': collection_form,
  })
def delete_collection(self, name, core):
  """
  Delete solr collection/core and instance dir

  ZooKeeper variant: after removing the collection, its config node is
  deleted from ZK; if that fails the collection is re-created so no orphan
  config is left behind. Cores cannot be removed.
  """
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
  client = SolrClient(self.user)

  if core:
    raise PopupException(_('Cannot remove Solr cores.'))

  if api.remove_collection(name):
    # Delete instance directory.
    try:
      root_node = '%s/%s' % (ZK_SOLR_CONFIG_NAMESPACE, name)
      with ZookeeperClient(hosts=client.get_zookeeper_host(), read_only=False) as zc:
        zc.delete_path(root_node)
    except Exception, e:
      # Re-create collection so that we don't have an orphan config
      api.add_collection(name)
      raise PopupException(_('Error in deleting Solr configurations.'), detail=e)
  # NOTE(review): no else branch visible in this chunk for a failed
  # remove_collection.
def get_terms(request):
  """
  Return the top terms of a field (newest variant).

  Honors a client-provided 'limit', enables distributed terms when the solr
  engine supports analytics, and implements case-insensitive prefix matching
  via terms.regex instead of terms.prefix.
  """
  result = {'status': -1, 'message': 'Error'}

  try:
    collection = json.loads(request.POST.get('collection', '{}'))
    analysis = json.loads(request.POST.get('analysis', '{}'))
    limit = json.loads(request.POST.get('limit', '25'))

    support_distributed = [engine for engine in get_engines(request.user) if engine['type'] == 'solr'][0]['analytics']

    field = analysis['name']
    properties = {
      'terms.limit': limit,
      'terms.distrib': str(support_distributed).lower(),
      # lower
      # mincount
      # maxcount
    }
    if analysis['terms']['prefix']:
      properties['terms.regex'] = '.*%(prefix)s.*' % analysis['terms'] # Use regexp instead of case sensitive 'terms.prefix'
      properties['terms.regex.flag'] = 'case_insensitive'

    result['terms'] = SolrApi(SOLR_URL.get(), request.user).terms(collection['name'], field, properties)
    result['terms'] = pairwise2(field, [], result['terms']['terms'][field])
    result['status'] = 0
    result['message'] = ''
  except Exception, e:
    result['message'] = force_unicode(e)
    # The Solr terms component rejects some field types with this message.
    if 'not currently supported' in result['message']:
      result['status'] = 1
      result['message'] = _('This field does not support stats')
  # NOTE(review): the JSON response return is not visible in this chunk.
def get_service_info(service):
  """
  Resolve the URL and security flag of a named Hue service
  ('solr', 'oozie', 'httpfs', 'rm', 'jhs', 'sparkhs').

  Returns a dict with 'url' and 'security_enabled' keys when the service is
  configured; an unknown/unconfigured service yields a partial dict and an
  info log. Trailing slashes are stripped from the URL.
  """
  service_info = {}

  if service.lower() == 'solr':
    service_info['url'] = SOLR_URL.get()
    service_info['security_enabled'] = SOLR_SECURITY_ENABLED.get()
  if service.lower() == 'oozie':
    service_info['url'] = OOZIE_URL.get()
    service_info['security_enabled'] = OOZIE_SECURITY_ENABLED.get()
  if service.lower() == 'httpfs':
    hdfs_config = hdfs_conf.HDFS_CLUSTERS['default']
    service_info['url'] = hdfs_config.WEBHDFS_URL.get()
    service_info['security_enabled'] = hdfs_config.SECURITY_ENABLED.get()
  if service.lower() == 'rm':
    yarn_cluster = cluster.get_cluster_conf_for_job_submission()
    service_info['url'] = yarn_cluster.RESOURCE_MANAGER_API_URL.get()
    service_info['security_enabled'] = yarn_cluster.SECURITY_ENABLED.get()
  if service.lower() == 'jhs':
    yarn_cluster = cluster.get_cluster_conf_for_job_submission()
    service_info['url'] = yarn_cluster.HISTORY_SERVER_API_URL.get()
    service_info['security_enabled'] = yarn_cluster.SECURITY_ENABLED.get()
  if service.lower() == 'sparkhs':
    yarn_cluster = cluster.get_cluster_conf_for_job_submission()
    service_info['url'] = yarn_cluster.SPARK_HISTORY_SERVER_URL.get()
    service_info['security_enabled'] = yarn_cluster.SPARK_HISTORY_SERVER_SECURITY_ENABLED.get()

  if 'url' not in service_info or service_info['url'] is None:
    logging.info("Hue does not have %s configured, cannot test %s" % (service, service))
  elif service_info['url'].endswith('/'):
    # Guarded: previously this ran unconditionally and raised KeyError when
    # 'url' was missing (unknown service) or AttributeError when it was None.
    service_info['url'] = service_info['url'][:-1]

  return service_info
def download(request, format):
  """
  Export the current search results in the requested format.

  Re-runs the query built from the GET parameters against the selected
  collection, then hands the Solr response to export_download. Any failure
  becomes a PopupException.
  """
  hue_collections = SearchController(request.user).get_search_collections()

  if not hue_collections:
    raise PopupException(_("No collection to download."))

  init_collection = initial_collection(request, hue_collections)

  search_form = QueryForm(request.GET, initial_collection=init_collection)

  if search_form.is_valid():
    try:
      collection_id = search_form.cleaned_data['collection']
      hue_collection = Collection.objects.get(id=collection_id)

      solr_query = search_form.solr_query_dict
      response = SolrApi(SOLR_URL.get(), request.user).query(solr_query, hue_collection)

      LOG.debug('Download results for query %s' % smart_str(solr_query))

      return export_download(response, format)
    except Exception, e:
      raise PopupException(_("Could not download search results: %s") % e)
  # NOTE(review): the invalid-form branch is not visible in this chunk.
def delete_collection(self, name, core):
  """
  Delete solr collection/core and instance dir

  solrctl variant: removes the collection via the API, then shells out to
  solrctl to delete the instance directory. Cores cannot be removed.
  Raises PopupException on any failure.
  """
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  if core:
    raise PopupException(_('Cannot remove Solr cores.'))

  if api.remove_collection(name):
    # Delete instance directory.
    solrctl_path = get_solrctl_path()

    process = subprocess.Popen([solrctl_path, "instancedir", "--delete", name],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               env={'SOLR_ZK_ENSEMBLE': conf.SOLR_ZK_ENSEMBLE.get()})
    if process.wait() != 0:
      # Fixed log typo ("Cloud not") and the user-facing message, which
      # previously said "create" on this delete path.
      LOG.error("Could not delete instance directory.\nOutput stream: %s\nError stream: %s" % process.communicate())
      raise PopupException(_('Could not delete instance directory. Check error logs for more info.'))
  else:
    raise PopupException(_('Could not remove collection. Check error logs for more info.'))
def get_document(request):
  """Fetch a single Solr document by id for the dashboard document viewer.

  Expects POST params: 'collection' (JSON object with a 'name' key) and
  'id'. Builds a dict {'status', 'message'[, 'doc']}: status 0 on success,
  1 when no id was supplied or Solr returned nothing, -1 on error.

  NOTE(review): no return statement is visible in this chunk -- presumably
  `result` is serialized and returned just past this excerpt; confirm
  against the full file. (Python 2 syntax: `except X, e`, `unicode`.)
  """
  result = {'status': -1, 'message': 'Error'}

  try:
    collection = json.loads(request.POST.get('collection', '{}'))
    doc_id = request.POST.get('id')
    if doc_id:
      # SolrApi.get() wraps Solr's response; the inner 'doc' key holds the hit.
      result['doc'] = SolrApi(SOLR_URL.get(), request.user).get(collection['name'], doc_id)
      if result['doc']['doc']:
        result['status'] = 0
        result['message'] = ''
      else:
        result['status'] = 1
        result['message'] = _('No document was returned by Solr.')
    else:
      result['message'] = _('This document does not have any index id.')
      result['status'] = 1
  except Exception, e:
    # Decode the exception text so non-ASCII Solr errors survive serialization.
    result['message'] = unicode(str(e), "utf8")
def search(request):
  """Run a dashboard search against Solr and augment the raw response.

  Reads 'collection' and 'query' JSON payloads from request.POST; the
  'download' flag is passed through so the export path can reuse this view.
  On RestException the Solr-provided error message is preferred, falling
  back to the error trace, then to the raw exception text.

  NOTE(review): no return statement is visible in this chunk -- presumably
  `response` is serialized just past this excerpt; confirm against the
  full file. (Python 2 syntax: `except X, e`.)
  """
  response = {}

  collection = json.loads(request.POST.get('collection', '{}'))
  query = json.loads(request.POST.get('query', '{}'))
  query['download'] = 'download' in request.POST

  if collection:
    try:
      response = SolrApi(SOLR_URL.get(), request.user).query(collection, query)
      response = augment_solr_response(response, collection, query)
    except RestException, e:
      try:
        # Solr errors arrive as JSON; surface 'msg' or fall back to 'trace'.
        message = json.loads(e.message)
        response['error'] = message['error'].get('msg', message['error']['trace'])
      except Exception, e2:
        LOG.exception('failed to extract json message: %s' % force_unicode(e2))
        LOG.exception('failed to parse json response: %s' % force_unicode(e))
        response['error'] = force_unicode(e)
def __init__(self, solr_url=None, user=None, security_enabled=False, ssl_cert_ca_verify=None):
  """Build an authenticated HTTP client for the Solr REST API.

  Args:
    solr_url: base Solr URL; falls back to the SOLR_URL config when omitted.
    user: username forwarded with requests.
    security_enabled: force Kerberos auth; otherwise read from config.
    ssl_cert_ca_verify: CA-verification flag; None (the default) means
      "use the SSL_CERT_CA_VERIFY config value".

  Fix: the default was previously `SSL_CERT_CA_VERIFY.get()` in the
  signature, evaluated once at import time -- freezing the config value
  for the life of the process. It is now resolved at call time.
  """
  if ssl_cert_ca_verify is None:
    ssl_cert_ca_verify = SSL_CERT_CA_VERIFY.get()

  if solr_url is None and hasattr(SOLR_URL, 'get'):
    solr_url = SOLR_URL.get()

  if solr_url:
    self._url = solr_url
    self._user = user
    self._client = HttpClient(self._url, logger=LOG)
    self.security_enabled = security_enabled or SECURITY_ENABLED.get()

    if self.security_enabled:
      self._client.set_kerberos_auth()

    self._client.set_verify(ssl_cert_ca_verify)

    self._root = resource.Resource(self._client)

    # The Kerberos handshake requires two requests in order to authenticate,
    # but if our first request is a PUT/POST, it might flat-out reject the
    # first request if the body is too large. So, connect here in order to get
    # a cookie so future PUT/POSTs will be pre-authenticated.
    if self.security_enabled:
      self._root.invoke('HEAD', '/')
def query_suggest(request):
  """Return Solr suggester results for a query prefix.

  Expects POST params: 'collection' (JSON with 'name' and a
  'suggest.dictionary' setting) and 'query'. Responds with JSON
  {'status': 0, 'response': ...} on success, or {'status': -1,
  'message': ...} on failure.

  Fix: the `collection['suggest']['dictionary']` lookup previously ran
  *before* the try block, so a malformed payload produced a 500 instead
  of the error JSON the client expects.
  """
  if request.method != 'POST':
    raise PopupException(_('POST request required.'))

  collection = json.loads(request.POST.get('collection', '{}'))
  query = request.POST.get('query', '')

  result = {'status': -1, 'message': ''}

  solr_query = {'q': query}

  try:
    # KeyError on a payload without suggest settings is now reported as JSON.
    solr_query['dictionary'] = collection['suggest']['dictionary']
    response = SolrApi(SOLR_URL.get(), request.user).suggest(collection['name'], solr_query)
    result['response'] = response
    result['status'] = 0
  except Exception as e:
    result['message'] = force_unicode(e)

  return JsonResponse(result)
def create_collection(self, name, fields, unique_key_field='id', df='text'):
  """
  Create solr collection or core and instance dir.
  Create schema.xml file so that we can set UniqueKey field.

  NOTE(review): this chunk ends right after the ZK rollback inside the
  `create_collection` failure branch -- the final "could not create
  collection" error is presumably raised just past this excerpt; confirm
  against the full file. (Python 2 syntax: `except X, e`.)
  """
  if self.is_solr_cloud_mode():
    # solrcloud mode

    # Need to remove path afterwards
    tmp_path, solr_config_path = copy_configs(fields, unique_key_field, df, True)

    # Upload the generated config set into ZooKeeper under the Solr config
    # namespace so the new collection can reference it.
    zc = ZookeeperClient(hosts=get_solr_ensemble(), read_only=False)
    root_node = '%s/%s' % (ZK_SOLR_CONFIG_NAMESPACE, name)
    config_root_path = '%s/%s' % (solr_config_path, 'conf')
    try:
      zc.copy_path(root_node, config_root_path)
    except Exception, e:
      # Roll back partially-copied ZK nodes before surfacing the error.
      # NOTE(review): tmp_path is not removed on this path -- the rmtree
      # below is skipped by the raise; verify whether that leak is intended.
      zc.delete_path(root_node)
      raise PopupException(_('Error in copying Solr configurations.'), detail=e)

    # Don't want directories laying around
    shutil.rmtree(tmp_path)

    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    if not api.create_collection(name):
      # Delete instance directory if we couldn't create a collection.
      try:
        zc.delete_path(root_node)
      except Exception, e:
        raise PopupException(_('Error in deleting Solr configurations.'), detail=e)
def search(request):
  """Run a dashboard search against Solr and build the response dict.

  Reads 'collection' and 'query' JSON payloads from request.POST. On
  RestException the Solr error message is extracted when possible; any
  other failure is surfaced as a PopupException.

  Fixes: removed an unreachable `response['error'] = ...` that followed
  the `raise` in the generic handler; narrowed the inner bare `except:`
  to `except Exception:`.
  NOTE(review): no return statement is visible in this chunk -- presumably
  `response` is serialized just past this excerpt.
  """
  response = {}

  collection = json.loads(request.POST.get('collection', '{}'))
  query = json.loads(request.POST.get('query', '{}'))
  query['download'] = 'download' in request.POST
  # todo: remove the selected histo facet if multiq

  if collection['id']:
    hue_collection = Collection.objects.get(id=collection['id'])  # TODO perms

  if collection:
    try:
      response = SolrApi(SOLR_URL.get(), request.user).query(collection, query)
      response = augment_solr_response(response, collection, query)
    except RestException as e:
      # Prefer Solr's own error message when its payload parses as JSON.
      try:
        response['error'] = json.loads(e.message)['error']['msg']
      except Exception:
        response['error'] = force_unicode(str(e))
    except Exception as e:
      raise PopupException(e, title=_('Error while accessing Solr'))
def _fetch_collections(request):
  """List Solr collections/configs in the assist-navigator response shapes.

  The GET 'path' parameter selects the level: empty -> top-level
  "databases"; 'collections' or 'configs' -> table listings; a
  'category/name' pair -> the edit link for that entity.
  """
  from search.conf import SOLR_URL

  path = request.GET['path']
  item, name = None, None

  if path:
    item = path
    if '/' in path:
      item, name = path.split('/')

  api = SolrApi(SOLR_URL.get(), request.user)

  # Guard-style dispatch on the navigation level (same decision order as before).
  if not item:
    return {"databases": ["collections", "configs"]}
  if item and name:
    return {"authorizable_link": "/indexer/#edit/%s" % name, "extended_columns": [], "columns": [], "partition_keys": []}
  if item == 'collections':
    return {"tables_meta": [{"comment": None, "type": "Table", "name": col} for col in api.collections2()]}
  if item == 'configs':
    return {"tables_meta": [{"comment": None, "type": "Table", "name": conf} for conf in api.configs()]}
  raise PopupException(_('Authorizable %s could not be retrieved') % path)
def execute(self, notebook, snippet):
  """Execute snippet['statement'] as Solr SQL and return the notebook result.

  Fix: cells were previously emitted in each doc's own dict iteration
  order while the column metadata came from docs[0].keys() -- rows with
  missing or differently-ordered keys misaligned against the header. We
  now collect the union of columns in first-seen order and index every
  row by it (consistent with the newer connector variant in this file).
  Also treats a falsy collection like 'default'.
  """
  from search.conf import SOLR_URL

  api = NativeSolrApi(SOLR_URL.get(), self.user.username)

  collection = self.options.get('collection') or snippet.get('database')
  if not collection or collection == 'default':
    collection = api.collections2()[0]

  response = api.sql(collection, snippet['statement'])

  # Solr appends a control doc carrying EOF / RESPONSE_TIME / EXCEPTION.
  info = response['result-set']['docs'].pop(-1)
  if info.get('EXCEPTION'):
    raise QueryError(info['EXCEPTION'])

  # Union of all column names, in first-seen order, so every row aligns.
  headers = []
  for row in response['result-set']['docs']:
    for col in row.keys():
      if col not in headers:
        headers.append(col)

  data = [[doc.get(col) for col in headers] for doc in response['result-set']['docs']]
  has_result_set = bool(data)

  return {
    'sync': True,
    'has_result_set': has_result_set,
    'modified_row_count': 0,
    'result': {
      'has_more': False,
      'data': data if has_result_set else [],
      'meta': [{'name': col, 'type': '', 'comment': ''} for col in headers] if has_result_set else [],
      'type': 'table'
    },
    'statement_id': 0,
    'has_more_statements': False,
    'statements_count': 1
  }
def _create_non_solr_cloud_collection(self, name, fields, unique_key_field, df):
  """Create a Solr core with a local instance directory (non-SolrCloud mode).

  Generates a config set, moves it into the core instance directory and
  asks Solr to create the core; cleans up the directory and raises
  PopupException when core creation fails.
  """
  instancedir = os.path.join(CORE_INSTANCE_DIR.get(), name)
  if os.path.exists(instancedir):
    raise PopupException(_("Instance directory %s already exists! Please remove it from the file system.") % instancedir)

  temp_dir, config_dir = copy_configs(fields, unique_key_field, df, False)
  try:
    shutil.move(config_dir, instancedir)
  finally:
    # The scratch directory is removed whether or not the move succeeded.
    shutil.rmtree(temp_dir)

  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
  if not api.create_core(name, instancedir):
    # Roll back the instance directory when Solr refuses the core.
    shutil.rmtree(instancedir)
    raise PopupException(_('Could not create collection. Check error logs for more info.'))
# NOTE(review): the lines below are the tail of an autocomplete() method whose
# start is outside this excerpt.
    response['columns'] = [col['name'] for col in columns]
    response['extended_columns'] = columns
    response['status'] = 0
    return response

  @query_error_handler
  def get_sample_data(self, snippet, database=None, table=None, column=None, async=False, operation=None):
    """Return sample rows for a collection (or SQL source) for the assist panel.

    Builds {'status', 'headers', 'full_headers', 'rows'} on success;
    status stays -1 otherwise. NOTE(review): `async` is a reserved word in
    Python 3.7+ -- this signature is Python 2 only. The chunk also ends on
    a dangling `else:`; the failure branch continues past this excerpt.
    """
    from search.conf import SOLR_URL
    db = NativeSolrApi(SOLR_URL.get(), self.user)
    assist = Assist(self, self.user, db)
    response = {'status': -1}

    if snippet.get('source') == 'sql':
      sample_data = assist.get_sample_data_sql(database, table, column)
    else:
      sample_data = assist.get_sample_data(database, table, column)

    if sample_data:
      response['status'] = 0
      response['headers'] = sample_data['headers']
      response['full_headers'] = sample_data.get('full_headers')
      response['rows'] = sample_data['rows']
    else:
def admin_collection_schema(request, collection_id):
  """Serve a collection's Solr schema.xml as a JSON payload."""
  collection = Collection.objects.get(id=collection_id)
  schema = SolrApi(SOLR_URL.get()).schema(collection.name)
  payload = {'solr_schema': schema.decode('utf-8')}
  return HttpResponse(json.dumps(payload), mimetype="application/json")
def is_core(self, core_name):
  """Return True when `core_name` is among the Solr cores."""
  return core_name in SolrApi(SOLR_URL.get()).cores()
def is_collection(self, collection_name):
  """Return True when `collection_name` is among the Solr collections."""
  return collection_name in SolrApi(SOLR_URL.get()).collections()
class SolrApi(Api):
  """Notebook connector that runs SQL statements against Solr collections.

  NOTE(review): distinct from the lower-level SolrApi REST client used
  elsewhere in this file (imported here as NativeSolrApi); this class
  adapts it to the notebook Api interface.
  """

  def __init__(self, user, interpreter=None):
    Api.__init__(self, user, interpreter=interpreter)
    # Interpreter-level settings, e.g. the default 'collection'.
    self.options = interpreter['options']

  @query_error_handler
  def execute(self, notebook, snippet):
    """Execute snippet['statement'] via Solr SQL; returns the full result set."""
    from search.conf import SOLR_URL

    api = NativeSolrApi(SOLR_URL.get(), self.user.username)

    collection = self.options.get('collection') or snippet.get('database')
    if not collection or collection == 'default':
      collection = api.collections2()[0]

    response = api.sql(collection, snippet['statement'])

    # Solr appends a control doc to the result set.
    info = response['result-set']['docs'].pop(-1)  # EOF, RESPONSE_TIME, EXCEPTION
    if info.get('EXCEPTION'):
      raise QueryError(info['EXCEPTION'])

    # Docs can have heterogeneous keys: build the union of columns in
    # first-seen order so every row aligns with the metadata.
    headers = []
    for row in response['result-set']['docs']:
      for col in row.keys():
        if col not in headers:
          headers.append(col)

    data = [[doc.get(col) for col in headers] for doc in response['result-set']['docs']]
    has_result_set = bool(data)

    return {
      'sync': True,
      'has_result_set': has_result_set,
      'modified_row_count': 0,
      'result': {
        'has_more': False,
        'data': data if has_result_set else [],
        'meta': [{'name': col, 'type': '', 'comment': ''} for col in headers] if has_result_set else [],
        'type': 'table'
      },
      'statement_id': 0,
      'has_more_statements': False,
      'statements_count': 1
    }

  @query_error_handler
  def check_status(self, notebook, snippet):
    # Solr SQL executes synchronously, so results are always available.
    return {'status': 'available'}

  @query_error_handler
  def fetch_result(self, notebook, snippet, rows, start_over):
    # All rows are returned by execute(); nothing more to fetch.
    return {'has_more': False, 'data': [], 'meta': [], 'type': 'table'}

  @query_error_handler
  def fetch_result_metadata(self):
    pass

  @query_error_handler
  def cancel(self, notebook, snippet):
    # Nothing to cancel for a synchronous query; report success.
    return {'status': 0}

  @query_error_handler
  def get_log(self, notebook, snippet, startFrom=None, size=None):
    return 'No logs'

  def download(self, notebook, snippet, format):
    raise PopupException('Downloading is not supported yet')

  @query_error_handler
  def close_statement(self, snippet):
    # No server-side statement handle to close.
    return {'status': -1}

  @query_error_handler
  def autocomplete(self, snippet, database=None, table=None, column=None, nested=None):
    """Return databases / tables / columns for the assist, by nesting level."""
    from search.conf import SOLR_URL

    api = NativeSolrApi(SOLR_URL.get(), self.user.username)
    assist = Assist(self, self.user, api)
    response = {'status': -1}

    if database is None:
      response['databases'] = [self.options.get('collection') or snippet.get('database') or 'default']
    elif table is None:
      response['tables_meta'] = assist.get_tables(database)
    else:
      columns = assist.get_columns(database, table)
      response['columns'] = [col['name'] for col in columns]
      response['extended_columns'] = columns

    response['status'] = 0
    return response

  @query_error_handler
  def get_sample_data(self, snippet, database=None, table=None, column=None, async=False):
    """Return sample rows for the assist panel.

    NOTE(review): `async` is a reserved word in Python 3.7+ -- this
    signature is Python 2 only.
    """
    from search.conf import SOLR_URL

    db = NativeSolrApi(SOLR_URL.get(), self.user)
    assist = Assist(self, self.user, db)
    response = {'status': -1}

    if snippet.get('source') == 'sql':
      sample_data = assist.get_sample_data_sql(database, table, column)
    else:
      sample_data = assist.get_sample_data(database, table, column)

    if sample_data:
      response['status'] = 0
      response['headers'] = sample_data['headers']
      response['full_headers'] = sample_data.get('full_headers')
      response['rows'] = sample_data['rows']
    else:
      response['message'] = _('Failed to get sample data.')

    return response
def get_solr_collection(self):
  """List the Solr collections visible to this user."""
  api = SolrApi(SOLR_URL.get(), self.user)
  return api.collections()