def index_layer(self, layer_id, use_cache=False): """ Index a layer in the search backend. If cache is set, append it to the list, if it isn't send the transaction right away. cache needs memcached to be available. """ from hypermap.aggregator.models import Layer layer = Layer.objects.get(id=layer_id) if not layer.is_valid: LOGGER.debug('Not indexing or removing layer with id %s in search engine as it is not valid' % layer.id) unindex_layer(layer.id, use_cache) return if layer.was_deleted: LOGGER.debug('Not indexing or removing layer with id %s in search engine as was_deleted is true' % layer.id) unindex_layer(layer.id, use_cache) return # 1. if we use cache if use_cache: LOGGER.debug('Caching layer with id %s for syncing with search engine' % layer.id) layers = cache.get('layers') if layers is None: layers = set([layer.id]) else: layers.add(layer.id) cache.set('layers', layers) return # 2. if we don't use cache # TODO: Make this function more DRY # by abstracting the common bits. if SEARCH_TYPE == 'solr': from hypermap.aggregator.solr import SolrHypermap LOGGER.debug('Syncing layer %s to solr' % layer.name) solrobject = SolrHypermap() success, message = solrobject.layer_to_solr(layer) # update the error message if using celery if not settings.REGISTRY_SKIP_CELERY: if not success: self.update_state( state=states.FAILURE, meta=message ) raise Ignore() elif SEARCH_TYPE == 'elasticsearch': from hypermap.aggregator.elasticsearch_client import ESHypermap LOGGER.debug('Syncing layer %s to es' % layer.name) esobject = ESHypermap() success, message = esobject.layer_to_es(layer) # update the error message if using celery if not settings.REGISTRY_SKIP_CELERY: if not success: self.update_state( state=states.FAILURE, meta=message ) raise Ignore()
def index_layer(self, layer): # TODO: Make this function more DRY # by abstracting the common bits. if settings.SEARCH_TYPE == 'solr': from hypermap.aggregator.solr import SolrHypermap print 'Syncing layer %s to solr' % layer.name try: solrobject = SolrHypermap() success, message = solrobject.layer_to_solr(layer) if not success: from hypermap.aggregator.models import TaskError task_error = TaskError(task_name=self.name, args=layer.id, message=message) task_error.save() except: print 'There was an exception here!' self.retry(layer) elif settings.SEARCH_TYPE == 'elasticsearch': from hypermap.aggregator.elasticsearch_client import ESHypermap print 'Syncing layer %s to es' % layer.name esobject = ESHypermap() success, message = esobject.layer_to_es(layer) if not success: from hypermap.aggregator.models import TaskError task_error = TaskError(task_name=self.name, args=layer.id, message=message) task_error.save()
def index_layer(self, layer): # TODO: Make this function more DRY # by abstracting the common bits. if settings.SEARCH_TYPE == 'solr': from hypermap.aggregator.solr import SolrHypermap print 'Syncing layer %s to solr' % layer.name try: solrobject = SolrHypermap() success, message = solrobject.layer_to_solr(layer) if not success: from hypermap.aggregator.models import TaskError task_error = TaskError( task_name=self.name, args=layer.id, message=message ) task_error.save() except: print 'There was an exception here!' self.retry(layer) elif settings.SEARCH_TYPE == 'elasticsearch': from hypermap.aggregator.elasticsearch_client import ESHypermap print 'Syncing layer %s to es' % layer.name esobject = ESHypermap() success, message = esobject.layer_to_es(layer) if not success: from hypermap.aggregator.models import TaskError task_error = TaskError( task_name=self.name, args=layer.id, message=message ) task_error.save()
def index_cached_layers(self): """ Index and unindex all layers in the Django cache (Index all layers who have been checked). """ from hypermap.aggregator.models import Layer if SEARCH_TYPE == 'solr': from hypermap.aggregator.solr import SolrHypermap solrobject = SolrHypermap() else: from hypermap.aggregator.elasticsearch_client import ESHypermap from elasticsearch import helpers es_client = ESHypermap() layers_cache = cache.get('layers') deleted_layers_cache = cache.get('deleted_layers') # 1. added layers cache if layers_cache: layers_list = list(layers_cache) LOGGER.debug('There are %s layers in cache: %s' % (len(layers_list), layers_list)) batch_size = settings.REGISTRY_SEARCH_BATCH_SIZE batch_lists = [layers_list[i:i+batch_size] for i in range(0, len(layers_list), batch_size)] for batch_list_ids in batch_lists: layers = Layer.objects.filter(id__in=batch_list_ids) if batch_size > len(layers): batch_size = len(layers) LOGGER.debug('Syncing %s/%s layers to %s: %s' % (batch_size, len(layers_cache), layers, SEARCH_TYPE)) try: # SOLR if SEARCH_TYPE == 'solr': success, layers_errors_ids = solrobject.layers_to_solr(layers) if success: # remove layers from cache here layers_cache = layers_cache.difference(set(batch_list_ids)) LOGGER.debug('Removing layers with id %s from cache' % batch_list_ids) cache.set('layers', layers_cache) # ES elif SEARCH_TYPE == 'elasticsearch': with_bulk, success = True, False layers_to_index = [es_client.layer_to_es(layer, with_bulk) for layer in layers] message = helpers.bulk(es_client.es, layers_to_index) # Check that all layers where indexed...if not, don't clear cache. # TODO: Check why es does not index all layers at first. len_indexed_layers = message[0] if len_indexed_layers == len(layers): LOGGER.debug('%d layers indexed successfully' % (len_indexed_layers)) success = True if success: # remove layers from cache here layers_cache = layers_cache.difference(set(batch_list_ids)) cache.set('layers', layers_cache) else: raise Exception("Incorrect SEARCH_TYPE=%s" % SEARCH_TYPE) except Exception as e: LOGGER.error('Layers were NOT indexed correctly') LOGGER.error(e, exc_info=True) else: LOGGER.debug('No cached layers to add in search engine.') # 2. deleted layers cache if deleted_layers_cache: layers_list = list(deleted_layers_cache) LOGGER.debug('There are %s layers in cache for deleting: %s' % (len(layers_list), layers_list)) # TODO implement me: batch layer index deletion for layer_id in layers_list: # SOLR if SEARCH_TYPE == 'solr': if Layer.objects.filter(pk=layer_id).exists(): layer = Layer.objects.get(id=layer_id) unindex_layer(layer.id, use_cache=False) deleted_layers_cache = deleted_layers_cache.difference(set([layer_id])) cache.set('deleted_layers', deleted_layers_cache) else: # TODO implement me raise NotImplementedError else: LOGGER.debug('No cached layers to remove in search engine.')
def index_cached_layers(self): """ Index all layers in the Django cache (Index all layers who have been checked). """ from hypermap.aggregator.models import Layer from hypermap.aggregator.models import TaskError if SEARCH_TYPE == 'solr': from hypermap.aggregator.solr import SolrHypermap solrobject = SolrHypermap() else: from hypermap.aggregator.elasticsearch_client import ESHypermap from elasticsearch import helpers es_client = ESHypermap() layers_cache = cache.get('layers') if layers_cache: layers_list = list(layers_cache) LOGGER.debug('There are %s layers in cache: %s' % (len(layers_list), layers_list)) batch_size = settings.REGISTRY_SEARCH_BATCH_SIZE batch_lists = [layers_list[i:i+batch_size] for i in range(0, len(layers_list), batch_size)] for batch_list_ids in batch_lists: layers = Layer.objects.filter(id__in=batch_list_ids) if batch_size > len(layers): batch_size = len(layers) LOGGER.debug('Syncing %s/%s layers to %s: %s' % (batch_size, len(layers_cache), layers, SEARCH_TYPE)) try: if SEARCH_TYPE == 'solr': success, message = solrobject.layers_to_solr(layers) elif SEARCH_TYPE == 'elasticsearch': with_bulk, success = True, False layers_to_index = [es_client.layer_to_es(layer, with_bulk) for layer in layers] message = helpers.bulk(es_client.es, layers_to_index) # Check that all layers where indexed...if not, don't clear cache. # TODO: Check why es does not index all layers at first. len_indexed_layers = message[0] if len_indexed_layers == len(layers): LOGGER.debug('%d layers indexed successfully' % (len_indexed_layers)) success = True else: raise Exception("Incorrect SEARCH_TYPE=%s" % SEARCH_TYPE) if success: # remove layers from cache here layers_cache = layers_cache.difference(set(batch_list_ids)) cache.set('layers', layers_cache) else: task_error = TaskError( task_name=self.name, args=batch_list_ids, message=message ) task_error.save() except Exception as e: LOGGER.error('Layers were NOT indexed correctly') LOGGER.error(e, exc_info=True) else: LOGGER.debug('No cached layers.')
from hypermap.aggregator.models import TaskError task_error = TaskError( task_name=self.name, args=layer.id, message=message ) task_error.save() except Exception, e: LOGGER.error('Layers NOT indexed correctly') LOGGER.error(e, exc_info=True) self.retry(layer) elif SEARCH_TYPE == 'elasticsearch': from hypermap.aggregator.elasticsearch_client import ESHypermap LOGGER.debug('Syncing layer %s to es' % layer.name) esobject = ESHypermap() success, message = esobject.layer_to_es(layer) if not success: from hypermap.aggregator.models import TaskError task_error = TaskError( task_name=self.name, args=layer.id, message=message ) task_error.save() @shared_task(bind=True) def index_all_layers(self): from hypermap.aggregator.models import Layer layer_to_processes = Layer.objects.all()
def index_layer(self, layer, use_cache=False): """Index a layer in the search backend. If cache is set, append it to the list, if it isn't send the transaction right away. cache needs memcached to be available. """ if not layer.is_valid: LOGGER.debug( 'Not indexing or removing layer with id %s in search engine as it is not valid' % layer.id) unindex_layer(layer, use_cache) return if layer.was_deleted: LOGGER.debug( 'Not indexing or removing layer with id %s in search engine as was_deleted is true' % layer.id) unindex_layer(layer, use_cache) return # 1. if we use cache if use_cache: LOGGER.debug( 'Caching layer with id %s for syncing with search engine' % layer.id) layers = cache.get('layers') if layers is None: layers = set([layer.id]) else: layers.add(layer.id) cache.set('layers', layers) return # 2. if we don't use cache # TODO: Make this function more DRY # by abstracting the common bits. if SEARCH_TYPE == 'solr': from hypermap.aggregator.solr import SolrHypermap LOGGER.debug('Syncing layer %s to solr' % layer.name) try: solrobject = SolrHypermap() success, message = solrobject.layer_to_solr(layer) if not success: from hypermap.aggregator.models import TaskError task_error = TaskError(task_name=self.name, args=layer.id, message=message) task_error.save() except Exception as e: LOGGER.error('Layers NOT indexed correctly') LOGGER.error(e, exc_info=True) self.retry(layer) elif SEARCH_TYPE == 'elasticsearch': from hypermap.aggregator.elasticsearch_client import ESHypermap LOGGER.debug('Syncing layer %s to es' % layer.name) esobject = ESHypermap() success, message = esobject.layer_to_es(layer) if not success: from hypermap.aggregator.models import TaskError task_error = TaskError(task_name=self.name, args=layer.id, message=message) task_error.save()