def haystack_batch_update(app_label, model_name, pks=None, start=None, end=None,
                          date_lookup=None, batch_size=100, remove=True):
    """
    Batch haystack index updates for the given model.

    If no ``pks`` are given, a general reindex will be launched. The matching
    primary keys are collected once, then dispatched to ``haystack_update``
    tasks in slices of ``batch_size``.

    :param app_label: Django app label of the model to index.
    :param model_name: Name of the model to index.
    :param pks: Optional iterable of primary keys to restrict the update to.
    :param start: Optional lower bound applied via ``date_lookup``.
    :param end: Optional upper bound applied via ``date_lookup``.
    :param date_lookup: Field name for the date filter; defaults to the
        index's ``get_updated_field()``.
    :param batch_size: Number of pks sent per ``haystack_update`` task.
    :param remove: Passed through to ``haystack_update``.
    """
    model_class = get_model(app_label, model_name)
    using = connection_router.for_write()[0]
    index = connections[using].get_unified_index().get_index(model_class)

    pk_qs = index.index_queryset()
    if pks is not None:
        pk_qs = pk_qs.filter(pk__in=pks)

    if date_lookup is None:
        date_lookup = index.get_updated_field()
    if date_lookup is not None:
        if start is not None:
            pk_qs = pk_qs.filter(**{"%s__gte" % date_lookup: start})
        if end is not None:
            pk_qs = pk_qs.filter(**{"%s__lte" % date_lookup: end})

    pks = list(pk_qs.distinct().values_list('pk', flat=True))
    total = len(pks)

    # Use dedicated loop variables instead of clobbering the ``start``/``end``
    # parameters, and ``range`` instead of the Python-2-only ``xrange``
    # (``range`` iterates identically on both Python versions).
    for batch_start in range(0, total, batch_size):
        batch_end = min(batch_start + batch_size, total)
        haystack_update.delay(app_label, model_name,
                              pks[batch_start:batch_end], remove=remove)
def worker(bits):
    """
    Multiprocessing worker entry point for index maintenance.

    ``bits`` is a tuple whose first element selects the operation
    ('do_update' or 'do_remove'); the remaining elements are the positional
    arguments for that operation. Unrecognized operations are ignored.
    """
    # We need to reset the connections, otherwise the different processes
    # will try to share the connection, which causes things to blow up.
    from django.db import connections

    for alias, info in connections.databases.items():
        # We need to also tread lightly with SQLite, because blindly wiping
        # out connections (via ``... = {}``) destroys in-memory DBs.
        if 'sqlite3' not in info['ENGINE']:
            try:
                # NOTE(review): ``db`` is not defined in this function —
                # presumably ``django.db`` imported at module level; confirm,
                # otherwise this raises NameError instead of closing anything.
                db.close_connection()
                del connections._connections[alias]
            except KeyError:
                pass

    if bits[0] == 'do_update':
        func, model, start, end, total, using, start_date, end_date, verbosity = bits
    elif bits[0] == 'do_remove':
        func, model, pks_seen, start, upper_bound, using, verbosity = bits
    else:
        return

    # NOTE(review): ``for_write`` may return a list of aliases; verify whether
    # ``[0]`` is needed here when ``using`` is falsy before indexing
    # ``haystack_connections`` with the result.
    backend_alias = using or connection_router.for_write(**{'models': [model]})
    unified_index = haystack_connections[backend_alias].get_unified_index()
    backend = haystack_connections[backend_alias].get_backend()
    index = unified_index.get_index(model)

    # ``func`` is ``bits[0]``; test it in both branches for consistency.
    if func == 'do_update':
        qs = index.build_queryset(start_date=start_date, end_date=end_date)
        do_update(backend, index, qs, start, end, total, verbosity=verbosity)
    elif func == 'do_remove':
        do_remove(backend, index, model, pks_seen, start, upper_bound,
                  verbosity=verbosity)
def handle_index_update(action=None, callback_params=None):
    """
    Do the actual work of re-indexing the instance.

    :param action: ``ADD`` or ``DELETE``; a None or empty action is a no-op.
    :param callback_params: Mapping with ``content_type_id`` and ``pk`` keys
        identifying the instance to (re)index.
    """
    # The original signature used a shared mutable default (``{}``); default
    # to None and materialize an empty dict per call instead.
    if callback_params is None:
        callback_params = {}

    if action is None or action == "":
        # ``Logger.warn`` is a deprecated alias for ``warning``.
        logger.warning("Indexing action cannot be None or empty!")
        return

    ct_id = callback_params["content_type_id"]
    model = ContentType.objects.get(id=ct_id).model_class()
    instance = model.objects.get(pk=callback_params["pk"])
    using_backends = connection_router.for_write(instance=instance)

    for using in using_backends:
        try:
            unified_index = connections[using].get_unified_index()
            index = unified_index.get_index(model)
            if action == ADD:
                # Unpublished content is removed rather than indexed.
                if instance.state == "unpublished":
                    index.remove_object(instance, using=using)
                else:
                    index.update_object(instance, using=using)
            elif action == DELETE:
                index.remove_object(instance, using=using)
                # clean up the index item table now.
                try:
                    item = IndexedItem.objects.get(content_type_pk=ct_id,
                                                   instance_pk=instance.pk)
                    item.delete()
                except ObjectDoesNotExist:
                    logger.warning("IndexedItem not found... continuing.")
        except NotHandled:
            logger.warning("No indexing backend found for %s" % instance)
def update_index(app_label, pk, is_save):
    """
    Refresh the search-index entry for one model instance.

    Per the original author's note, transactions are serialized through a
    lock maintained in redis.

    :param app_label: Dotted "app.Model" label identifying the model.
    :param pk: Primary key of the instance.
    :param is_save: True for save (insert/update), False for delete.
    """
    app_name, model_name = app_label.split('.')
    model = apps.get_app_config(app_name).get_model(model_name)

    instance = model.objects.filter(pk=pk).first()
    if instance is None:
        return

    for alias in connection_router.for_write(instance=instance):
        try:
            unified = haystack_connections[alias].get_unified_index()
            index = unified.get_index(instance._meta.model)
            do_update_index(instance, index, alias, is_save)
        except NotHandled:
            # TODO: Maybe log it or let the exception bubble?
            continue
def _get_backend(self, using):
    """
    Return the search backend for ``using``; when ``using`` is None, ask
    the write router which alias should handle this index's model.
    """
    if using is None:
        hints = {
            'index': self,
            'models': [self.get_model()],
        }
        # ``for_write`` returns a list of aliases — the raw list is not a
        # valid ``connections`` key, so take the first match (mirrors the
        # sibling ``_get_backend``/``get_backend`` implementations).
        using = connection_router.for_write(**hints)[0]
    return connections[using].get_backend()
def indexes_for_object(instance):
    """
    Yield ``(index, using)`` pairs for every write backend able to handle
    ``instance``'s model; backends with no registered index are skipped.
    """
    model = type(instance)
    for alias in connection_router.for_write(instance=instance):
        try:
            index = connections[alias].get_unified_index().get_index(model)
        except NotHandled:
            continue
        yield index, alias
def _get_backend(self, using):
    """
    Resolve the backend for this index, consulting the write router when
    no alias is supplied. Returns None when no backend can handle it.
    """
    if using is None:
        candidates = connection_router.for_write(index=self)
        if not candidates:
            # There's no backend to handle it. Bomb out.
            return None
        using = candidates[0]
    return connections[using].get_backend()
def get_backend(self, using=None):
    """
    Return the backend for ``using``; when omitted, ask the write router
    for this index. Returns None if no backend handles it.
    """
    if using is not None:
        return connections[using].get_backend()
    try:
        alias = connection_router.for_write(index=self)[0]
    except IndexError:
        # There's no backend to handle it. Bomb out.
        return None
    return connections[alias].get_backend()
def get_indexes(self, model_class, **kwargs):
    """
    Yield ``(index, using)`` pairs for the model's registered
    ``SearchIndex`` on each write backend, in a standarized way.

    Raises ImproperlyConfigured when no index is registered for the model.
    """
    try:
        for alias in connection_router.for_write(models=[model_class]):
            holder = connections[alias].get_unified_index()
            yield holder.get_index(model_class), alias
    except IndexNotFoundException:
        raise ImproperlyConfigured(
            "Couldn't find a SearchIndex for %s." % model_class)
def get_indexes(model_class):
    """
    Yield ``(index, using)`` pairs for the model's registered
    ``SearchIndex`` in a standarized way, one per write backend.
    """
    try:
        aliases = connection_router.for_write(models=[model_class])
        for alias in aliases:
            unified = connections[alias].get_unified_index()
            yield unified.get_index(model_class), alias
    except IndexNotFoundException:
        raise ImproperlyConfigured(
            "Couldn't find a SearchIndex for %s." % model_class)
def haystack_remove(app_label, model_name, pks):
    """
    Remove the haystack records for any instances with the given pks,
    retrying database hiccups via ``_haystack_database_retry``.
    """
    alias = connection_router.for_write()[0]
    backend = connections[alias].get_backend()

    def _remove_all():
        # Identifiers follow haystack's "app_label.model_name.pk" scheme.
        for pk in pks:
            backend.remove("%s.%s.%s" % (app_label, model_name, pk))

    _haystack_database_retry(haystack_remove, _remove_all)
def update_lr_index_entry(res_obj):
    """
    Update/create the search index entry for the given language resource
    object. The appropriate search index is automatically chosen.
    """
    alias = haystack_connection_router.for_write()
    # Normalize a list/iterable result to its first alias.
    if hasattr(alias, '__iter__'):
        alias = alias[0]
    unified = haystack_connections[alias].get_unified_index()
    unified.get_index(resourceInfoType_model).update_object(res_obj)
def get_index(self, model_class, **kwargs):
    """
    Fetch the model's registered ``SearchIndex`` in a standarized way.

    Supports both the legacy global ``site`` registry and the newer
    multi-backend unified index.

    :raises ImproperlyConfigured: when no index is registered for the model.
    """
    # The trailing ``return None`` in the original was unreachable: every
    # path in the try/except either returns or raises.
    try:
        if legacy:
            index_holder = site
        else:
            # NOTE(review): ``for_write`` may return a list of aliases on
            # newer haystack versions; confirm whether ``[0]`` is needed
            # before indexing ``connections`` with the result.
            backend_alias = connection_router.for_write(**{'models': [model_class]})
            index_holder = connections[backend_alias].get_unified_index()  # noqa
        return index_holder.get_index(model_class)
    except IndexNotFoundException:
        raise ImproperlyConfigured(
            "Couldn't find a SearchIndex for %s." % model_class)
def process_action(action, instance, model):
    """
    Enqueue an index task for ``instance`` on the first celery-backed
    index that accepts it.

    Taken from celery_haystack.signals.CelerySignalProcessor.enqueue.
    """
    for alias in connection_router.for_write(instance=instance):
        try:
            index = connections[alias].get_unified_index().get_index(model)
        except NotHandled:
            continue
        if not isinstance(index, CelerySearchIndex):
            continue
        if action == 'update' and not index.should_update(instance):
            continue
        identifier = get_identifier(instance)
        get_update_task()()(action, identifier)
        break
def get_index(self, model_class, **kwargs):
    """
    Fetch the model's registered ``SearchIndex`` in a standarized way,
    via either the legacy global ``site`` or the unified index.

    :raises ImproperlyConfigured: when no index is registered for the model.
    """
    # Dropped the trailing ``return None``: it was dead code, since every
    # path in the try/except returns or raises.
    try:
        if legacy:
            index_holder = site
        else:
            # NOTE(review): ``for_write`` may return a list of aliases on
            # newer haystack versions — confirm whether ``[0]`` is needed
            # before using it as a ``connections`` key.
            backend_alias = connection_router.for_write(
                **{'models': [model_class]})
            index_holder = connections[backend_alias].get_unified_index(
            )  # noqa
        return index_holder.get_index(model_class)
    except IndexNotFoundException:
        raise ImproperlyConfigured(
            "Couldn't find a SearchIndex for %s." % model_class)
def repair_solr(short_id):
    """
    Repair SOLR index content for a resource.

    Public/discoverable resources are (re)indexed; private ones are
    removed from the index.

    :param short_id: Short id of the ``BaseResource`` to repair.
    """
    logger = logging.getLogger(__name__)

    try:
        res = BaseResource.objects.get(short_id=short_id)
    except BaseResource.DoesNotExist:
        print("{} does not exist".format(short_id))
        # Bail out: the original fell through here and crashed with a
        # NameError, because ``res`` is undefined past this point.
        return

    # instance with proper type
    instance = res.get_content_model()
    assert instance, (res, res.content_model)

    print("re-indexing {} in solr".format(short_id))

    # instance of BaseResource matching real instance
    baseinstance = BaseResource.objects.get(pk=instance.pk)
    basesender = BaseResource
    using_backends = connection_router.for_write(instance=baseinstance)
    for using in using_backends:
        # if object is public/discoverable or becoming public/discoverable, index it
        if instance.raccess.public or instance.raccess.discoverable:
            try:
                index = connections[using].get_unified_index().get_index(
                    basesender)
                index.update_object(baseinstance, using=using)
            except NotHandled:
                logger.exception(
                    "Failure: changes to %s with short_id %s not added to Solr Index.",
                    str(type(instance)), baseinstance.short_id)
        # if object is private or becoming private, delete from index
        else:
            try:
                index = connections[using].get_unified_index().get_index(
                    basesender)
                index.remove_object(baseinstance, using=using)
            except NotHandled:
                logger.exception(
                    "Failure: delete of %s with short_id %s failed.",
                    str(type(instance)), baseinstance.short_id)
def update_instance_indexes(sender_type_id, object_type_id, object_id):
    """
    Given an individual model instance, update its entire indexes.
    """
    sender = ContentType.objects.get_for_id(sender_type_id)
    object_type = ContentType.objects.get_for_id(object_type_id)
    instance = object_type.get_object_for_this_type(pk=object_id)

    try:
        aliases = connection_router.for_write(instance=instance)
    except IndexError:
        # No valid instance given, stop processing here
        return None

    for alias in aliases:
        try:
            index = connections[alias].get_unified_index().get_index(sender)
            index.update(using=alias)
        except NotHandled:
            # TODO: Maybe log it or let the exception bubble?
            pass
def repair_solr(short_id):
    """
    Repair SOLR index content for a resource.

    Indexes public/discoverable resources and removes private ones from
    the index.

    :param short_id: Short id of the ``BaseResource`` to repair.
    """
    logger = logging.getLogger(__name__)

    try:
        res = BaseResource.objects.get(short_id=short_id)
    except BaseResource.DoesNotExist:
        print("{} does not exist".format(short_id))
        # Must stop here: the original continued and hit a NameError,
        # since ``res`` was never bound.
        return

    # instance with proper type
    instance = res.get_content_model()
    assert instance, (res, res.content_model)

    print("re-indexing {} in solr".format(short_id))

    # instance of BaseResource matching real instance
    baseinstance = BaseResource.objects.get(pk=instance.pk)
    basesender = BaseResource
    using_backends = connection_router.for_write(instance=baseinstance)
    for using in using_backends:
        # if object is public/discoverable or becoming public/discoverable, index it
        if instance.raccess.public or instance.raccess.discoverable:
            try:
                index = connections[using].get_unified_index().get_index(basesender)
                index.update_object(baseinstance, using=using)
            except NotHandled:
                logger.exception(
                    "Failure: changes to %s with short_id %s not added to Solr Index.",
                    str(type(instance)), baseinstance.short_id)
        # if object is private or becoming private, delete from index
        else:
            try:
                index = connections[using].get_unified_index().get_index(basesender)
                index.remove_object(baseinstance, using=using)
            except NotHandled:
                logger.exception("Failure: delete of %s with short_id %s failed.",
                                 str(type(instance)), baseinstance.short_id)
def haystack_update(app_label, model_name, pks, remove=True):
    """
    Update the haystack records for any valid instances with the given pks.

    Generally, ``remove`` should be ``True`` so that items which are no
    longer in the ``index_queryset()`` will be taken out of the index;
    pass ``remove=False`` to skip that pass when it isn't needed.
    """
    model_class = get_model(app_label, model_name)
    alias = connection_router.for_write()[0]
    conn = connections[alias]
    backend = conn.get_backend()
    index = conn.get_unified_index().get_index(model_class)

    qs = index.index_queryset().filter(pk__in=pks)
    if qs:
        def _do_update():
            backend.update(index, qs)
        _haystack_database_retry(haystack_update, _do_update)

    if remove:
        # Anything requested but no longer in the queryset gets purged.
        seen = {instance.pk for instance in qs}
        unseen_pks = set(pks) - seen
        haystack_remove.apply(args=(app_label, model_name, unseen_pks))
def remove_objects_indexes(sender_type_id, object_type_id, object_id):
    """
    Given a set of `objects` model instances, remove them from the index
    as preparation for the new index.
    """
    sender = ContentType.objects.get_for_id(sender_type_id)
    object_type = ContentType.objects.get_for_id(object_type_id)
    instance = object_type.get_object_for_this_type(pk=object_id)

    if isinstance(instance, Submission):
        # Submission have complex status handling, so a status change should
        # lead to more drastic reindexing.
        thread_ids = [row['id'] for row in list(instance.thread.public().values('id'))]
        targets = Submission.objects.filter(pk__in=thread_ids)
    else:
        # Objects such as Reports, Comments, Commentaries, etc. may get
        # rejected. This does not remove them from the index. Therefore, do a
        # complete rebuild_index action on that specific instance every time
        # the index signal is triggered.
        targets = [instance]

    try:
        aliases = connection_router.for_write(instance=targets[0])
    except IndexError:
        # No submissions given, stop processing here
        return None

    for obj in targets:
        for alias in aliases:
            try:
                index = connections[alias].get_unified_index().get_index(sender)
                index.remove_object(obj, using=alias)
            except NotHandled:
                # TODO: Maybe log it or let the exception bubble?
                pass
def _get_backend(self, using):
    """
    Return the backend for ``using``; when ``using`` is None, ask the
    write router. Returns None when the router has no match.
    """
    if using is None:
        try:
            # ``for_write`` returns a list of aliases; the raw list is not
            # a valid ``connections`` key, so select the first match. This
            # mirrors the sibling ``_get_backend`` implementations, which
            # also treat an empty result as "no backend".
            using = connection_router.for_write(index=self)[0]
        except IndexError:
            # There's no backend to handle it. Bomb out.
            return None
    return connections[using].get_backend()
def get_indexes(model_class):
    """
    Yield the registered ``SearchIndex`` for ``model_class`` on every
    backend the router selects for writes.
    """
    for alias in connection_router.for_write(models=[model_class]):
        yield connections[alias].get_unified_index().get_index(model_class)