def remove_indexed_files(model, version, build):
    """
    Delete a version's files from the search index, sparing one build.

    Documents whose ``project`` and ``version`` terms match the slugs of
    ``version`` are deleted, except those whose ``build`` term equals
    ``build``. When autosync is disabled nothing is deleted and the skip is
    logged. Any exception raised while deleting is logged and swallowed.

    :param model: Model class used to look up the registered document.
    :param version: Object providing ``slug`` and ``project.slug``.
    :param build: Value of the ``build`` term to keep in the index.
    """
    if not DEDConfig.autosync_enabled():
        log.info(
            'Autosync disabled, skipping removal from the search index for: %s:%s',
            version.project.slug,
            version.slug,
        )
        return

    try:
        registered = list(registry.get_documents(models=[model]))
        document = registered[0]
        project_slug = version.project.slug
        version_slug = version.slug
        log.info(
            'Deleting old files from search index for: %s:%s',
            project_slug,
            version_slug,
        )
        stale = document().search()
        stale = stale.filter('term', project=project_slug)
        stale = stale.filter('term', version=version_slug)
        # Everything except the given build is considered old
        stale = stale.exclude('term', build=build)
        stale.delete()
    except Exception:
        log.exception('Unable to delete a subset of files. Continuing.')
def get_data_of_related_instances(self, instance):
    """
    Describe the objects related to ``instance`` that are not marked removed.

    Each related document is asked for its instances via
    ``get_instances_from_related``; a single model instance and an iterable
    of instances are both accepted. ``ObjectDoesNotExist`` raised while
    collecting from a document is ignored for that document.

    :param instance: Object whose related instances are collected.
    :returns: A list of dicts with the keys ``app_label``, ``object_name``
        and ``instance_id``; an empty list when autosync is disabled.
    """
    if not DEDConfig.autosync_enabled():
        return []

    collected = set()
    for doc_class in self._get_related_doc(instance):
        document = doc_class()
        try:
            found = document.get_instances_from_related(instance)
            # Wrap a lone model instance so both shapes go through update()
            collected.update(
                [found] if isinstance(found, models.Model) else found
            )
        except ObjectDoesNotExist:
            continue

    return [
        {
            'app_label': obj._meta.app_label,
            'object_name': obj._meta.concrete_model._meta.object_name,
            'instance_id': obj.id,
        }
        for obj in collected
        if not (obj.is_removed or obj.is_permanently_removed)
    ]
def remove_indexed_files(model, project_slug, version_slug=None, build_id=None):
    """
    Remove files from `version_slug` of `project_slug` from the search index.

    Nothing is deleted when autosync is disabled. Any exception raised while
    deleting is logged and swallowed so the caller can continue.

    :param model: Class of the model to be deleted.
    :param project_slug: Project slug.
    :param version_slug: Version slug. If it isn't given, documents from
        every version of the project are deleted.
    :param build_id: Build id. If given, documents from this build are kept
        and the rest are deleted; if it isn't given, no build is excluded.
    """
    # A structlog-style ``bind()`` returns a new logger carrying the context
    # rather than changing ``log`` in place, so the result must be kept and
    # used; otherwise the slugs never appear in the messages below.
    # NOTE(review): assumes ``log`` is such a logger - confirm.
    bound_log = log.bind(
        project_slug=project_slug,
        version_slug=version_slug,
    )

    if not DEDConfig.autosync_enabled():
        bound_log.info('Autosync disabled, skipping removal from the search index.')
        return

    try:
        document = list(registry.get_documents(models=[model]))[0]
        bound_log.info('Deleting old files from search index.')

        documents = (
            document().search()
            .filter('term', project=project_slug)
        )
        if version_slug:
            documents = documents.filter('term', version=version_slug)
        if build_id:
            # Keep the documents that belong to the given build
            documents = documents.exclude('term', build=build_id)
        documents.delete()
    except Exception:
        # Deliberate best-effort: report the failure and carry on
        bound_log.exception('Unable to delete a subset of files. Continuing.')
def _handle_related(self, instance) -> None:
    """
    Send a group of sync tasks for the documents related to ``instance``.

    Relies on each document's ``get_instances_from_related`` returning
    either an iterable of ids or ``None``. Documents whose model is not
    allowed to sync, or for which ``instance`` is not one of the declared
    related models, are skipped. The group runs immediately outside an
    atomic block and is otherwise registered to run on commit.

    :param instance: The related object that changed.
    """
    if not DEDConfig.autosync_enabled():
        return

    signatures = []
    for doc_class in registry._get_related_doc(instance):
        document = doc_class(related_instance_to_ignore=instance)
        related_model = document.Django.model

        syncable = self._is_sync_allowed(related_model) and isinstance(
            instance, document.Django.related_models)
        if not syncable:
            continue

        related_ids = document.get_instances_from_related(instance)
        if related_ids is None:
            continue

        sync_task = related_model.get_sync_task()
        signatures.extend([sync_task.s(obj_id) for obj_id in related_ids])

    if not signatures:
        return

    task_group = group(signatures)
    if transaction.get_connection().in_atomic_block:
        # Wait for the surrounding transaction before dispatching
        transaction.on_commit(lambda: task_group())
    else:
        task_group()
def remove_indexed_file(sender, instance_list, **kwargs):
    """
    Remove deleted files from the build process.

    The ids of ``instance_list`` are passed to ``delete_objects_in_es``.
    When both ``version`` and ``commit`` are supplied as keyword arguments,
    documents of that version and project whose ``commit`` term differs are
    deleted as well.

    :param sender: Model class; also used to look up the registered document.
    :param instance_list: Objects to remove. An empty value is a no-op.
    :param kwargs: May carry ``version`` and ``commit``.
    """
    if not instance_list:
        return

    document = list(registry.get_documents(models=[sender]))[0]
    version = kwargs.get('version')
    commit = kwargs.get('commit')

    index_kwargs = {
        'app_label': sender._meta.app_label,
        'model_name': sender.__name__,
        'document_class': str(document),
        'objects_id': [obj.id for obj in instance_list],
    }

    # Do not touch the index if autosync is disabled globally
    if not DEDConfig.autosync_enabled():
        return

    delete_objects_in_es(**index_kwargs)

    # NOTE(review): the commit cleanup is treated as part of the
    # autosync-enabled path - confirm against the original indentation.
    if not (version and commit):
        return

    # Sanity check by deleting all old files not in this commit
    log.info('Deleting old commits from search index')
    stale = document().search()
    stale = stale.filter('term', version=version.slug)
    stale = stale.filter('term', project=version.project.slug)
    stale = stale.exclude('term', commit=commit)
    stale.delete()
def index_new_files(model, version, build):
    """
    Index new files from the version into the search index.

    The queryset of the document registered for ``model`` is filtered by the
    version's project, the version and ``build``, and handed to the
    document's ``update``. When autosync is disabled nothing is indexed and
    the skip is logged. Any exception raised while indexing is logged and
    swallowed so the caller can continue.

    :param model: Model class used to look up the registered document.
    :param version: Version object; also provides ``project`` and the slugs
        used in log messages.
    :param build: Build value used to filter the queryset.
    """
    if not DEDConfig.autosync_enabled():
        log.info(
            'Autosync disabled, skipping indexing into the search index for: %s:%s',
            version.project.slug,
            version.slug,
        )
        return

    try:
        document = list(registry.get_documents(models=[model]))[0]
        doc_obj = document()
        queryset = (
            doc_obj.get_queryset()
            .filter(project=version.project, version=version, build=build)
        )
        log.info(
            'Indexing new objects into search index for: %s:%s',
            version.project.slug,
            version.slug,
        )
        doc_obj.update(queryset.iterator())
    except Exception:
        # Deliberate best-effort: report the failure and carry on
        log.exception('Unable to index a subset of files. Continuing.')
def delete_documents_by_model_and_id(self, model, _id, **kwargs):
    """
    Call ``delete_by_id`` on every document registered for ``model``.

    Does nothing when autosync is disabled or when ``model`` has no entry in
    ``self._models``. Documents whose ``django.ignore_signals`` is truthy
    are skipped.

    :param model: Model class used as the key into ``self._models``.
    :param _id: Identifier forwarded to ``delete_by_id``.
    :param kwargs: Extra keyword arguments forwarded to ``delete_by_id``.
    """
    if not DEDConfig.autosync_enabled():
        return
    if model not in self._models:
        return

    for doc_class in self._models[model]:
        if doc_class.django.ignore_signals:
            continue
        doc_class().delete_by_id(_id, **kwargs)
def remove_project_delete(instance, *args, **kwargs):
    """
    Remove a project from the search index, synchronously.

    The incoming ``args`` and ``kwargs`` are not used. Nothing is removed
    when autosync is disabled.

    :param instance: Project whose ``id`` is passed on for deletion.
    """
    from readthedocs.search.documents import ProjectDocument

    delete_kwargs = {
        'app_label': Project._meta.app_label,
        'model_name': Project.__name__,
        'document_class': str(ProjectDocument),
        'objects_id': [instance.id],
    }

    if not DEDConfig.autosync_enabled():
        return

    # Don't `delay` this because the objects will be deleted already
    delete_objects_in_es(**delete_kwargs)
def index_html_file(instance_list, **_):
    """
    Index the given HTML files, synchronously.

    Extra keyword arguments are accepted and ignored. Nothing is indexed
    when autosync is disabled.

    :param instance_list: Objects whose ``id`` values are passed on for
        indexing.
    """
    index_kwargs = {
        'app_label': HTMLFile._meta.app_label,
        'model_name': HTMLFile.__name__,
        'document_class': str(PageDocument),
        'index_name': None,  # No need to change the index name
        'objects_id': [obj.id for obj in instance_list],
    }

    # Autosync disabled globally means no indexing at all
    if not DEDConfig.autosync_enabled():
        return

    index_objects_to_es(**index_kwargs)
def index_project(instance, *args, **kwargs):
    """
    Queue a project for indexing through ``index_objects_to_es.delay``.

    The incoming ``args`` and ``kwargs`` are not used. Nothing is queued
    when autosync is disabled.

    :param instance: Project whose ``id`` is passed on for indexing.
    """
    index_kwargs = {
        'app_label': Project._meta.app_label,
        'model_name': Project.__name__,
        'document_class': str(ProjectDocument),
        'index_name': None,  # No need to change the index name
        'objects_id': [instance.id],
    }

    # Autosync disabled globally means no indexing at all
    if not DEDConfig.autosync_enabled():
        return

    index_objects_to_es.delay(**index_kwargs)
def remove_project_delete(instance, *args, **kwargs):
    """
    Queue removal of a project through ``delete_objects_in_es.delay``.

    The incoming ``args`` and ``kwargs`` are not used. Nothing is queued
    when autosync is disabled.

    :param instance: Project whose ``id`` is passed on for deletion.
    """
    from readthedocs.search.documents import ProjectDocument

    delete_kwargs = {
        'app_label': Project._meta.app_label,
        'model_name': Project.__name__,
        'document_class': str(ProjectDocument),
        'objects_id': [instance.id],
    }

    # Autosync disabled globally means the index is left alone
    if not DEDConfig.autosync_enabled():
        return

    delete_objects_in_es.delay(**delete_kwargs)
def remove_html_file(instance_list, **_):
    """
    Remove deleted files from the build process, synchronously.

    Extra keyword arguments are accepted and ignored. Nothing is removed
    when autosync is disabled.

    :param instance_list: Objects whose ``id`` values are passed on for
        deletion.
    """
    from readthedocs.search.documents import PageDocument

    delete_kwargs = {
        'app_label': HTMLFile._meta.app_label,
        'model_name': HTMLFile.__name__,
        'document_class': str(PageDocument),
        'objects_id': [obj.id for obj in instance_list],
    }

    # Autosync disabled globally means the index is left alone
    if not DEDConfig.autosync_enabled():
        return

    delete_objects_in_es(**delete_kwargs)
def index_indexed_file(sender, instance_list, **kwargs):
    """
    Handle indexing from the build process.

    The document registered for ``sender`` is looked up and the ids of
    ``instance_list`` are passed to ``index_objects_to_es``. The incoming
    ``kwargs`` are not used. Nothing is indexed when autosync is disabled.

    :param sender: Model class of the objects being indexed.
    :param instance_list: Objects to index. An empty value is a no-op.
    """
    if not instance_list:
        return

    document = list(registry.get_documents(models=[sender]))[0]
    index_kwargs = {
        'app_label': sender._meta.app_label,
        'model_name': sender.__name__,
        'document_class': str(document),
        'objects_id': [obj.id for obj in instance_list],
    }

    # Autosync disabled globally means no indexing at all
    if not DEDConfig.autosync_enabled():
        return

    index_objects_to_es(**index_kwargs)
def index_html_file_save(instance, *args, **kwargs):
    """
    Index a HTMLFile instance in response to the ``post_save`` signal.

    The work is queued with Celery so it happens asynchronously, replacing
    how django-elasticsearch-dsl does it. The incoming ``args`` and
    ``kwargs`` are not used. Nothing is queued when autosync is disabled.

    :param instance: HTMLFile whose ``id`` is passed on for indexing.
    """
    from readthedocs.search.documents import PageDocument

    index_kwargs = {
        'app_label': HTMLFile._meta.app_label,
        'model_name': HTMLFile.__name__,
        'document_class': str(PageDocument),
        'objects_id': [instance.id],
    }

    # Autosync disabled globally means no indexing at all
    if not DEDConfig.autosync_enabled():
        return

    index_objects_to_es.delay(**index_kwargs)
def _is_sync_allowed(sender) -> bool:
    """
    Tell whether ``sender`` may be synced.

    Syncing is allowed when autosync is enabled and ``sender`` is a subclass
    of ``SyncTaskMixin``. The subclass check is only made when autosync is
    enabled.

    :param sender: Class to check.
    """
    autosync = DEDConfig.autosync_enabled()
    if not autosync:
        # Return the flag itself, exactly as a short-circuiting `and` would
        return autosync
    return issubclass(sender, SyncTaskMixin)
def remove_html_file(instance_list, **_):
    """
    Pass ``instance_list`` to ``registry.delete``.

    Extra keyword arguments are accepted and ignored. Nothing is deleted
    when autosync is disabled.

    :param instance_list: Objects handed to ``registry.delete``.
    """
    # Autosync disabled globally means the index is left alone
    if not DEDConfig.autosync_enabled():
        return

    registry.delete(instance_list)
def remove_project(instance, *args, **kwargs):
    """
    Pass ``instance`` to ``registry.delete``.

    The incoming ``args`` and ``kwargs`` are not used. Nothing is deleted
    when autosync is disabled.

    :param instance: Object handed to ``registry.delete``.
    """
    # Autosync disabled globally means the index is left alone
    if not DEDConfig.autosync_enabled():
        return

    registry.delete(instance)