def setUp(self) -> None:
    """Point the test case at the Solr test cores and open rw interfaces."""
    # Set up testing cores in Solr and swap them in
    self.core_name_opinion = settings.SOLR_OPINION_TEST_CORE_NAME
    self.core_name_audio = settings.SOLR_AUDIO_TEST_CORE_NAME
    self.core_name_people = settings.SOLR_PEOPLE_TEST_CORE_NAME
    self.core_name_recap = settings.SOLR_RECAP_TEST_CORE_NAME

    core_urls = (
        settings.SOLR_OPINION_URL,
        settings.SOLR_AUDIO_URL,
        settings.SOLR_PEOPLE_URL,
        settings.SOLR_RECAP_URL,
    )
    # Build one read/write interface per core, in the same order as above.
    self.si_opinion, self.si_audio, self.si_people, self.si_recap = [
        scorched.SolrInterface(url, mode="rw") for url in core_urls
    ]
    self.all_sis = [
        self.si_opinion,
        self.si_audio,
        self.si_people,
        self.si_recap,
    ]
def setUp(self):
    """Create temporary Solr test cores and open interfaces to each."""
    # Set up testing cores in Solr and swap them in
    self.core_name_opinion = settings.SOLR_OPINION_TEST_CORE_NAME
    self.core_name_audio = settings.SOLR_AUDIO_TEST_CORE_NAME
    self.core_name_people = settings.SOLR_PEOPLE_TEST_CORE_NAME
    self.core_name_recap = settings.SOLR_RECAP_TEST_CORE_NAME

    # Each core gets its own schema file from the Solr conf directory.
    conf_dir = os.path.join(settings.INSTALL_ROOT, 'Solr', 'conf')
    core_schemas = (
        (self.core_name_opinion, 'schema.xml'),
        (self.core_name_audio, 'audio_schema.xml'),
        (self.core_name_people, 'person_schema.xml'),
        (self.core_name_recap, 'recap_schema.xml'),
    )
    for core_name, schema_file in core_schemas:
        create_temp_solr_core(core_name, os.path.join(conf_dir, schema_file))

    self.si_opinion = sunburnt.SolrInterface(settings.SOLR_OPINION_URL,
                                             mode='rw')
    self.si_audio = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL,
                                           mode='rw')
    self.si_people = sunburnt.SolrInterface(settings.SOLR_PEOPLE_URL,
                                            mode='rw')
    # This will cause headaches, but it follows in the mission to slowly
    # migrate off of sunburnt. This was added after the items above, and so
    # uses scorched, not sunburnt.
    self.si_recap = scorched.SolrInterface(settings.SOLR_RECAP_URL,
                                           mode='rw')
def setUp(self):
    """Wire the test case to the Solr test cores and open rw interfaces."""
    # Set up testing cores in Solr and swap them in
    (
        self.core_name_opinion,
        self.core_name_audio,
        self.core_name_people,
        self.core_name_recap,
    ) = (
        settings.SOLR_OPINION_TEST_CORE_NAME,
        settings.SOLR_AUDIO_TEST_CORE_NAME,
        settings.SOLR_PEOPLE_TEST_CORE_NAME,
        settings.SOLR_RECAP_TEST_CORE_NAME,
    )

    self.si_opinion = sunburnt.SolrInterface(settings.SOLR_OPINION_URL,
                                             mode="rw")
    self.si_audio = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL,
                                           mode="rw")
    self.si_people = sunburnt.SolrInterface(settings.SOLR_PEOPLE_URL,
                                            mode="rw")
    # This will cause headaches, but it follows in the mission to slowly
    # migrate off of sunburnt. This was added after the items above, and so
    # uses scorched, not sunburnt.
    self.si_recap = scorched.SolrInterface(settings.SOLR_RECAP_URL,
                                           mode="rw")
    self.all_sis = [
        self.si_opinion,
        self.si_audio,
        self.si_people,
        self.si_recap,
    ]
def solr_delete(sender, instance, **kwargs):
    """Signal handler: remove any Solr documents matching the instance id."""
    import scorched
    from django.conf import settings

    conn = scorched.SolrInterface(settings.SOLR_SERVER)
    docs = conn.query(id=instance.id).execute().result.docs
    if docs:
        conn.delete_by_ids([doc['id'] for doc in docs])
def add_docket_to_solr_by_rds(item_pks, force_commit=False):
    """Add RECAPDocuments from a single Docket to Solr.

    This is a performance enhancement that can be used when adding many RECAP
    Documents from a single docket to Solr. Instead of pulling the same docket
    metadata for these items over and over (adding potentially thousands of
    queries on a large docket), just pull the metadata once and cache it for
    every document that's added.

    :param item_pks: RECAPDocument pks to add or update in Solr.
    :param force_commit: Whether to send a commit to Solr (this is usually
    not needed).
    :return: None
    """
    si = scorched.SolrInterface(settings.SOLR_RECAP_URL, mode="w")
    rds = RECAPDocument.objects.filter(pk__in=item_pks).order_by()
    try:
        metadata = rds[0].get_docket_metadata()
    except IndexError:
        # No documents matched the given pks; index without shared metadata.
        metadata = None

    try:
        si.add([item.as_search_dict(docket_metadata=metadata) for item in rds])
        if force_commit:
            si.commit()
    except SolrError as exc:
        add_docket_to_solr_by_rds.retry(exc=exc, countdown=30)
    finally:
        # Close on every path. Previously the connection was closed only on
        # success, leaking the HTTP connection whenever Solr errored and the
        # task was queued for retry.
        si.conn.http_connection.close()
def add_or_update_recap_document(item_pks, coalesce_docket=False,
                                 force_commit=False):
    """Add or update recap documents in Solr.

    :param item_pks: RECAPDocument pks to add or update in Solr.
    :param coalesce_docket: If True, assume every pk belongs to the same
        docket and query the shared docket metadata only once instead of
        once per RECAPDocument — a big win on dockets with thousands of
        entries.
    :param force_commit: Should we send a commit message at the end of our
        updates?
    :return: None
    """
    si = scorched.SolrInterface(settings.SOLR_RECAP_URL, mode='w')
    rds = RECAPDocument.objects.filter(pk__in=item_pks).order_by()

    metadata = None
    if coalesce_docket:
        try:
            metadata = rds[0].get_docket_metadata()
        except IndexError:
            # Empty queryset: nothing to coalesce.
            pass

    try:
        si.add([doc.as_search_dict(docket_metadata=metadata) for doc in rds])
        if force_commit:
            si.commit()
    except SolrError as exc:
        add_or_update_recap_document.retry(exc=exc, countdown=30)
def _teardown_test_solr():
    """Empty out the test cores that we use"""
    conns = [settings.SOLR_OPINION_TEST_URL, settings.SOLR_AUDIO_TEST_URL]
    for conn in conns:
        si = scorched.SolrInterface(conn, mode='rw')
        si.delete_all()
        si.commit()
        # Close the underlying HTTP connection so teardown doesn't leak a
        # socket per core (matches the typed variant of this helper).
        si.conn.http_connection.close()
def delete_items(items, solr_url, force_commit=False):
    """Delete the given document ids from the Solr index at ``solr_url``.

    Retries the task in 30s if Solr errors out.
    """
    interface = scorched.SolrInterface(solr_url, mode='w')
    ids = list(items)
    try:
        interface.delete_by_ids(ids)
        if force_commit:
            interface.commit()
    except SolrError as exc:
        delete_items.retry(exc=exc, countdown=30)
def solr_delete(sender, instance, created, **kwargs):
    """Signal handler: drop this language's Solr records and commit."""
    from django.conf import settings
    import scorched

    conn = scorched.SolrInterface(settings.SOLR_SERVER)
    hits = conn.query(type="language",
                      item_id="{0}".format(instance.id)).execute()
    conn.delete_by_ids([hit['id'] for hit in hits])
    conn.commit()
def delete_items(items, app_label, force_commit=False):
    """Delete ids from the Solr core configured for ``app_label``.

    Retries the task in 30s on a Solr error.
    """
    solr = scorched.SolrInterface(settings.SOLR_URLS[app_label], mode="w")
    try:
        solr.delete_by_ids(list(items))
        if force_commit:
            solr.commit()
    except SolrError as exc:
        delete_items.retry(exc=exc, countdown=30)
def delete_items(items, solr_obj_type, force_commit=False):
    """Delete ids from the Solr core configured for ``solr_obj_type``.

    Retries the task in 30s on a Solr error.
    """
    solr = scorched.SolrInterface(settings.SOLR_URLS[solr_obj_type], mode='w')
    ids_to_delete = list(items)
    try:
        solr.delete_by_ids(ids_to_delete)
        if force_commit:
            solr.commit()
    except SolrError as exc:
        delete_items.retry(exc=exc, countdown=30)
def _teardown_test_solr() -> None:
    """Empty out the test cores that we use"""
    for url in (settings.SOLR_OPINION_TEST_URL, settings.SOLR_AUDIO_TEST_URL):
        si = scorched.SolrInterface(url, mode="rw")
        si.delete_all()
        si.commit()
        # Don't leak the HTTP connection between test runs.
        si.conn.http_connection.close()
def commit_if_not_yet(group_result):
    """Commit results if they have not yet been committed."""
    for child in group_result.children:
        # NOTE(review): `child[0] == 0` appears to be a success flag and
        # `child[1]` the Solr document id — confirm against the task that
        # produces these results.
        child = child.result
        if child[0] == 0:
            solr_conn = scorched.SolrInterface(settings.SOLR_SERVER)
            resp = solr_conn.query(id=child[1]).execute()
            # Id not yet searchable means nothing has been committed; one
            # commit makes all pending adds visible, so we can stop here.
            if resp.result.numFound == 0:
                solr_conn.commit()
                return
def add_or_update_audio_files(item_pks, force_commit=False):
    """Index the given Audio items in Solr; retry in 30s on a Solr error."""
    si = scorched.SolrInterface(settings.SOLR_AUDIO_URL, mode='w')
    audios = Audio.objects.filter(pk__in=item_pks)
    try:
        si.add([audio.as_search_dict() for audio in audios])
        if force_commit:
            si.commit()
    except SolrError as exc:
        add_or_update_audio_files.retry(exc=exc, countdown=30)
def add_or_update_recap_document(item_pks, force_commit=False):
    """Add or update RECAPDocuments in Solr.

    :param item_pks: RECAPDocument pks to add or update in Solr.
    :param force_commit: Whether to send a commit to Solr (usually not
        needed).
    """
    si = scorched.SolrInterface(settings.SOLR_RECAP_URL, mode='w')
    try:
        si.add([
            item.as_search_dict()
            for item in RECAPDocument.objects.filter(pk__in=item_pks)
        ])
        if force_commit:
            si.commit()
    # Fixed: the Python-2-only `except SolrError, exc:` form is a
    # SyntaxError on Python 3.
    except SolrError as exc:
        add_or_update_recap_document.retry(exc=exc, countdown=30)
def add_or_update_opinions(item_pks, force_commit=False):
    """Add or update Opinions in Solr.

    :param item_pks: Opinion pks to add or update in Solr.
    :param force_commit: Whether to send a commit to Solr (usually not
        needed).
    """
    si = scorched.SolrInterface(settings.SOLR_OPINION_URL, mode='w')
    try:
        si.add([
            item.as_search_dict()
            for item in Opinion.objects.filter(pk__in=item_pks)
        ])
        if force_commit:
            si.commit()
    # Fixed: the Python-2-only `except SolrError, exc:` form is a
    # SyntaxError on Python 3.
    except SolrError as exc:
        add_or_update_opinions.retry(exc=exc, countdown=30)
def add_or_update_cluster(pk, force_commit=False):
    """Index every sub-opinion of the given OpinionCluster in Solr."""
    si = scorched.SolrInterface(settings.SOLR_OPINION_URL, mode='w')
    try:
        cluster = OpinionCluster.objects.get(pk=pk)
        si.add([op.as_search_dict() for op in cluster.sub_opinions.all()])
        if force_commit:
            si.commit()
    except SolrError as exc:
        add_or_update_cluster.retry(exc=exc, countdown=30)
def add_or_update_people(item_pks, force_commit=False):
    """Index the given Person records in Solr; retry in 30s on error."""
    si = scorched.SolrInterface(settings.SOLR_PEOPLE_URL, mode='w')
    people = Person.objects.filter(pk__in=item_pks)
    try:
        si.add([person.as_search_dict() for person in people])
        if force_commit:
            si.commit()
    except SolrError as exc:
        add_or_update_people.retry(exc=exc, countdown=30)
def get(self, request, *args, **kwargs):
    """Return the most recent manifests, newest first, paginated."""
    page = request.GET.get('page')
    # NOTE(review): the start offset uses a hard-coded page size of 12 while
    # the row count uses RECENT_MANIFEST_COUNT — confirm these agree.
    start = (int(page) - 1) * 12 if page else 0

    solr_conn = scorched.SolrInterface(settings.SOLR_SERVER)
    query = (
        solr_conn.query()
        .set_requesthandler('/minimal')
        .sort_by("-created_timestamp")
        .paginate(start=start, rows=RECENT_MANIFEST_COUNT)
    )
    response = query.execute()
    return Response(
        format_response(request, response, page_by=RECENT_MANIFEST_COUNT))
def handle(self, *args, **options):
    """Management command: wipe every record in the credentials collection."""
    solr = scorched.SolrInterface(settings.SOLR_URLS['credentials'])
    try:
        solr.delete_all()
        solr.commit()
    except Exception as e:
        raise CommandError('Collection could not be deleted: {}'.format(e))

    success_msg = self.style.SUCCESS(
        'Successfully deleted all records in "credentials" collection')
    self.stdout.write(success_msg)
def do_minimal_search(request):
    """Run the user's query through the '/minimal' handler, 10 per page."""
    page = request.GET.get('page')
    start = (int(page) - 1) * 10 if page else 0

    solr_conn = scorched.SolrInterface(settings.SOLR_SERVER)
    response = (
        solr_conn.query(request.GET.get('q'))
        .set_requesthandler('/minimal')
        .paginate(start=start)
        .execute()
    )
    return format_response(request, response)
def get(self, request, *args, **kwargs):
    """Look up one manifest by pk in Solr and return its JSON payload."""
    man_pk = self.kwargs['pk']
    solr_conn = scorched.SolrInterface(settings.SOLR_SERVER)
    response = (
        solr_conn.query(man_pk).set_requesthandler('/manifest').execute()
    )

    num_found = response.result.numFound
    if num_found != 1:
        # Zero or multiple hits: we can't resolve a single manifest.
        error_body = {
            "error": "Could not resolve manifest '{}'".format(man_pk),
            "numFound": num_found,
        }
        return Response(error_body, status=status.HTTP_400_BAD_REQUEST)

    return Response(json.loads(response.result.docs[0]['manifest']))
def add_or_update_items(items, solr_object_type):
    """Adds an item to a solr index.

    This function is for use with the update_index command. It's slightly
    different than the commands below because it expects a Django object,
    rather than a primary key. This rejects the standard Celery advice about
    not passing objects around, but thread safety shouldn't be an issue since
    this is only used by the update_index command, and we want to get the
    objects in the task, not in its caller.

    :param items: A list of items or a single item to add or update in Solr
    :param solr_object_type: The solr object type being updated so that the
    URL can be pulled from the settings file. This is essential since
    different celery workers may connect to solr on different machines.
    :return None
    """
    if hasattr(items, "items") or not hasattr(items, "__iter__"):
        # If it's a dict or a single item make it a list
        items = [items]
    search_item_list = []
    for item in items:
        # NOTE(review): the SolrInterface is rebuilt on every iteration even
        # though its URL never changes inside the loop — hoisting it above
        # the loop looks safe; confirm before changing.
        si = scorched.SolrInterface(settings.SOLR_URLS[solr_object_type],
                                    mode='w')
        try:
            if type(item) == Opinion:
                search_item_list.append(item.as_search_dict())
            elif type(item) == RECAPDocument:
                search_item_list.append(item.as_search_dict())
            elif type(item) == Docket:
                # Slightly different here b/c dockets return a list of search
                # dicts.
                search_item_list.extend(item.as_search_list())
            elif type(item) == Audio:
                search_item_list.append(item.as_search_dict())
            elif type(item) == Person:
                search_item_list.append(item.as_search_dict())
        except AttributeError as e:
            print("AttributeError trying to add: %s\n %s" % (item, e))
        except ValueError as e:
            print("ValueError trying to add: %s\n %s" % (item, e))
        except InvalidDocumentError:
            print("Unable to parse: %s" % item)
    try:
        si.add(search_item_list)
    except socket.error as exc:
        add_or_update_items.retry(exc=exc, countdown=120)
    else:
        # NOTE(review): `item` here is the loop variable after the loop, so
        # only the *last* item's date_last_index is updated even when many
        # dockets were indexed — confirm whether that is intended.
        if type(item) == Docket:
            item.date_last_index = now()
            item.save()
def add_or_update_recap_docket(
    data, force_commit=False, update_threshold=60 * 60
):
    """Add an entire docket to Solr or update it if it's already there.

    This is expensive: Solr is de-normalized, so every document in a RECAP
    docket carries a copy of the docket's fields. Changing any docket-level
    field (e.g. the case name) means re-indexing *every* document on the
    docket — potentially thousands. To avoid doing that needlessly, the
    docket records when it was last sent to Solr; if that timestamp is
    recent and nothing changed, this task is a no-op.

    :param data: A dict with keys 'docket_pk' (which docket to modify) and
        'content_updated' (bool: does the docket have new content?).
    :param force_commit: Whether to send a commit to Solr (usually not
        needed).
    :param update_threshold: Items staler than this number of seconds are
        updated; fresher items are skipped unless content changed.
    """
    if data is None:
        return

    si = scorched.SolrInterface(settings.SOLR_RECAP_URL, mode="w")
    docket = Docket.objects.get(pk=data["docket_pk"])

    staleness_cutoff = now() - timedelta(seconds=update_threshold)
    recently_indexed = (
        docket.date_last_index is not None
        and docket.date_last_index > staleness_cutoff
    )
    if recently_indexed and not data.get("content_updated", False):
        # Fresh enough and nothing new: skip the expensive re-index.
        return

    try:
        si.add(docket.as_search_list())
        if force_commit:
            si.commit()
        si.conn.http_connection.close()
    except SolrError as exc:
        add_or_update_recap_docket.retry(exc=exc, countdown=30)
    else:
        docket.date_last_index = now()
        docket.save()
def get(self, request, *args, **kwargs):
    """Return formatted result based on query"""
    q = request.GET.get('q')
    if not q or len(q) < 3:
        # Queries shorter than three characters yield no suggestions.
        return Response({'suggestions': []})

    solr_conn = scorched.SolrInterface(settings.SOLR_SERVER)
    response = solr_conn.query(q).set_requesthandler('/suggest').execute()

    suggestions = response.spellcheck['suggestions']
    nice_list = suggestions[1]['suggestion'] if suggestions else []
    return Response({'suggestions': nice_list})
def _run(self, args, config):
    ''' Main entry point: dispatch the requested Solr maintenance action. '''
    # Pre-bind so the error message below never hits a NameError when
    # config.get() itself is what raised.
    solr_url = None
    try:
        solr_url = config.get('solr', 'url').rstrip('/') + '/'
        solr = scorched.SolrInterface(solr_url)
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate.
        raise cli.CliError('Unable to connect to solr: %s' % solr_url)

    if args.action in ('add', 'add-all'):
        database_config = dict(config.items('database'))
        db = app.database.get_engine(database_config)
        profile_stubs = args.stubs == 1
        if args.action == 'add':
            self.add_models(db, solr, args.models.split(','), profile_stubs)
        else:
            self.add_models(db, solr, profile_stubs=profile_stubs)
        solr.optimize()
        self._logger.info("Added requested documents and optimized index.")
    elif args.action in ('delete', 'delete-all'):
        if args.action == 'delete':
            self.delete_models(solr, args.models.split(','))
        else:
            solr.delete_all()
        solr.optimize()
        self._logger.info("Deleted requested documents and optimized "
                          "index.")
    elif args.action == 'optimize':
        solr.optimize()
        self._logger.info("Optimized index.")
    elif args.action == 'schema':
        schema_url = urljoin(solr_url, 'schema')
        self.schema(schema_url)
def add_or_update_items(items, solr_url=settings.SOLR_OPINION_URL):
    """Adds an item to a solr index.

    This function is for use with the update_index command. It's slightly
    different than the commands below because it expects a Django object,
    rather than a primary key. This rejects the standard Celery advice about
    not passing objects around, but thread safety shouldn't be an issue since
    this is only used by the update_index command, and we want to get the
    objects in the task, not in its caller.
    """
    si = scorched.SolrInterface(solr_url, mode='w')
    if hasattr(items, "items") or not hasattr(items, "__iter__"):
        # A dict or a single object: normalize to a list.
        items = [items]

    search_item_list = []
    for item in items:
        try:
            if type(item) == Docket:
                # Dockets return a *list* of search dicts, so extend.
                search_item_list.extend(item.as_search_list())
            elif type(item) in (Opinion, RECAPDocument, Audio, Person):
                search_item_list.append(item.as_search_dict())
        except AttributeError as e:
            print("AttributeError trying to add: %s\n %s" % (item, e))
        except ValueError as e:
            print("ValueError trying to add: %s\n %s" % (item, e))
        except InvalidDocumentError:
            print("Unable to parse: %s" % item)

    try:
        si.add(search_item_list)
    except socket.error as exc:
        add_or_update_items.retry(exc=exc, countdown=120)
def solr_index(sender, instance, created, **kwargs):
    """Signal handler: upsert this language into Solr (delete, then add)."""
    import uuid
    from django.conf import settings
    import scorched

    conn = scorched.SolrInterface(settings.SOLR_SERVER)
    # Check for existing records and delete them before re-adding, so the
    # index never holds duplicates for one item.
    existing = conn.query(type="language",
                          item_id="{0}".format(instance.id)).execute()
    if existing:
        conn.delete_by_ids([rec['id'] for rec in existing])

    conn.add({
        'id': str(uuid.uuid4()),
        'type': 'language',
        'item_id': instance.id,
        'name': instance.name,
    })
    conn.commit()
def add_items_to_solr(item_pks, app_label, force_commit=False):
    """Add a list of items to Solr

    :param item_pks: An iterable list of item PKs that you wish to add to
    Solr.
    :param app_label: The type of item that you are adding.
    :param force_commit: Whether to send a commit to Solr after your
    addition. This is generally not advised and is mostly used for testing.
    """
    search_dicts = []
    model = apps.get_model(app_label)
    items = model.objects.filter(pk__in=item_pks).order_by()
    for item in items:
        try:
            if model in [OpinionCluster, Docket]:
                # Dockets make a list of items; extend, don't append
                search_dicts.extend(item.as_search_list())
            else:
                search_dicts.append(item.as_search_dict())
        except AttributeError as e:
            print("AttributeError trying to add: %s\n %s" % (item, e))
        except ValueError as e:
            print("ValueError trying to add: %s\n %s" % (item, e))
        except InvalidDocumentError:
            print("Unable to parse: %s" % item)

    si = scorched.SolrInterface(settings.SOLR_URLS[app_label], mode="w")
    try:
        si.add(search_dicts)
        if force_commit:
            si.commit()
    except (socket.error, SolrError) as exc:
        add_items_to_solr.retry(exc=exc, countdown=30)
    else:
        # Mark dockets as updated if needed
        if model == Docket:
            items.update(date_modified=now(), date_last_index=now())
    finally:
        # Close exactly once, on every path. Previously the connection was
        # closed twice on success (in the try AND the else) and not at all
        # when Solr errored before the retry.
        si.conn.http_connection.close()
def solr_index(sender, instance, created, **kwargs):
    """Signal handler: upsert this snippet into Solr (delete, then add)."""
    import uuid
    from django.conf import settings
    import scorched

    conn = scorched.SolrInterface(settings.SOLR_SERVER)
    # Check for existing records and delete them before re-adding, so the
    # index never holds duplicates for one item.
    existing = conn.query(type="snippet",
                          item_id="{0}".format(instance.pk)).execute()
    if existing:
        conn.delete_by_ids([rec['id'] for rec in existing])

    conn.add({
        'id': str(uuid.uuid4()),
        'type': 'snippet',
        'item_id': instance.pk,
        'snippet': instance.snippet,
        'title': instance.title,
        'tags': [tag.name for tag in instance.tags.all()],
    })
    conn.commit()