def setUp(self):
    """Record the test-core names and open an interface to each core."""
    # Set up testing cores in Solr and swap them in
    self.core_name_opinion = settings.SOLR_OPINION_TEST_CORE_NAME
    self.core_name_audio = settings.SOLR_AUDIO_TEST_CORE_NAME
    self.core_name_people = settings.SOLR_PEOPLE_TEST_CORE_NAME
    self.core_name_recap = settings.SOLR_RECAP_TEST_CORE_NAME

    # Read-write interfaces, one per core.
    self.si_opinion = sunburnt.SolrInterface(
        settings.SOLR_OPINION_URL, mode="rw")
    self.si_audio = sunburnt.SolrInterface(
        settings.SOLR_AUDIO_URL, mode="rw")
    self.si_people = sunburnt.SolrInterface(
        settings.SOLR_PEOPLE_URL, mode="rw")
    # This will cause headaches, but it follows in the mission to slowly
    # migrate off of sunburnt. This was added after the items above, and so
    # uses scorched, not sunburnt.
    self.si_recap = scorched.SolrInterface(
        settings.SOLR_RECAP_URL, mode="rw")

    self.all_sis = [
        self.si_opinion,
        self.si_audio,
        self.si_people,
        self.si_recap,
    ]
def setUp(self):
    """Create temporary Solr test cores and open an interface to each."""
    # Set up testing cores in Solr and swap them in
    self.core_name_opinion = settings.SOLR_OPINION_TEST_CORE_NAME
    self.core_name_audio = settings.SOLR_AUDIO_TEST_CORE_NAME
    self.core_name_people = settings.SOLR_PEOPLE_TEST_CORE_NAME
    self.core_name_recap = settings.SOLR_RECAP_TEST_CORE_NAME

    root = settings.INSTALL_ROOT
    # (core name, schema file) pairs, created in the original order.
    for core_name, schema_file in [
        (self.core_name_opinion, 'schema.xml'),
        (self.core_name_audio, 'audio_schema.xml'),
        (self.core_name_people, 'person_schema.xml'),
        (self.core_name_recap, 'recap_schema.xml'),
    ]:
        create_temp_solr_core(
            core_name,
            os.path.join(root, 'Solr', 'conf', schema_file),
        )

    self.si_opinion = sunburnt.SolrInterface(
        settings.SOLR_OPINION_URL, mode='rw')
    self.si_audio = sunburnt.SolrInterface(
        settings.SOLR_AUDIO_URL, mode='rw')
    self.si_people = sunburnt.SolrInterface(
        settings.SOLR_PEOPLE_URL, mode='rw')
    # This will cause headaches, but it follows in the mission to slowly
    # migrate off of sunburnt. This was added after the items above, and so
    # uses scorched, not sunburnt.
    self.si_recap = scorched.SolrInterface(
        settings.SOLR_RECAP_URL, mode='rw')
def __init__(self, *args, **kwargs):
    """Open read-only Solr connections keyed by object-type code."""
    super(Command, self).__init__(*args, **kwargs)
    opinion_conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    audio_conn = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL, mode='r')
    # 'o' = opinions, 'oa' = oral arguments (matches codes used elsewhere).
    self.connections = {
        'o': opinion_conn,
        'oa': audio_conn,
    }
    self.options = {}
    self.valid_ids = {}
def index_sitemap_maker(request):
    """Generate a sitemap index page

    Counts the number of cases in the site, divides by `items_per_sitemap`
    and provides links to the per-type sitemap pages.
    """
    connection_string_obj_type_pairs = (
        (settings.SOLR_OPINION_URL, 'opinions'),
        (settings.SOLR_AUDIO_URL, 'oral-arguments'),
    )
    sites = []
    for connection_string, obj_type in connection_string_obj_type_pairs:
        conn = sunburnt.SolrInterface(connection_string, mode='r')
        search_results_object = conn.raw_query(**index_solr_params).execute()
        count = search_results_object.result.numFound
        # Ceiling division. The previous `count / items_per_sitemap + 1`
        # emitted an extra, empty sitemap page whenever count was an exact
        # multiple of items_per_sitemap.
        num_pages = (count + items_per_sitemap - 1) // items_per_sitemap
        for i in range(1, num_pages + 1):
            sites.append(
                'https://www.courtlistener.com/sitemap-%s.xml?p=%s' %
                (obj_type, i))

    # Random additional sitemaps.
    sites.extend([
        'https://www.courtlistener.com/sitemap-simple-pages.xml',
        'https://www.courtlistener.com/sitemap-visualizations.xml',
    ])
    xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites})
    # These links contain case names, so they should get crawled but not
    # indexed
    response = HttpResponse(xml, content_type='application/xml')
    response['X-Robots-Tag'] = 'noindex, noodp, noarchive, noimageindex'
    return response
def people_sitemap_maker(request):
    """Render one page of the people sitemap, paginated via the `p` param."""
    conn = sunburnt.SolrInterface(settings.SOLR_PEOPLE_URL, mode='r')
    page = int(request.GET.get("p", 1))
    params = {
        'q': '*:*',
        'rows': items_per_sitemap,
        'start': (page - 1) * items_per_sitemap,
        'fl': ','.join(['absolute_url', 'timestamp']),
        'sort': 'dob asc',
        'caller': 'people_sitemap_maker',
    }
    results = conn.raw_query(**params).execute()

    # Translate Solr object into something Django's template can use
    urls = [{
        'location': 'https://www.courtlistener.com%s' % result['absolute_url'],
        'changefreq': 'monthly',
        'lastmod': result['timestamp'],
        'priority': '0.5',
    } for result in results]

    xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls}))
    response = HttpResponse(xml, content_type='application/xml')
    response['X-Robots-Tag'] = 'noindex, noodp, noarchive, noimageindex'
    return response
def coverage_data(request, version, court):
    """Provides coverage data for a court.

    Responds to either AJAX or regular requests.
    """
    if court != 'all':
        # Validate the court id; 404 if it does not exist.
        court_str = get_object_or_404(Court, pk=court).pk
    else:
        court_str = 'all'
    q = request.GET.get('q')
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    response = conn.raw_query(**build_coverage_query(court_str, q)).execute()
    # NOTE(review): this drills into the facet payload — presumably
    # facet_ranges[0][1] is the value of the first (only) range facet and
    # [0][1] within it is the list of (date, count) pairs. Confirm against
    # build_coverage_query's facet configuration before changing.
    counts = response.facet_counts.facet_ranges[0][1][0][1]
    counts = strip_zero_years(counts)

    # Calculate the totals
    annual_counts = {}
    total_docs = 0
    for date_string, count in counts:
        # Key by the 4-digit year prefix of the date string.
        annual_counts[date_string[:4]] = count
        total_docs += count

    return JsonResponse({
        'annual_counts': annual_counts,
        'total': total_docs,
    }, safe=True)
def make_court_variable():
    """Return non-testing courts annotated with per-court opinion counts."""
    courts = Court.objects.exclude(jurisdiction=Court.TESTING_COURT)
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    facet_response = conn.raw_query(**build_court_count_query()).execute()
    court_count_tuples = facet_response.facet_counts.facet_fields['court_exact']
    return annotate_courts_with_counts(courts, court_count_tuples)
def add_or_update_items(items, solr_url=settings.SOLR_OPINION_URL):
    """Adds an item to a solr index.

    This function is for use with the update_index command. It's slightly
    different than the commands below because it expects a Django object,
    rather than a primary key. This rejects the standard Celery advice about
    not passing objects around, but thread safety shouldn't be an issue since
    this is only used by the update_index command, and we want to query and
    build the SearchDocument objects in the task, not in its caller.
    """
    si = sunburnt.SolrInterface(solr_url, mode='w')
    if hasattr(items, "items") or not hasattr(items, "__iter__"):
        # If it's a dict or a single item make it a list
        items = [items]
    search_item_list = []
    for item in items:
        try:
            # isinstance replaces the old `type(item) ==` comparison, which
            # silently skipped subclass/proxy instances of Audio/Opinion.
            if isinstance(item, Audio):
                search_item_list.append(SearchAudioFile(item))
            elif isinstance(item, Opinion):
                search_item_list.append(SearchDocument(item))
        except AttributeError as e:
            print("AttributeError trying to add: %s\n %s" % (item, e))
        except ValueError as e:
            print("ValueError trying to add: %s\n %s" % (item, e))
        except InvalidDocumentError:
            print("Unable to parse: %s" % item)

    try:
        si.add(search_item_list)
    except socket.error as exc:
        # Solr was unreachable; let Celery retry the task in two minutes.
        add_or_update_items.retry(exc=exc, countdown=120)
def view_opinion(request, pk, _): """Using the cluster ID, return the cluster of opinions. We also test if the cluster ID is a favorite for the user, and send data if needed. If it's a favorite, we send the bound form for the favorite so it can populate the form on the page. If it is not a favorite, we send the unbound form. """ # Look up the court, cluster, title and favorite information cluster = get_object_or_404(OpinionCluster, pk=pk) title = '%s, %s' % ( trunc(best_case_name(cluster), 100), cluster.citation_string, ) get_string = search_utils.make_get_string(request) try: fave = Favorite.objects.get( cluster_id=cluster.pk, user=request.user, ) favorite_form = FavoriteForm(instance=fave) except (ObjectDoesNotExist, TypeError): # Not favorited or anonymous user favorite_form = FavoriteForm( initial={ 'cluster_id': cluster.pk, 'name': trunc(best_case_name(cluster), 100, ellipsis='...'), } ) # Get the citing results from Solr for speed. conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r') q = { 'q': 'cites:({ids})'.format( ids=' OR '.join([str(pk) for pk in (cluster.sub_opinions .values_list('pk', flat=True))]) ), 'rows': 5, 'start': 0, 'sort': 'citeCount desc', 'caller': 'view_opinion', } citing_clusters = conn.raw_query(**q).execute() return render_to_response( 'view_opinion.html', { 'title': title, 'cluster': cluster, 'favorite_form': favorite_form, 'get_string': get_string, 'private': cluster.blocked, 'citing_clusters': citing_clusters, 'top_authorities': cluster.authorities[:5], }, RequestContext(request) )
def add_or_update_cluster(pk, force_commit=True):
    """(Re)index every sub-opinion of the cluster identified by ``pk``."""
    si = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='w')
    try:
        cluster = OpinionCluster.objects.get(pk=pk)
        docs = [SearchDocument(op) for op in cluster.sub_opinions.all()]
        si.add(docs)
        if force_commit:
            si.commit()
    except SolrError as exc:
        # Retry the Celery task after a short back-off.
        add_or_update_cluster.retry(exc=exc, countdown=30)
def add_or_update_people(item_pks, force_commit=True):
    """(Re)index the Person records with the given primary keys."""
    si = sunburnt.SolrInterface(settings.SOLR_PEOPLE_URL, mode='w')
    try:
        people = Person.objects.filter(pk__in=item_pks)
        si.add([SearchPerson(person) for person in people])
        if force_commit:
            si.commit()
    except SolrError as exc:
        # Retry the Celery task after a short back-off.
        add_or_update_people.retry(exc=exc, countdown=30)
def add_or_update_opinions(item_pks, force_commit=True):
    """(Re)index the Opinion records with the given primary keys."""
    si = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='w')
    try:
        opinions = Opinion.objects.filter(pk__in=item_pks)
        si.add([SearchDocument(opinion) for opinion in opinions])
        if force_commit:
            si.commit()
    except SolrError as exc:
        # Retry the Celery task after a short back-off.
        add_or_update_opinions.retry(exc=exc, countdown=30)
def add_or_update_audio_files(item_pks, force_commit=True):
    """(Re)index the Audio records with the given primary keys."""
    si = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL, mode='w')
    try:
        audio_files = Audio.objects.filter(pk__in=item_pks)
        si.add([SearchAudioFile(af) for af in audio_files])
        if force_commit:
            si.commit()
    except SolrError as exc:
        # Retry the Celery task after a short back-off.
        add_or_update_audio_files.retry(exc=exc, countdown=30)
def items(self, obj):
    """Return the 20 most recently argued items across all jurisdictions."""
    solr_params = {
        'q': '*:*',
        'sort': 'dateArgued desc',
        'rows': '20',
        'start': '0',
        'caller': 'AllJurisdictionsPodcast',
    }
    conn = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL, mode='r')
    return conn.raw_query(**solr_params).execute()
def place_facet_queries(cd, conn=None):
    """Get facet values for the status filters

    Using the search form, query Solr and get the values for the status
    filters.

    :param cd: cleaned data from the search form.
    :param conn: optional read-only Solr connection. The old signature built
        the SolrInterface in the default argument, which opened a connection
        as a side effect when the module was imported and then shared that
        single instance across every call; the connection is now created
        lazily per call when omitted. Callers that pass a connection are
        unaffected.
    """
    if conn is None:
        conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')

    # Build up all the queries needed
    facet_params = {
        'rows': '0',
        'facet': 'true',
        'facet.mincount': 0,
        'facet.field': '{!ex=dt}status_exact',
        'q': cd['q'] or '*:*',
        'caller': 'facet_parameters',
    }
    fq = []

    # Case Name and judges
    if cd['case_name']:
        fq.append(make_fq(cd, 'caseName', 'case_name'))
    if cd['judge']:
        fq.append(make_fq(cd, 'judge', 'judge'))

    # Citations
    if cd['citation']:
        fq.append(make_fq_proximity_query(cd, 'citation', 'citation'))
    if cd['docket_number']:
        fq.append(make_fq(cd, 'docketNumber', 'docket_number'))
    if cd['neutral_cite']:
        fq.append(make_fq(cd, 'neutralCite', 'neutral_cite'))
    fq.append(
        make_date_query('dateFiled', cd['filed_before'], cd['filed_after']))
    fq.append(make_cite_count_query(cd))

    # Faceting
    selected_courts_string = get_selected_field_string(cd, 'court_')
    # Status facets depend on court checkboxes
    selected_stats_string = get_selected_field_string(cd, 'stat_')
    if len(selected_stats_string) > 0:
        fq.extend([
            '{!tag=dt}status_exact:(%s)' % selected_stats_string,
            'court_exact:(%s)' % selected_courts_string,
        ])

    # If a param has been added to the fq variables, then we add them to the
    # main_params var. Otherwise, we don't, as doing so throws an error.
    if len(fq) > 0:
        facet_params['fq'] = fq

    stat_facet_fields = conn.raw_query(
        **facet_params).execute().facet_counts.facet_fields
    return stat_facet_fields
def setUp(self):
    # Set up testing cores in Solr and swap them in
    self.core_name_opinion = settings.SOLR_OPINION_TEST_CORE_NAME
    self.core_name_audio = settings.SOLR_AUDIO_TEST_CORE_NAME

    # The opinion core uses the default schema; the audio core needs its own
    # schema file and instance directory.
    create_solr_core(self.core_name_opinion)
    create_solr_core(
        self.core_name_audio,
        schema=os.path.join(settings.INSTALL_ROOT, 'Solr', 'conf',
                            'audio_schema.xml'),
        instance_dir='/usr/local/solr/example/solr/audio',
    )
    # swap_solr_core('collection1', self.core_name_opinion)
    # swap_solr_core('audio', self.core_name_audio)
    self.si_opinion = sunburnt.SolrInterface(settings.SOLR_OPINION_URL,
                                             mode='rw')
    self.si_audio = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL,
                                           mode='rw')
    # NOTE(review): committing up front presumably flushes any leftover
    # state so each test starts from a committed index -- confirm.
    self.si_opinion.commit()
    self.si_audio.commit()
def items(self, obj):
    """Do a Solr query here. Return the first 20 results"""
    solr_params = {
        'q': '*:*',
        'sort': 'dateFiled desc',
        'rows': '20',
        'start': '0',
        'caller': 'AllJurisdictionsFeed',
    }
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    return conn.raw_query(**solr_params).execute()
def oral_argument_sitemap_maker(request):
    """Render one page of the oral-argument sitemap, paginated via `p`."""
    conn = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL, mode='r')
    page = int(request.GET.get("p", 1))
    params = {
        'q': '*:*',
        'rows': items_per_sitemap,
        'start': (page - 1) * items_per_sitemap,
        'fl': ','.join([
            'absolute_url',
            'dateArgued',
            'local_path',
            'citeCount',
            'timestamp',
        ]),
        'sort': 'dateArgued asc',
        'caller': 'oral_argument_sitemap_maker',
    }
    search_results_object = conn.raw_query(**params).execute()

    # Translate Solr object into something Django's template can use. Each
    # result yields its HTML page plus, when present, its local media file.
    urls = []
    for result in search_results_object:
        url_strs = ['https://www.courtlistener.com%s' %
                    result['absolute_url']]
        local_path = result.get('local_path')
        if local_path:
            url_strs.append('https://www.courtlistener.com/%s' % local_path)
        for url_str in url_strs:
            # Media files (mp3s) get a lower priority than the HTML pages.
            priority = '0.3' if 'mp3' in url_str else '0.5'
            urls.append({
                'location': url_str,
                'changefreq': 'yearly',
                'lastmod': result['timestamp'],
                'priority': priority,
            })

    xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls}))
    # These links contain case names, so they should get crawled but not
    # indexed
    response = HttpResponse(xml, content_type='application/xml')
    response['X-Robots-Tag'] = 'noindex, noodp, noarchive, noimageindex'
    return response
def items(self, obj):
    """ Returns a list of items to publish in this feed. """
    solr_params = {
        'q': '*:*',
        'fq': 'court_exact:%s' % obj.pk,
        'sort': 'dateArgued desc',
        'rows': '20',
        'start': '0',
        'caller': 'JurisdictionPodcast',
    }
    conn = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL, mode='r')
    return conn.raw_query(**solr_params).execute()
def do_search(request, rows=20, order_by=None, type=None):
    """Run a search for the request and gather facet/court context.

    NOTE(review): within this excerpt only the error path returns a value;
    the success path presumably continues below this excerpt -- confirm in
    the full file.
    """
    # Bind the search form.
    search_form = SearchForm(request.GET)
    if search_form.is_valid():
        cd = search_form.cleaned_data
        # Allows an override by calling methods.
        if order_by:
            cd['order_by'] = order_by
        if type:
            cd['type'] = type
        search_form = _clean_form(request, cd)
        try:
            if cd['type'] == 'o':
                conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL,
                                              mode='r')
                stat_facet_fields = search_utils.place_facet_queries(cd, conn)
                status_facets = search_utils.make_stats_variable(
                    stat_facet_fields, search_form)
            elif cd['type'] == 'oa':
                conn = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL,
                                              mode='r')
                # Oral arguments have no status facets.
                status_facets = None
            results_si = conn.raw_query(**search_utils.build_main_query(cd))

            courts = Court.objects.filter(in_use=True).values(
                'pk', 'short_name', 'jurisdiction',
                'has_oral_argument_scraper')
            courts, court_count_human, court_count = search_utils\
                .merge_form_with_courts(courts, search_form)
        except Exception, e:
            logger.warning("Error loading search with request: %s" %
                           request.GET)
            logger.warning("Error was %s" % e)
            return {'error': True}
def items(self, obj):
    """Return up to 20 results matching the feed's search query."""
    search_form = SearchForm(obj.GET)
    if not search_form.is_valid():
        # Invalid query string: publish an empty feed.
        return []
    cd = search_form.cleaned_data
    main_params = search_utils.build_main_query(cd, highlight=False)
    main_params.update({
        'sort': 'dateArgued desc',
        'rows': '20',
        'start': '0',
        'caller': 'SearchFeed',
    })
    conn = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL, mode='r')
    return conn.raw_query(**main_params).execute()
def setUp(self):
    """Create opinion/audio/person test cores and open an interface to each."""
    # Set up testing cores in Solr and swap them in
    self.core_name_opinion = settings.SOLR_OPINION_TEST_CORE_NAME
    self.core_name_audio = settings.SOLR_AUDIO_TEST_CORE_NAME
    self.core_name_people = settings.SOLR_PEOPLE_TEST_CORE_NAME

    conf_dir = os.path.join(settings.INSTALL_ROOT, 'Solr', 'conf')
    # (core name, schema file) pairs, created in the original order.
    for core_name, schema_file in [
        (self.core_name_opinion, 'schema.xml'),
        (self.core_name_audio, 'audio_schema.xml'),
        (self.core_name_people, 'person_schema.xml'),
    ]:
        create_temp_solr_core(core_name,
                              os.path.join(conf_dir, schema_file))

    self.si_opinion = sunburnt.SolrInterface(
        settings.SOLR_OPINION_URL, mode='rw')
    self.si_audio = sunburnt.SolrInterface(
        settings.SOLR_AUDIO_URL, mode='rw')
    self.si_people = sunburnt.SolrInterface(
        settings.SOLR_PEOPLE_URL, mode='rw')
def match_citation(citation, citing_doc=None): """For a citation object, try to match it to an item in the database using a variety of heuristics. Returns: - a Solr Result object with the results, or an empty list if no hits """ # TODO: Create shared solr connection for all queries conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r') main_params = { 'q': '*', 'fq': [ 'status:Precedential', # Non-precedential documents aren't cited ], 'caller': 'citation.match_citations.match_citation', } if citing_doc is not None: # Eliminate self-cites. main_params['fq'].append('-id:%s' % citing_doc.pk) # Set up filter parameters if citation.year: start_year = end_year = citation.year else: start_year, end_year = get_years_from_reporter(citation) if citing_doc is not None and citing_doc.cluster.date_filed: end_year = min(end_year, citing_doc.cluster.date_filed.year) main_params['fq'].append( 'dateFiled:%s' % build_date_range(start_year, end_year) ) if citation.court: main_params['fq'].append('court_exact:%s' % citation.court) # Take 1: Use a phrase query to search the citation field. main_params['fq'].append( 'citation:("%s")' % citation.base_citation() ) results = conn.raw_query(**main_params).execute() if len(results) == 1: return results if len(results) > 1: if citing_doc is not None and citation.defendant: # Refine using defendant, if there is one results = case_name_query(conn, main_params, citation, citing_doc) return results # Give up. return []
def handle(self, *args, **options):
    """Dispatch to update/delete/commit/optimize based on the options."""
    self.verbosity = int(options.get('verbosity', 1))
    self.solr_url = options['solr_url']
    self.si = sunburnt.SolrInterface(self.solr_url, mode='rw')
    self.options = options
    self.type = options['type']
    self.noinput = options['noinput']
    if options['update']:
        if self.verbosity >= 1:
            self.stdout.write('Running in update mode...\n')
        if options.get('everything'):
            self.add_or_update_all()
        elif options.get('datetime'):
            self.add_or_update_by_datetime(options['datetime'])
        elif options.get('query'):
            # Updating by query is not supported; bail out.
            self.stderr.write("Updating by query not implemented.")
            sys.exit(1)
        elif options.get('items'):
            self.add_or_update(*options['items'])
    elif options.get('delete'):
        if self.verbosity >= 1:
            self.stdout.write('Running in deletion mode...\n')
        if options.get('everything'):
            self.delete_all()
        elif options.get('datetime'):
            self.delete_by_datetime(options['datetime'])
        elif options.get('query'):
            self.delete_by_query(options['query'])
        elif options.get('items'):
            self.delete(*options['items'])
    elif options.get('do_commit'):
        self.commit()
    elif options.get('optimize'):
        self.optimize()
    else:
        # No recognized action was requested.
        self.stderr.write('Error: You must specify whether you wish to '
                          'update, delete, commit, or optimize your '
                          'index.\n')
        sys.exit(1)
def get_expected_item_count(self):
    # OpinionsSitemap uses the solr index to generate the page, so the only
    # accurate count comes from the index itself which will also be based on
    # the fixtures.
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    # Copy the shared params instead of mutating the module-level dict; the
    # old code wrote `rows` directly into opinion_solr_params, leaking the
    # test's value into every other user of that dict.
    params = dict(opinion_solr_params)
    params['rows'] = 1000
    search_results_object = conn.raw_query(**params).execute()
    # the underlying SitemapTest relies on counting url elements in the xml
    # response...this logic mimics the creation of the xml, so we at least
    # know what we *should* get getting for a count if the SiteMapTest's
    # HTTP client-based test gets an HTTP 200
    count = 0
    for result in search_results_object:
        if result.get('local_path'):
            # Opinions with a local file emit three URLs in the sitemap.
            count += 3
        else:
            count += 2
    return count
def handle(self, *args, **options):
    """Validate option combinations, build the queryset, and reindex it."""
    super(Command, self).handle(*args, **options)
    both_list_and_endpoints = options.get("doc_id") is not None and (
        options.get("start_id") is not None
        or options.get("end_id") is not None
        or options.get("filed_after") is not None)
    # True when no selection option was provided at all. The old expression,
    # `not any([... is None ..., all is False])`, was only True when *every*
    # option was supplied, so running with no options at all silently fell
    # through and reindexed everything.
    no_option = not any([
        options.get("doc_id") is not None,
        options.get("start_id") is not None,
        options.get("end_id") is not None,
        options.get("filed_after") is not None,
        options.get("all"),
    ])
    if both_list_and_endpoints or no_option:
        raise CommandError("Please specify either a list of documents, a "
                           "range of ids, a range of dates, or "
                           "everything.")

    self.index = options["index"]
    self.si = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode="rw")

    # Use query chaining to build the query
    query = Opinion.objects.all()
    if options.get("doc_id"):
        query = query.filter(pk__in=options.get("doc_id"))
    if options.get("end_id"):
        query = query.filter(pk__lte=options.get("end_id"))
    if options.get("start_id"):
        query = query.filter(pk__gte=options.get("start_id"))
    if options.get("filed_after"):
        query = query.filter(
            cluster__date_filed__gte=options["filed_after"])
    if options.get("all"):
        query = Opinion.objects.all()
    # query.count() issues a COUNT(*) against the database; the old code
    # called it twice in a row. Run it once and reuse the value.
    self.count = query.count()
    count = self.count
    self.average_per_s = 0
    self.timings = []
    opinion_pks = query.values_list("pk", flat=True).iterator()
    self.update_documents(opinion_pks, count)
    self.add_to_solr()
def handle(self, *args, **options):
    """Validate option combinations, build the queryset, and reindex it."""
    super(Command, self).handle(*args, **options)
    both_list_and_endpoints = (options.get('doc_id') is not None and
                               (options.get('start_id') is not None or
                                options.get('end_id') is not None or
                                options.get('filed_after') is not None))
    # True when no selection option was provided at all. The old expression,
    # `not any([... is None ..., all is False])`, was only True when *every*
    # option was supplied, so running with no options at all silently fell
    # through and reindexed everything.
    no_option = not any([
        options.get('doc_id') is not None,
        options.get('start_id') is not None,
        options.get('end_id') is not None,
        options.get('filed_after') is not None,
        options.get('all'),
    ])
    if both_list_and_endpoints or no_option:
        raise CommandError('Please specify either a list of documents, a '
                           'range of ids, a range of dates, or '
                           'everything.')

    self.index = options['index']
    self.si = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='rw')

    # Use query chaining to build the query
    query = Opinion.objects.all()
    if options.get('doc_id'):
        query = query.filter(pk__in=options.get('doc_id'))
    if options.get('end_id'):
        query = query.filter(pk__lte=options.get('end_id'))
    if options.get('start_id'):
        query = query.filter(pk__gte=options.get('start_id'))
    if options.get('filed_after'):
        query = query.filter(
            cluster__date_filed__gte=options['filed_after'])
    if options.get('all'):
        query = Opinion.objects.all()
    # query.count() issues a COUNT(*) against the database; the old code
    # called it twice in a row. Run it once and reuse the value.
    self.count = query.count()
    count = self.count
    self.average_per_s = 0
    self.timings = []
    opinion_pks = query.values_list('pk', flat=True).iterator()
    self.update_documents(opinion_pks, count)
    self.add_to_solr()
def opinion_sitemap_maker(request):
    """Render one page of the opinion sitemap, paginated via the `p` param."""
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    page = request.GET.get("p", '1')
    # Copy the shared params instead of mutating the module-level dict. The
    # old code wrote 'start' into opinion_solr_params directly, which is not
    # thread safe under a multithreaded server and leaked one request's
    # offset to every other user of the dict.
    params = dict(opinion_solr_params)
    params['start'] = (int(page) - 1) * items_per_sitemap
    search_results_object = conn.raw_query(**params).execute()

    # Translate Solr object into something Django's template can use
    urls = []
    for result in search_results_object:
        url_strs = [
            'https://www.courtlistener.com%s' % result['absolute_url'],
            'https://www.courtlistener.com%sauthorities/' %
            result['absolute_url'],
        ]
        if result.get('local_path') and result.get('local_path') != '':
            url_strs.append(
                'https://www.courtlistener.com/%s' % result['local_path'])
        for url_str in url_strs:
            # Authorities pages and source documents get a lower priority
            # than the main opinion pages.
            if any(s in url_str for s in
                   ['authorities', 'pdf', 'doc', 'wpd']):
                priority = '0.3'
            else:
                priority = '0.5'
            urls.append({
                'location': url_str,
                'changefreq': 'yearly',
                'lastmod': result['timestamp'],
                'priority': priority,
            })

    xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls}))
    # These links contain case names, so they should get crawled but not
    # indexed
    response = HttpResponse(xml, content_type='application/xml')
    response['X-Robots-Tag'] = 'noindex, noodp, noarchive, noimageindex'
    return response
def get_citing_clusters_with_cache(cluster, is_bot):
    """Use Solr to get clusters citing the one we're looking at

    If it's not a bot, cache the results for a long time. If it is a bot,
    load those results if they exist. Otherwise, return None.

    :param cluster: The cluster we're targeting
    :type cluster: OpinionCluster
    :param is_bot: Whether the page running this was loaded by a bot
    :type is_bot: bool
    :return: A search result of the top five citing clusters or None
    :rtype: SolrSearch or None
    """
    cache = caches["db_cache"]
    cache_key = "citing:%s" % cluster.pk
    if is_bot:
        # If the cache was set by a real user, bots can access it. But if no
        # user set the cache, this will just return None.
        return cache.get(cache_key)

    # Get the citing results from Solr for speed. Only do this for humans
    # to save on disk usage.
    sub_opinion_pks = cluster.sub_opinions.values_list("pk", flat=True)
    query = {
        "q": "cites:(%s)" % " OR ".join(str(pk) for pk in sub_opinion_pks),
        "rows": 5,
        "start": 0,
        "sort": "citeCount desc",
        "caller": "view_opinion",
    }
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode="r")
    citing_clusters = conn.raw_query(**query).execute()

    # Cache the result for a month.
    a_month = 60 * 60 * 24 * 30
    cache.set(cache_key, citing_clusters, a_month)
    return citing_clusters
def handle(self, *args, **options):
    """Validate option combinations, build the queryset, and update docs."""
    both_list_and_endpoints = (options.get('doc_id') is not None and
                               (options.get('start_id') is not None or
                                options.get('end_id') is not None or
                                options.get('filed_after') is not None))
    # True when no selection option was provided at all. The old expression,
    # `not any([... is None ..., all is False])`, was only True when *every*
    # option was supplied, so running with no options at all silently fell
    # through and reindexed everything.
    no_option = not any([
        options.get('doc_id') is not None,
        options.get('start_id') is not None,
        options.get('end_id') is not None,
        options.get('filed_after') is not None,
        options.get('all'),
    ])
    if both_list_and_endpoints or no_option:
        raise CommandError('Please specify either a list of documents, a '
                           'range of ids, a range of dates, or '
                           'everything.')

    self.index = options['index']
    self.si = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='rw')

    # Use query chaining to build the query
    query = Opinion.objects.all()
    if options.get('doc_id'):
        query = query.filter(pk__in=options.get('doc_id'))
    if options.get('end_id'):
        query = query.filter(pk__lte=options.get('end_id'))
    if options.get('start_id'):
        query = query.filter(pk__gte=options.get('start_id'))
    if options.get('filed_after'):
        query = query.filter(
            cluster__date_filed__gte=options['filed_after'])
    if options.get('all'):
        query = Opinion.objects.all()
    count = query.count()
    # Stream the queryset in chunks to keep memory bounded.
    docs = queryset_generator(query, chunksize=10000)
    self.update_documents(docs, count)