def core_docket_data(request, pk):
    """Gather the core data for a docket, party, or IDB page.

    :param request: The HttpRequest; ``request.user`` drives the favorite
        lookup and the alert check.
    :param pk: PK of the Docket to load (404s if absent).
    :return: A two-tuple of (Docket, template-context dict).
    """
    docket = get_object_or_404(Docket, pk=pk)

    # Title is the truncated case name plus the docket number, skipping any
    # component that is blank so we never emit a dangling comma.
    title_parts = [
        trunc(best_case_name(docket), 100, ellipsis="..."),
        docket.docket_number,
    ]
    title = ', '.join(part for part in title_parts if part.strip())

    try:
        favorite = Favorite.objects.get(docket_id=docket.pk,
                                        user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited, or the user is anonymous (TypeError from the query).
        favorite_form = FavoriteForm(initial={
            'docket_id': docket.pk,
            'name': trunc(best_case_name(docket), 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=favorite)

    # Only authenticated users can have docket alerts.
    has_alert = False
    if request.user.is_authenticated:
        has_alert = DocketAlert.objects.filter(
            docket=docket, user=request.user).exists()

    context = {
        'docket': docket,
        'title': title,
        'favorite_form': favorite_form,
        'has_alert': has_alert,
        'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
        'private': docket.blocked,
    }
    return docket, context
def core_docket_data(request, pk):
    """Gather the core data for a docket, party, or IDB page.

    :param request: The HttpRequest; ``request.user`` is used for the
        favorite lookup and the alert check.
    :param pk: The primary key of the Docket to load (404 if missing).
    :return: A two-tuple of the Docket and a template-context dict.
    """
    docket = get_object_or_404(Docket, pk=pk)
    # Case name plus docket number, skipping any blank component.
    title = ', '.join([
        s for s in [
            trunc(best_case_name(docket), 100, ellipsis="..."),
            docket.docket_number,
        ] if s.strip()
    ])
    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                'docket_id': docket.pk,
                'name': trunc(best_case_name(docket), 100, ellipsis='...'),
            })
    else:
        favorite_form = FavoriteForm(instance=fave)
    # Alerts only exist for authenticated users.
    has_alert = False
    if request.user.is_authenticated:
        has_alert = DocketAlert.objects.filter(docket=docket,
                                               user=request.user).exists()
    return docket, {
        'docket': docket,
        'title': title,
        'favorite_form': favorite_form,
        'has_alert': has_alert,
        'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
        'private': docket.blocked,
    }
def view_opinion(request, pk, _): """Using the cluster ID, return the cluster of opinions. We also test if the cluster ID is a favorite for the user, and send data if needed. If it's a favorite, we send the bound form for the favorite so it can populate the form on the page. If it is not a favorite, we send the unbound form. """ # Look up the court, cluster, title and favorite information cluster = get_object_or_404(OpinionCluster, pk=pk) title = '%s, %s' % ( trunc(best_case_name(cluster), 100), cluster.citation_string, ) get_string = search_utils.make_get_string(request) try: fave = Favorite.objects.get( cluster_id=cluster.pk, user=request.user, ) favorite_form = FavoriteForm(instance=fave) except (ObjectDoesNotExist, TypeError): # Not favorited or anonymous user favorite_form = FavoriteForm( initial={ 'cluster_id': cluster.pk, 'name': trunc(best_case_name(cluster), 100, ellipsis='...'), } ) # Get the citing results from Solr for speed. conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r') q = { 'q': 'cites:({ids})'.format( ids=' OR '.join([str(pk) for pk in (cluster.sub_opinions .values_list('pk', flat=True))]) ), 'rows': 5, 'start': 0, 'sort': 'citeCount desc', 'caller': 'view_opinion', } citing_clusters = conn.raw_query(**q).execute() return render_to_response( 'view_opinion.html', { 'title': title, 'cluster': cluster, 'favorite_form': favorite_form, 'get_string': get_string, 'private': cluster.blocked, 'citing_clusters': citing_clusters, 'top_authorities': cluster.authorities[:5], }, RequestContext(request) )
def view_parties(request, docket_id, slug):
    """Show the parties and attorneys tab on the docket.

    :param request: The HttpRequest; ``request.user`` drives the favorite
        lookup.
    :param docket_id: PK of the Docket whose parties should be shown.
    :param slug: Unused slug segment from the URL pattern.
    """
    docket = get_object_or_404(Docket, pk=docket_id)
    # Case name plus docket number, skipping blank components.
    title = ', '.join([
        s for s in [
            trunc(best_case_name(docket), 100, ellipsis="..."),
            docket.docket_number,
        ] if s.strip()
    ])
    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                'docket_id': docket.pk,
                'name': trunc(best_case_name(docket), 100, ellipsis='...'),
            })
    else:
        favorite_form = FavoriteForm(instance=fave)

    # We work with this data at the level of party_types so that we can group
    # the parties by this field. From there, we do a whole mess of
    # prefetching, which reduces the number of queries needed for this down
    # to four instead of potentially thousands (good times!)
    party_types = docket.party_types.select_related('party').prefetch_related(
        Prefetch(
            'party__roles',
            queryset=Role.objects.filter(docket=docket).order_by(
                'attorney_id', 'role',
                'date_action').select_related('attorney').prefetch_related(
                    Prefetch(
                        'attorney__organizations',
                        queryset=AttorneyOrganization.objects.filter(
                            attorney_organization_associations__docket=docket).
                        distinct(),
                        to_attr='firms_in_docket',
                    )))).order_by('name', 'party__name')

    parties = []
    # groupby needs input pre-sorted on the grouping key; the
    # order_by('name', ...) above provides that ordering.
    for party_type_name, party_types in groupby(party_types, lambda x: x.name):
        party_types = list(party_types)
        parties.append({
            'party_type_name': party_type_name,
            'party_type_objects': party_types
        })
    return render(
        request, 'docket_parties.html', {
            'docket': docket,
            'title': title,
            'parties': parties,
            'favorite_form': favorite_form,
            'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
            'private': docket.blocked,
        })
def view_opinion(request, pk, _): """Using the cluster ID, return the cluster of opinions. We also test if the cluster ID is a favorite for the user, and send data if needed. If it's a favorite, we send the bound form for the favorite so it can populate the form on the page. If it is not a favorite, we send the unbound form. """ # Look up the court, cluster, title and favorite information cluster = get_object_or_404(OpinionCluster, pk=pk) title = ", ".join([ s for s in [ trunc(best_case_name(cluster), 100, ellipsis="..."), cluster.citation_string, ] if s.strip() ]) has_downloads = False for sub_opinion in cluster.sub_opinions.all(): if sub_opinion.local_path or sub_opinion.download_url: has_downloads = True break get_string = make_get_string(request) try: fave = Favorite.objects.get(cluster_id=cluster.pk, user=request.user) except (ObjectDoesNotExist, TypeError): # Not favorited or anonymous user favorite_form = FavoriteForm( initial={ "cluster_id": cluster.pk, "name": trunc(best_case_name(cluster), 100, ellipsis="..."), }) else: favorite_form = FavoriteForm(instance=fave) citing_clusters = get_citing_clusters_with_cache(cluster, is_bot(request)) return render( request, "view_opinion.html", { "title": title, "cluster": cluster, "has_downloads": has_downloads, "favorite_form": favorite_form, "get_string": get_string, "private": cluster.blocked, "citing_clusters": citing_clusters, "top_authorities": cluster.authorities_with_data[:5], "authorities_count": len(cluster.authorities_with_data), }, )
def upload_free_opinion_to_ia(self, rd_pk):
    """Upload a free-opinion PDF from a RECAPDocument to the Internet Archive.

    Celery-style bound task (uses ``self.retry`` / ``self.request.retries`` /
    ``self.max_retries``) that retries on transient IA failures and gives up
    silently once retries are exhausted — a later cron run will pick it up.

    :param rd_pk: PK of the RECAPDocument whose local file should be uploaded.
    """
    rd = RECAPDocument.objects.get(pk=rd_pk)
    d = rd.docket_entry.docket
    file_name = get_document_filename(
        d.court_id,
        d.pacer_case_id,
        rd.document_number,
        0,  # Attachment number is zero for all free opinions.
    )
    bucket_name = get_bucket_name(d.court_id, d.pacer_case_id)
    try:
        responses = upload_to_ia(
            identifier=bucket_name,
            files=rd.filepath_local.path,
            metadata={
                'title': best_case_name(d),
                'collection': settings.IA_COLLECTIONS,
                'contributor':
                    '<a href="https://free.law">Free Law Project</a>',
                'court': d.court_id,
                'language': 'eng',
                'mediatype': 'texts',
                'description': "This item represents a case in PACER, "
                               "the U.S. Government's website for "
                               "federal case data. If you wish to see "
                               "the entire case, please consult PACER "
                               "directly.",
                'licenseurl': 'https://www.usa.gov/government-works',
            },
        )
    except (OverloadedException, ExpatError) as exc:
        # Overloaded: IA wants us to slow down.
        # ExpatError: The syntax of the XML file that's supposed to be
        # returned by IA is bad (or something).
        if self.request.retries == self.max_retries:
            # Give up for now. It'll get done next time cron is run.
            return
        raise self.retry(exc=exc)
    except HTTPError as exc:
        if exc.response.status_code in [
            HTTP_403_FORBIDDEN,    # Can't access bucket, typically.
            HTTP_400_BAD_REQUEST,  # Corrupt PDF, typically.
        ]:
            # Non-retryable client errors: hand the response back to the
            # caller rather than retrying.
            return [exc.response]
        if self.request.retries == self.max_retries:
            # This exception is also raised when the endpoint is overloaded,
            # but doesn't get caught as an OverloadedException due to
            # multiple processes running at the same time. Just give up for
            # now.
            return
        raise self.retry(exc=exc)
    except (requests.Timeout, requests.RequestException) as exc:
        logger.warning("Timeout or unknown RequestException. Unable to upload "
                       "to IA. Trying again if retries not exceeded: %s" % rd)
        if self.request.retries == self.max_retries:
            # Give up for now. It'll get done next time cron is run.
            return
        raise self.retry(exc=exc)
    # All responses OK: record the public IA URL on the document.
    if all(r.ok for r in responses):
        rd.filepath_ia = "https://archive.org/download/%s/%s" % (
            bucket_name, file_name)
        rd.save(do_extraction=False, index=False)
def save(self, *args, **kwargs):
    """Refresh the slug, enforce the RECAP invariant, then persist.

    :raises ValidationError: when a RECAP-sourced docket (source == 1) has no
        PACER case ID.
    """
    # Keep the slug in sync with the (possibly updated) case name.
    self.slug = slugify(trunc(best_case_name(self), 75))

    # RECAP-sourced dockets must always carry a PACER case ID.
    missing_pacer_id = not self.pacer_case_id
    if self.source == 1 and missing_pacer_id:
        raise ValidationError(
            "pacer_case_id cannot be Null or empty in RECAP documents.")

    super(Docket, self).save(*args, **kwargs)
def caption(self):
    """Make a proper caption.

    Builds "<case name>, <first available citation> (<court> <year>)".
    Exactly one citation is used, chosen in priority order below. Neutral
    citations return early because they already embed court/year, so no
    parenthetical is appended.
    """
    caption = best_case_name(self)
    if self.neutral_cite:
        caption += ", %s" % self.neutral_cite
        # neutral cites lack the parentheses, so we're done here.
        return caption
    elif self.federal_cite_one:
        caption += ", %s" % self.federal_cite_one
    elif self.specialty_cite_one:
        caption += ", %s" % self.specialty_cite_one
    elif self.state_cite_regional:
        caption += ", %s" % self.state_cite_regional
    elif self.state_cite_one:
        caption += ", %s" % self.state_cite_one
    elif self.westlaw_cite and self.lexis_cite:
        # If both WL and LEXIS
        caption += ", %s, %s" % (self.westlaw_cite, self.lexis_cite)
    elif self.westlaw_cite:
        # If only WL
        caption += ", %s" % self.westlaw_cite
    elif self.lexis_cite:
        # If only LEXIS
        caption += ", %s" % self.lexis_cite
    elif self.docket.docket_number:
        caption += ", %s" % self.docket.docket_number
    caption += ' ('
    if self.docket.court.citation_string != 'SCOTUS':
        # NOTE(review): substituting a space for a space is a no-op — this
        # looks like a mangled non-breaking-space (or "&nbsp;") replacement.
        # Confirm against the canonical source.
        caption += re.sub(' ', ' ', self.docket.court.citation_string)
        caption += ' '
    # b/c strftime f's up before 1900.
    caption += '%s)' % self.date_filed.isoformat().split('-')[0]
    return caption
def view_recap_document(request, docket_id=None, doc_num=None, att_num=None,
                        slug=''):
    """This view can either load an attachment or a regular document,
    depending on the URL pattern that is matched.

    :param request: The HttpRequest; ``request.user`` drives the favorite
        lookup.
    :param docket_id: PK of the docket the document belongs to.
    :param doc_num: The document number within the docket.
    :param att_num: The attachment number, or None for a main document.
    :param slug: Unused slug segment from the URL pattern.
    """
    item = get_object_or_404(
        RECAPDocument,
        docket_entry__docket__id=docket_id,
        document_number=doc_num,
        attachment_number=att_num,
    )
    # "<desc> – Document #N, Attachment #M in <case name>"; optional parts
    # collapse to empty strings.
    title = '%sDocument #%s%s in %s' % (
        '%s – ' % item.description if item.description else '',
        item.document_number,
        ', Attachment #%s' % item.attachment_number
        if item.document_type == RECAPDocument.ATTACHMENT else '',
        best_case_name(item.docket_entry.docket),
    )
    try:
        fave = Favorite.objects.get(recap_doc_id=item.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(initial={
            'recap_doc_id': item.pk,
            'name': trunc(title, 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=fave)
    return render(request, 'recap_document.html', {
        'document': item,
        'title': title,
        'favorite_form': favorite_form,
        'private': True,  # Always True for RECAP docs.
    })
def upload_recap_json(self, pk):
    """Make a JSON object for a RECAP docket and upload it to IA.

    Bound task: ``self`` is forwarded to ``upload_to_ia``. On success the
    docket's IA bookkeeping fields are reset; on any failure the failure
    counter is incremented instead.

    :param pk: PK of the Docket to serialize and upload.
    """
    d, json_str = generate_ia_json(pk)
    file_name = get_docket_filename(d.court_id, d.pacer_case_id, 'json')
    bucket_name = get_bucket_name(d.court_id, d.pacer_case_id)
    responses = upload_to_ia(
        self,
        identifier=bucket_name,
        files={file_name: StringIO(json_str)},
        title=best_case_name(d),
        collection=settings.IA_COLLECTIONS,
        court_id=d.court_id,
        source_url='https://www.courtlistener.com%s' % d.get_absolute_url(),
        media_type='texts',
        description="This item represents a case in PACER, the U.S. "
                    "Government's website for federal case data. This "
                    "information is uploaded quarterly. To see our most "
                    "recent version please use the source url parameter, "
                    "linked below. To see the canonical source for this "
                    "data, please consult PACER directly.",
    )
    if responses is None:
        # Nothing came back from the upload helper; record a failure.
        increment_failure_count(d)
        return
    if all(r.ok for r in responses):
        # Success: clear failure bookkeeping and record the IA URL.
        d.ia_upload_failure_count = None
        d.ia_date_first_changed = None
        d.ia_needs_upload = False
        d.filepath_ia_json = "https://archive.org/download/%s/%s" % (
            bucket_name, file_name)
        d.save()
    else:
        increment_failure_count(d)
def save(self, index=True, force_commit=False, *args, **kwargs):
    """Persist the cluster with a refreshed slug; optionally queue indexing.

    :param index: When True, enqueue the async cluster-indexing task after
        the save completes.
    :param force_commit: Passed through to the indexing task.
    """
    # Regenerate the slug from the current best case name before saving.
    self.slug = slugify(trunc(best_case_name(self), 75))
    super(OpinionCluster, self).save(*args, **kwargs)
    if not index:
        return
    # Function-scope import, presumably to avoid an import cycle at module
    # load time — verify before moving to the top of the file.
    from cl.search.tasks import add_or_update_cluster
    add_or_update_cluster.delay(self.pk, force_commit)
def core_docket_data(
    request: HttpRequest,
    pk: int,
) -> Tuple[Docket, Dict[str, Union[bool, str, Docket, FavoriteForm]]]:
    """Gather the core data for a docket, party, or IDB page.

    :param request: The HttpRequest; ``request.user`` drives the favorite
        lookup and the alert check.
    :param pk: PK of the Docket to load (404 if missing).
    :return: A two-tuple of the Docket and a template-context dict.
    """
    docket = get_object_or_404(Docket, pk=pk)
    title = make_docket_title(docket)
    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                "docket_id": docket.pk,
                "name": trunc(best_case_name(docket), 100, ellipsis="..."),
            }
        )
    else:
        favorite_form = FavoriteForm(instance=fave)
    has_alert = user_has_alert(request.user, docket)
    return (
        docket,
        {
            "docket": docket,
            "title": title,
            "favorite_form": favorite_form,
            "has_alert": has_alert,
            "timezone": COURT_TIMEZONES.get(docket.court_id, "US/Eastern"),
            "private": docket.blocked,
        },
    )
def view_audio_file(request, pk, _):
    """Show the oral-argument page for one Audio item.

    Also checks whether the item is a favorite of the requesting user and
    hands the template a bound or unbound FavoriteForm accordingly.

    :param request: The HttpRequest.
    :param pk: PK of the Audio object (404 if missing).
    :param _: Unused slug segment from the URL pattern.
    """
    audio = get_object_or_404(Audio, pk=pk)

    try:
        favorite = Favorite.objects.get(audio_id=audio.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited, or the user is anonymous.
        favorite_form = FavoriteForm(
            initial={
                'audio_id': audio.pk,
                'name': trunc(best_case_name(audio.docket), 100,
                              ellipsis='...'),
            })
    else:
        favorite_form = FavoriteForm(instance=favorite)

    context = {
        'title': trunc(audio.case_name, 100),
        'af': audio,
        'favorite_form': favorite_form,
        'get_string': search_utils.make_get_string(request),
        'private': audio.blocked,
    }
    return render(request, 'oral_argument.html', context)
def upload_pdf_to_ia(self, rd_pk):
    """Upload a RECAPDocument's local PDF to the Internet Archive.

    ``self`` appears to be a bound task instance; it is forwarded to
    ``upload_to_ia``. Success clears the failure counter and records the IA
    URL; any failure increments the counter.

    :param rd_pk: PK of the RECAPDocument whose file should be uploaded.
    """
    rd = RECAPDocument.objects.get(pk=rd_pk)
    d = rd.docket_entry.docket
    file_name = get_document_filename(
        d.court_id,
        d.pacer_case_id,
        rd.document_number,
        rd.attachment_number or 0,
    )
    bucket_name = get_bucket_name(d.court_id, d.pacer_case_id)
    responses = upload_to_ia(
        self,
        identifier=bucket_name,
        files=rd.filepath_local.path,
        title=best_case_name(d),
        collection=settings.IA_COLLECTIONS,
        court_id=d.court_id,
        source_url='https://www.courtlistener.com%s' % rd.get_absolute_url(),
        media_type='texts',
        description="This item represents a case in PACER, the U.S. "
                    "Government's website for federal case data. If you wish "
                    "to see the entire case, please consult PACER directly.",
    )
    if responses is None:
        # No responses at all: treat as a failed attempt.
        increment_failure_count(rd)
        return
    if all(r.ok for r in responses):
        rd.ia_upload_failure_count = None
        rd.filepath_ia = "https://archive.org/download/%s/%s" % (bucket_name,
                                                                 file_name)
        rd.save()
    else:
        increment_failure_count(rd)
def view_audio_file(request, pk, _):
    """Using the ID, return the oral argument page.

    We also test if the item is a favorite and send data as such.

    :param request: The HttpRequest.
    :param pk: PK of the Audio object (404 if missing).
    :param _: Unused slug segment from the URL pattern.
    """
    af = get_object_or_404(Audio, pk=pk)
    title = trunc(af.case_name, 100)
    get_string = search_utils.make_get_string(request)
    try:
        fave = Favorite.objects.get(audio_id=af.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(initial={
            'audio_id': af.pk,
            'name': trunc(best_case_name(af.docket), 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=fave)
    return render(request, 'oral_argument.html', {
        'title': title,
        'af': af,
        'favorite_form': favorite_form,
        'get_string': get_string,
        'private': af.blocked,
    })
def caption(self):
    """Make a proper caption.

    Builds "<case name>, <first available citation> (<court> <year>)".
    Exactly one citation is used, chosen in priority order below. Neutral
    citations return early because they already embed court/year, so no
    parenthetical is appended.
    """
    caption = best_case_name(self)
    if self.neutral_cite:
        caption += ", %s" % self.neutral_cite
        # neutral cites lack the parentheses, so we're done here.
        return caption
    elif self.federal_cite_one:
        caption += ", %s" % self.federal_cite_one
    elif self.federal_cite_two:
        caption += ", %s" % self.federal_cite_two
    elif self.federal_cite_three:
        caption += ", %s" % self.federal_cite_three
    elif self.specialty_cite_one:
        caption += ", %s" % self.specialty_cite_one
    elif self.state_cite_regional:
        caption += ", %s" % self.state_cite_regional
    elif self.state_cite_one:
        caption += ", %s" % self.state_cite_one
    elif self.westlaw_cite and self.lexis_cite:
        # If both WL and LEXIS
        caption += ", %s, %s" % (self.westlaw_cite, self.lexis_cite)
    elif self.westlaw_cite:
        # If only WL
        caption += ", %s" % self.westlaw_cite
    elif self.lexis_cite:
        # If only LEXIS
        caption += ", %s" % self.lexis_cite
    elif self.docket.docket_number:
        caption += ", %s" % self.docket.docket_number
    caption += ' ('
    if self.docket.court.citation_string != 'SCOTUS':
        # NOTE(review): substituting a space for a space is a no-op — this
        # looks like a mangled non-breaking-space (or "&nbsp;") replacement.
        # Confirm against the canonical source.
        caption += re.sub(' ', ' ', self.docket.court.citation_string)
        caption += ' '
    # b/c strftime f's up before 1900.
    caption += '%s)' % self.date_filed.isoformat().split('-')[0]
    return caption
def upload_audio_to_ia(self, af_pk):
    """Upload an oral-argument audio file to the Internet Archive.

    ``self`` appears to be a bound task instance; it is forwarded to
    ``upload_to_ia``. Success clears the failure counter and records the IA
    URL; any failure increments the counter.

    :param af_pk: PK of the Audio object whose original file is uploaded.
    """
    af = Audio.objects.get(pk=af_pk)
    d = af.docket
    # File name preserves the original file's extension (text after the
    # final dot).
    file_name = make_af_filename(
        d.court_id, d.docket_number, d.date_argued,
        af.local_path_original_file.path.rsplit('.', 1)[1])
    bucket_name = get_bucket_name(d.court_id, slugify(d.docket_number))
    responses = upload_to_ia(
        self,
        identifier=bucket_name,
        files={file_name: af.local_path_original_file.path},
        title=best_case_name(d),
        collection=settings.IA_OA_COLLECTIONS,
        court_id=d.court_id,
        source_url='https://www.courtlistener.com%s' % af.get_absolute_url(),
        media_type='audio',
        description='This item represents an oral argument audio file as '
                    'scraped from a U.S. Government website by Free Law '
                    'Project.',
    )
    if responses is None:
        # No responses at all: treat as a failed attempt.
        increment_failure_count(af)
        return
    if all(r.ok for r in responses):
        af.ia_upload_failure_count = None
        af.filepath_ia = "https://archive.org/download/%s/%s" % (bucket_name,
                                                                 file_name)
        af.save()
    else:
        increment_failure_count(af)
def upload_recap_json(self, pk):
    """Make a JSON object for a RECAP docket and upload it to IA.

    ``self`` appears to be a bound task instance forwarded to
    ``upload_to_ia``.

    :param pk: PK of the Docket to serialize and upload.
    """
    # This is a pretty highly optimized query that uses only 13 hits to the
    # DB when generating a docket JSON rendering, regardless of how many
    # related objects the docket has such as docket entries, parties, etc.
    ds = Docket.objects.filter(pk=pk).select_related(
        'originating_court_information',
    ).prefetch_related(
        'panel',
        'parties__attorneys__roles',
        'parties__party_types__criminal_complaints',
        'parties__party_types__criminal_counts',
        # Django appears to have a bug where you can't defer a field on a
        # queryset where you prefetch the values. If you try to, it crashes.
        # We should be able to just do the prefetch below like the ones above
        # and then do the defer statement at the end, but that throws an
        # error.
        Prefetch(
            'docket_entries__recap_documents',
            queryset=RECAPDocument.objects.all().defer('plain_text')
        )
    )
    d = ds[0]
    renderer = JSONRenderer()
    json_str = renderer.render(
        IADocketSerializer(d).data,
        accepted_media_type='application/json; indent=2',
    )
    file_name = get_docket_filename(d.court_id, d.pacer_case_id, 'json')
    bucket_name = get_bucket_name(d.court_id, d.pacer_case_id)
    responses = upload_to_ia(
        self,
        identifier=bucket_name,
        files={file_name: StringIO(json_str)},
        title=best_case_name(d),
        collection=settings.IA_COLLECTIONS,
        court_id=d.court_id,
        source_url='https://www.courtlistener.com%s' % d.get_absolute_url(),
        media_type='texts',
        description="This item represents a case in PACER, the U.S. "
                    "Government's website for federal case data. This "
                    "information is uploaded quarterly. To see our most "
                    "recent version please use the source url parameter, "
                    "linked below. To see the canonical source for this data, "
                    "please consult PACER directly.",
    )
    if responses is None:
        # No responses at all: treat as a failed attempt.
        increment_failure_count(d)
        return
    if all(r.ok for r in responses):
        d.ia_upload_failure_count = None
        d.ia_date_first_changed = None
        d.filepath_ia_json = "https://archive.org/download/%s/%s" % (
            bucket_name, file_name)
        # NOTE(review): calling mark_ia_upload_needed right after a
        # successful upload is surprising — confirm the helper clears
        # (rather than sets) the needs-upload flag.
        mark_ia_upload_needed(d)
        d.save()
    else:
        increment_failure_count(d)
def view_docket(request, pk, slug):
    """Show a docket page with filterable, paginated docket entries.

    :param request: The HttpRequest; GET params drive filtering/pagination.
    :param pk: PK of the Docket to show (404 if missing).
    :param slug: Unused slug segment from the URL pattern.
    """
    docket = get_object_or_404(Docket, pk=pk)
    if not is_bot(request):
        # Bump the view counter without touching date_modified. The F()
        # expression makes the increment atomic at the DB level; afterwards
        # the in-memory value is restored to a usable int.
        with suppress_autotime(docket, ['date_modified']):
            cached_count = docket.view_count
            docket.view_count = F('view_count') + 1
            docket.save()
        docket.view_count = cached_count + 1
    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                'docket_id': docket.pk,
                'name': trunc(best_case_name(docket), 100, ellipsis='...'),
            })
    else:
        favorite_form = FavoriteForm(instance=fave)
    de_list = docket.docket_entries.all().prefetch_related('recap_documents')
    form = DocketEntryFilterForm(request.GET)
    if form.is_valid():
        # Apply each filter only when the user supplied it.
        cd = form.cleaned_data
        if cd.get('entry_gte'):
            de_list = de_list.filter(entry_number__gte=cd['entry_gte'])
        if cd.get('entry_lte'):
            de_list = de_list.filter(entry_number__lte=cd['entry_lte'])
        if cd.get('filed_after'):
            de_list = de_list.filter(date_filed__gte=cd['filed_after'])
        if cd.get('filed_before'):
            de_list = de_list.filter(date_filed__lte=cd['filed_before'])
        if cd.get('order_by') == DocketEntryFilterForm.DESCENDING:
            de_list = de_list.order_by('-entry_number')
    paginator = Paginator(de_list, 100, orphans=5)
    page = request.GET.get('page')
    try:
        docket_entries = paginator.page(page)
    except PageNotAnInteger:
        # Non-numeric page param: fall back to the first page.
        docket_entries = paginator.page(1)
    except EmptyPage:
        # Out-of-range page param: fall back to the last page.
        docket_entries = paginator.page(paginator.num_pages)
    return render(
        request, 'view_docket.html', {
            'docket': docket,
            # Needed to show/hide parties tab.
            'parties': docket.parties.exists(),
            'docket_entries': docket_entries,
            'form': form,
            'favorite_form': favorite_form,
            'get_string': make_get_string(request),
            'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
            'private': docket.blocked,
        })
def make_docket_title(docket):
    """Build a docket display title.

    The title is the truncated case name followed by the docket number,
    joined with ", "; blank components are dropped so no dangling comma is
    produced.

    :param docket: The Docket to title.
    :return: The title string.
    """
    components = (
        trunc(best_case_name(docket), 100, ellipsis="..."),
        docket.docket_number,
    )
    return ", ".join(c for c in components if c.strip())
def as_search_dict(self): """Create a dict that can be ingested by Solr""" # IDs out = { 'id': self.pk, 'docket_id': self.docket_id, 'court_id': self.docket.court_id, } # Docket docket = {'docketNumber': self.docket.docket_number} if self.docket.date_argued is not None: docket['dateArgued'] = datetime.combine(self.docket.date_argued, time()) if self.docket.date_reargued is not None: docket['dateReargued'] = datetime.combine( self.docket.date_reargued, time()) if self.docket.date_reargument_denied is not None: docket['dateReargumentDenied'] = datetime.combine( self.docket.date_reargument_denied, time()) out.update(docket) # Court out.update({ 'court': self.docket.court.full_name, 'court_citation_string': self.docket.court.citation_string, 'court_exact': self.docket.court_id, # For faceting }) # Audio File out.update({ 'caseName': best_case_name(self), 'panel_ids': [judge.pk for judge in self.panel.all()], 'judge': self.judges, 'file_size_mp3': deepgetattr(self, 'local_path_mp3.size', None), 'duration': self.duration, 'source': self.source, 'download_url': self.download_url, 'local_path': unicode(getattr(self, 'local_path_mp3', None)) }) try: out['absolute_url'] = self.get_absolute_url() except NoReverseMatch: raise InvalidDocumentError( "Unable to save to index due to missing absolute_url: %s" % self.pk) text_template = loader.get_template('indexes/audio_text.txt') out['text'] = text_template.render({'item': self}).translate(null_map) return normalize_search_dicts(out)
def as_search_dict(self): """Create a dict that can be ingested by Solr""" # IDs out = { 'id': self.pk, 'docket_id': self.docket_id, 'court_id': self.docket.court_id, } # Docket docket = {'docketNumber': self.docket.docket_number} if self.docket.date_argued is not None: docket['dateArgued'] = datetime.combine( self.docket.date_argued, time() ) if self.docket.date_reargued is not None: docket['dateReargued'] = datetime.combine( self.docket.date_reargued, time() ) if self.docket.date_reargument_denied is not None: docket['dateReargumentDenied'] = datetime.combine( self.docket.date_reargument_denied, time() ) out.update(docket) # Court out.update({ 'court': self.docket.court.full_name, 'court_citation_string': self.docket.court.citation_string, 'court_exact': self.docket.court_id, # For faceting }) # Audio File out.update({ 'caseName': best_case_name(self), 'panel_ids': [judge.pk for judge in self.panel.all()], 'judge': self.judges, 'file_size_mp3': deepgetattr(self, 'local_path_mp3.size', None), 'duration': self.duration, 'source': self.source, 'download_url': self.download_url, 'local_path': unicode(getattr(self, 'local_path_mp3', None)) }) try: out['absolute_url'] = self.get_absolute_url() except NoReverseMatch: raise InvalidDocumentError( "Unable to save to index due to missing absolute_url: %s" % self.pk ) text_template = loader.get_template('indexes/audio_text.txt') out['text'] = text_template.render({'item': self}).translate(null_map) return nuke_nones(out)
def as_search_dict(self): """Create a dict that can be ingested by Solr""" # IDs out = { "id": self.pk, "docket_id": self.docket_id, "court_id": self.docket.court_id, } # Docket docket = {"docketNumber": self.docket.docket_number} if self.docket.date_argued is not None: docket["dateArgued"] = midnight_pst(self.docket.date_argued) if self.docket.date_reargued is not None: docket["dateReargued"] = midnight_pst(self.docket.date_reargued) if self.docket.date_reargument_denied is not None: docket["dateReargumentDenied"] = midnight_pst( self.docket.date_reargument_denied ) out.update(docket) # Court out.update( { "court": self.docket.court.full_name, "court_citation_string": self.docket.court.citation_string, "court_exact": self.docket.court_id, # For faceting } ) # Audio File out.update( { "caseName": best_case_name(self), "panel_ids": [judge.pk for judge in self.panel.all()], "judge": self.judges, "file_size_mp3": deepgetattr( self, "local_path_mp3.size", None ), "duration": self.duration, "source": self.source, "download_url": self.download_url, "local_path": str(getattr(self, "local_path_mp3", None)), } ) try: out["absolute_url"] = self.get_absolute_url() except NoReverseMatch: raise InvalidDocumentError( "Unable to save to index due to missing absolute_url: %s" % self.pk ) text_template = loader.get_template("indexes/audio_text.txt") out["text"] = text_template.render({"item": self}).translate(null_map) return normalize_search_dicts(out)
def set_mp3_meta_data(audio_obj, mp3_path):
    """Sets the meta data on the mp3 file to good values.

    :param audio_obj: an Audio object to clean up.
    :param mp3_path: the path to the mp3 to be converted.
    """
    court = audio_obj.docket.court
    # Load the file, delete the old tags and create a new one.
    audio_file = eyed3.load(mp3_path)
    # Undocumented API from eyed3.plugins.classic.ClassicPlugin#handleRemoves
    id3.Tag.remove(audio_file.tag.file_info.name,
                   id3.ID3_ANY_VERSION,
                   preserve_file_time=False)
    audio_file.initTag()
    audio_file.tag.title = best_case_name(audio_obj)
    audio_file.tag.album = u"{court}, {year}".format(
        court=court.full_name,
        year=audio_obj.docket.date_argued.year)
    audio_file.tag.artist = court.full_name
    audio_file.tag.artist_url = court.url
    audio_file.tag.audio_source_url = audio_obj.download_url
    audio_file.tag.comments.set(
        u"Argued: {date_argued}. Docket number: {docket_number}".format(
            date_argued=audio_obj.docket.date_argued.strftime("%Y-%m-%d"),
            docket_number=audio_obj.docket.docket_number
        )
    )
    audio_file.tag.genre = u"Speech"
    audio_file.tag.publisher = u"Free Law Project"
    audio_file.tag.publisher_url = u"https://free.law"
    audio_file.tag.recording_date = audio_obj.docket.date_argued.strftime(
        "%Y-%m-%d")
    # Add images to the mp3. If it has a seal, use that for the Front Cover
    # and use the FLP logo for the Publisher Logo. If it lacks a seal, use
    # the Publisher logo for both the front cover and the Publisher logo.
    try:
        has_seal = seals_data[court.pk]["has_seal"]
    except AttributeError:
        # Unknown court in Seal Rookery.
        has_seal = False
    except KeyError:
        # Unknown court altogether (perhaps a test?)
        has_seal = False
    flp_image_frames = [
        3,   # "Front Cover". Complete list at eyed3/id3/frames.py
        14,  # "Publisher logo".
    ]
    if has_seal:
        # NOTE(review): text-mode "r" for a PNG only works under Python 2 on
        # POSIX; switch to "rb" if this ever runs under Python 3.
        with open(os.path.join(seals_root, "512", "%s.png" % court.pk),
                  "r") as f:
            audio_file.tag.images.set(3, f.read(), "image/png",
                                      u"Seal for %s" % court.short_name)
        # Seal used for the front cover, so only the publisher-logo frame
        # remains for the FLP image below.
        flp_image_frames.remove(3)
    for frame in flp_image_frames:
        with open(
                os.path.join(settings.INSTALL_ROOT, "cl", "audio", "static",
                             "png", "producer-300x300.png"),
                "r"
        ) as f:
            audio_file.tag.images.set(
                frame,
                f.read(),
                "image/png",
                u"Created for the public domain by Free Law Project"
            )
    audio_file.tag.save()
def cluster_visualizations(request, pk, slug):
    """Show the visualizations page for one opinion cluster.

    :param request: The HttpRequest.
    :param pk: PK of the OpinionCluster (404 if missing).
    :param slug: Unused slug segment from the URL pattern.
    """
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = '%s, %s' % (
        trunc(best_case_name(cluster), 100),
        cluster.citation_string,
    )
    # Private if the cluster itself is blocked or any authority is private.
    is_private = cluster.blocked or cluster.has_private_authority
    context = {
        'title': title,
        'cluster': cluster,
        'private': is_private,
    }
    return render(request, 'view_opinion_visualizations.html', context)
def send_docket_alert(d_pk, since):
    """Send an alert for a given docket

    :param d_pk: The docket PK that was modified
    :param since: If we run alerts, notify users about items *since* this
        time.
    :return: None
    """
    # All subscribers' email addresses for this docket's alerts.
    email_addresses = User.objects.filter(
        docket_alerts__docket_id=d_pk,
    ).distinct().values_list('email', flat=True)
    if email_addresses:
        # We have an alert for this docket. Proceed.
        docket = Docket.objects.get(pk=d_pk)
        new_des = DocketEntry.objects.filter(date_created__gte=since,
                                             docket=docket)
        if new_des.count() > 0:
            # Notify every user that's subscribed to this alert.
            case_name = trunc(best_case_name(docket), 100, ellipsis='...')
            subject_template = loader.get_template('docket_alert_subject.txt')
            subject = subject_template.render({
                'docket': docket,
                'count': new_des.count(),
                'case_name': case_name,
            }).strip()  # Remove newlines that editors can insist on adding.
            email_context = {'new_des': new_des, 'docket': docket}
            txt_template = loader.get_template('docket_alert_email.txt')
            html_template = loader.get_template('docket_alert_email.html')
            messages = []
            for email_address in email_addresses:
                msg = EmailMultiAlternatives(
                    subject=subject,
                    body=txt_template.render(email_context),
                    from_email=settings.DEFAULT_ALERTS_EMAIL,
                    to=[email_address],
                    headers={'X-Entity-Ref-ID': 'docket.alert:%s' % d_pk})
                html = html_template.render(email_context)
                msg.attach_alternative(html, "text/html")
                messages.append(msg)
            # Add a bcc to the first message in the list so that we get a
            # copy.
            messages[0].bcc = ['*****@*****.**']
            connection = get_connection()
            connection.send_messages(messages)
            tally_stat('alerts.docket.alerts.sent', inc=len(email_addresses))
            DocketAlert.objects.filter(docket=docket).update(
                date_last_hit=now())
    # Work completed, clear the semaphor
    r = redis.StrictRedis(host=settings.REDIS_HOST,
                          port=settings.REDIS_PORT,
                          db=settings.REDIS_DATABASES['ALERTS'])
    r.delete(make_alert_key(d_pk))
def process_audio_file(pk):
    """Given the key to an audio file, extract its content and add the
    related meta data to the database.

    Re-encodes the original file to a 22050 Hz / 48 kbps MP3, writes ID3
    metadata, stores the result on the Audio object, and records its
    duration.

    :param pk: PK of the Audio object to process.
    :raises subprocess.CalledProcessError: if the encoder command fails.
    """
    af = Audio.objects.get(pk=pk)
    tmp_path = os.path.join("/tmp", "audio_" + uuid.uuid4().hex + ".mp3")
    av_path = get_audio_binary()
    av_command = [
        av_path,
        "-i", af.local_path_original_file.path,
        "-ar", "22050",  # sample rate (audio samples/s) of 22050Hz
        "-ab", "48k",    # constant bit rate (sample resolution) of 48kbps
        tmp_path,
    ]
    try:
        _ = subprocess.check_output(av_command, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        print(
            "%s failed command: %s\nerror code: %s\noutput: %s\n%s"
            % (
                av_path,
                av_command,
                e.returncode,
                e.output,
                traceback.format_exc(),
            )
        )
        raise
    set_mp3_meta_data(af, tmp_path)
    try:
        # FIX: read the MP3 in binary mode ("rb"). Text mode ("r") would
        # attempt to decode the bytes and fail (or corrupt the payload)
        # under Python 3.
        with open(tmp_path, "rb") as mp3:
            cf = ContentFile(mp3.read())
            file_name = trunc(best_case_name(af).lower(), 72) + "_cl.mp3"
            af.file_with_date = af.docket.date_argued
            af.local_path_mp3.save(file_name, cf, save=False)
    # FIX: narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
    # are no longer swallowed; the best-effort log-and-continue behavior for
    # ordinary errors is preserved.
    except Exception:
        msg = (
            "Unable to save mp3 to audio_file in scraper.tasks."
            "process_audio_file for item: %s\n"
            "Traceback:\n"
            "%s" % (af.pk, traceback.format_exc())
        )
        print(msg)
        ErrorLog.objects.create(
            log_level="CRITICAL", court=af.docket.court, message=msg
        )
    af.duration = eyed3.load(tmp_path).info.time_secs
    af.processing_complete = True
    af.save()
def make_rd_title(rd): de = rd.docket_entry d = de.docket return "{desc}#{doc_num}{att_num} in {case_name} ({court}{docket_number})".format( desc="%s – " % rd.description if rd.description else "", doc_num=rd.document_number, att_num=", Att. #%s" % rd.attachment_number if rd.document_type == RECAPDocument.ATTACHMENT else "", case_name=best_case_name(d), court=d.court.citation_string, docket_number=", %s" % d.docket_number if d.docket_number else "", )
def make_rd_title(rd): de = rd.docket_entry d = de.docket return '{desc}#{doc_num}{att_num} in {case_name} ({court}{docket_number})'.format( desc='%s – ' % rd.description if rd.description else '', doc_num=rd.document_number, att_num=', Att. #%s' % rd.attachment_number if rd.document_type == RECAPDocument.ATTACHMENT else '', case_name=best_case_name(d), court=d.court.citation_string, docket_number=', %s' % d.docket_number if d.docket_number else '', )
def send_docket_alert(d_pk, since): """Send an alert for a given docket :param d_pk: The docket PK that was modified :param since: If we run alerts, notify users about items *since* this time. :return: None """ email_addresses = (User.objects.filter( docket_alerts__docket_id=d_pk).distinct().values_list("email", flat=True)) if email_addresses: # We have an alert for this docket. Proceed. docket = Docket.objects.get(pk=d_pk) new_des = DocketEntry.objects.filter(date_created__gte=since, docket=docket) if new_des.count() > 0: # Notify every user that's subscribed to this alert. case_name = trunc(best_case_name(docket), 100, ellipsis="...") subject_template = loader.get_template("docket_alert_subject.txt") subject = subject_template.render({ "docket": docket, "count": new_des.count(), "case_name": case_name, }).strip() # Remove newlines that editors can insist on adding. email_context = {"new_des": new_des, "docket": docket} txt_template = loader.get_template("docket_alert_email.txt") html_template = loader.get_template("docket_alert_email.html") messages = [] for email_address in email_addresses: msg = EmailMultiAlternatives( subject=subject, body=txt_template.render(email_context), from_email=settings.DEFAULT_ALERTS_EMAIL, to=[email_address], headers={"X-Entity-Ref-ID": "docket.alert:%s" % d_pk}, ) html = html_template.render(email_context) msg.attach_alternative(html, "text/html") messages.append(msg) # Add a bcc to the first message in the list so that we get a copy. messages[0].bcc = ["*****@*****.**"] connection = get_connection() connection.send_messages(messages) tally_stat("alerts.docket.alerts.sent", inc=len(email_addresses)) DocketAlert.objects.filter(docket=docket).update(date_last_hit=now()) # Work completed, clear the semaphore r = make_redis_interface("ALERTS") r.delete(make_alert_key(d_pk))
def view_docket(request, pk, _): docket = get_object_or_404(Docket, pk=pk) if not is_bot(request): docket.view_count = F('view_count') + 1 docket.save() try: fave = Favorite.objects.get(docket_id=docket.pk, user=request.user) except (ObjectDoesNotExist, TypeError): # Not favorited or anonymous user favorite_form = FavoriteForm( initial={ 'docket_id': docket.pk, 'name': trunc(best_case_name(docket), 100, ellipsis='...'), }) else: favorite_form = FavoriteForm(instance=fave) de_list = docket.docket_entries.all() form = DocketEntryFilterForm(request.GET) if form.is_valid(): cd = form.cleaned_data if cd.get('entry_gte'): de_list = de_list.filter(entry_number__gte=cd['entry_gte']) if cd.get('entry_lte'): de_list = de_list.filter(entry_number__lte=cd['entry_lte']) if cd.get('filed_after'): de_list = de_list.filter(date_filed__gte=cd['filed_after']) if cd.get('filed_before'): de_list = de_list.filter(date_filed__lte=cd['filed_before']) if cd.get('order_by') == DocketEntryFilterForm.DESCENDING: de_list = de_list.order_by('-entry_number') paginator = Paginator(de_list, 500, orphans=25) page = request.GET.get('page') try: docket_entries = paginator.page(page) except PageNotAnInteger: docket_entries = paginator.page(1) except EmptyPage: docket_entries = paginator.page(paginator.num_pages) return render( request, 'view_docket.html', { 'docket': docket, 'docket_entries': docket_entries, 'form': form, 'favorite_form': favorite_form, 'get_string': make_get_string(request), 'private': docket.blocked, })
def cluster_visualizations( request: HttpRequest, pk: int, slug: str ) -> HttpResponse: cluster = get_object_or_404(OpinionCluster, pk=pk) return render( request, "view_opinion_visualizations.html", { "title": "%s, %s" % (trunc(best_case_name(cluster), 100), cluster.citation_string), "cluster": cluster, "private": cluster.blocked or cluster.has_private_authority, }, )
def __init__(self, item): self.id = item.pk self.docket_id = item.docket_id # Docket if item.docket.date_argued is not None: self.dateArgued = datetime.combine( item.docket.date_argued, time() ) if item.docket.date_reargued is not None: self.dateReargued = datetime.combine( item.docket.date_reargued, time() ) if item.docket.date_reargument_denied is not None: self.dateReargumentDenied = datetime.combine( item.docket.date_reargument_denied, time() ) self.docketNumber = item.docket.docket_number # Court self.court = item.docket.court.full_name self.court_id = item.docket.court_id self.court_citation_string = item.docket.court.citation_string # Audio file self.caseName = best_case_name(item) self.panel_ids = [judge.pk for judge in item.panel.all()] self.judge = item.judges self.file_size_mp3 = deepgetattr(item, 'local_path_mp3.size', None) self.duration = item.duration self.source = item.source self.download_url = item.download_url self.local_path = unicode(getattr(item, 'local_path_mp3', None)) try: self.absolute_url = item.get_absolute_url() except NoReverseMatch: raise InvalidDocumentError( "Unable to save to index due to missing absolute_url: %s" % item.pk) text_template = loader.get_template('indexes/audio_text.txt') context = {'item': item} self.text = text_template.render(context).translate(null_map) # For faceting self.court_exact = item.docket.court_id
def view_docket(request, pk, _): docket = get_object_or_404(Docket, pk=pk) if not is_bot(request): docket.view_count = F('view_count') + 1 docket.save() try: fave = Favorite.objects.get(docket_id=docket.pk, user=request.user) except (ObjectDoesNotExist, TypeError): # Not favorited or anonymous user favorite_form = FavoriteForm(initial={ 'docket_id': docket.pk, 'name': trunc(best_case_name(docket), 100, ellipsis='...'), }) else: favorite_form = FavoriteForm(instance=fave) de_list = docket.docket_entries.all() form = DocketEntryFilterForm(request.GET) if form.is_valid(): cd = form.cleaned_data if cd.get('entry_gte'): de_list = de_list.filter(entry_number__gte=cd['entry_gte']) if cd.get('entry_lte'): de_list = de_list.filter(entry_number__lte=cd['entry_lte']) if cd.get('filed_after'): de_list = de_list.filter(date_filed__gte=cd['filed_after']) if cd.get('filed_before'): de_list = de_list.filter(date_filed__lte=cd['filed_before']) if cd.get('order_by') == DocketEntryFilterForm.DESCENDING: de_list = de_list.order_by('-entry_number') paginator = Paginator(de_list, 500, orphans=25) page = request.GET.get('page') try: docket_entries = paginator.page(page) except PageNotAnInteger: docket_entries = paginator.page(1) except EmptyPage: docket_entries = paginator.page(paginator.num_pages) return render(request, 'view_docket.html', { 'docket': docket, 'docket_entries': docket_entries, 'form': form, 'favorite_form': favorite_form, 'get_string': make_get_string(request), 'private': docket.blocked, })
def view_authorities(request, pk, slug): cluster = get_object_or_404(OpinionCluster, pk=pk) return render_to_response( 'view_opinion_authorities.html', { 'title': '%s, %s' % (trunc(best_case_name(cluster), 100), cluster.citation_string), 'cluster': cluster, 'private': cluster.blocked or cluster.has_private_authority, 'authorities': cluster.authorities.order_by('case_name'), }, RequestContext(request))
def view_authorities(request, pk, slug): cluster = get_object_or_404(OpinionCluster, pk=pk) return render_to_response( 'view_opinion_authorities.html', { 'title': '%s, %s' % ( trunc(best_case_name(cluster), 100), cluster.citation_string ), 'cluster': cluster, 'private': cluster.blocked or cluster.has_private_authority, 'authorities': cluster.authorities.order_by('case_name'), }, RequestContext(request) )
def filter_by_matching_antecedent( opinion_candidates: Iterable[Opinion], antecedent_guess: Optional[str], ) -> Optional[Opinion]: if not antecedent_guess: return None antecedent_guess = strip_punct(antecedent_guess) candidates: List[Opinion] = [] for o in opinion_candidates: if antecedent_guess in best_case_name(o.cluster): candidates.append(o) # Remove duplicates and only accept if one candidate remains candidates = list(set(candidates)) return candidates[0] if len(candidates) == 1 else None
def view_authorities(request, pk, slug): cluster = get_object_or_404(OpinionCluster, pk=pk) return render( request, "view_opinion_authorities.html", { "title": "%s, %s" % (trunc(best_case_name(cluster), 100), cluster.citation_string), "cluster": cluster, "private": cluster.blocked or cluster.has_private_authority, "authorities_with_data": cluster.authorities_with_data, }, )
def process_audio_file(self, pk) -> None: """Given the key to an audio file, extract its content and add the related meta data to the database. :param self: A Celery task object :param pk: Audio file pk :return: None """ af = Audio.objects.get(pk=pk) bte_audio_response = convert_and_clean_audio(af) bte_audio_response.raise_for_status() audio_obj = bte_audio_response.json() cf = ContentFile(base64.b64decode(audio_obj["audio_b64"])) file_name = trunc(best_case_name(af).lower(), 72) + "_cl.mp3" af.file_with_date = af.docket.date_argued af.local_path_mp3.save(file_name, cf, save=False) af.duration = audio_obj["duration"] af.processing_complete = True af.save()
def __init__(self, item): self.id = item.pk self.docket_id = item.docket_id # Docket if item.docket.date_argued is not None: self.dateArgued = datetime.combine(item.docket.date_argued, time()) if item.docket.date_reargued is not None: self.dateReargued = datetime.combine(item.docket.date_reargued, time()) if item.docket.date_reargument_denied is not None: self.dateReargumentDenied = datetime.combine( item.docket.date_reargument_denied, time()) self.docketNumber = item.docket.docket_number # Court self.court = item.docket.court.full_name self.court_id = item.docket.court_id self.court_citation_string = item.docket.court.citation_string # Audio file self.caseName = best_case_name(item) self.panel_ids = [judge.pk for judge in item.panel.all()] self.judge = item.judges self.file_size_mp3 = deepgetattr(item, 'local_path_mp3.size', None) self.duration = item.duration self.source = item.source self.download_url = item.download_url self.local_path = unicode(getattr(item, 'local_path_mp3', None)) try: self.absolute_url = item.get_absolute_url() except NoReverseMatch: raise InvalidDocumentError( "Unable to save to index due to missing absolute_url: %s" % item.pk) text_template = loader.get_template('indexes/audio_text.txt') context = {'item': item} self.text = text_template.render(context).translate(null_map) # For faceting self.court_exact = item.docket.court_id
def process_audio_file(pk): """Given the key to an audio file, extract its content and add the related meta data to the database. """ af = Audio.objects.get(pk=pk) tmp_path = os.path.join('/tmp', 'audio_' + uuid.uuid4().hex + '.mp3') avconv_command = [ 'avconv', '-i', af.local_path_original_file.path, '-ar', '22050', # sample rate (audio samples/s) of 22050Hz '-ab', '48k', # constant bit rate (sample resolution) of 48kbps tmp_path ] try: _ = subprocess.check_output( avconv_command, stderr=subprocess.STDOUT ) except subprocess.CalledProcessError as e: print('avconv failed command: %s\nerror code: %s\noutput: %s\n%s' % (avconv_command, e.returncode, e.output, traceback.format_exc())) raise set_mp3_meta_data(af, tmp_path) try: with open(tmp_path, 'r') as mp3: cf = ContentFile(mp3.read()) file_name = trunc(best_case_name(af).lower(), 72) + '_cl.mp3' af.file_with_date = af.docket.date_argued af.local_path_mp3.save(file_name, cf, save=False) except: msg = ("Unable to save mp3 to audio_file in scraper.tasks." "process_audio_file for item: %s\n" "Traceback:\n" "%s" % (af.pk, traceback.format_exc())) print(msg) ErrorLog.objects.create(log_level='CRITICAL', court=af.docket.court, message=msg) af.duration = eyed3.load(tmp_path).info.time_secs af.processing_complete = True af.save()
def view_recap_document(request, docket_id=None, doc_num=None, att_num=None, slug=''): """This view can either load an attachment or a regular document, depending on the URL pattern that is matched. """ item = get_object_or_404( RECAPDocument, docket_entry__docket__id=docket_id, document_number=doc_num, attachment_number=att_num, ) title = '%sDocument #%s%s in %s' % ( '%s – ' % item.description if item.description else '', item.document_number, ', Attachment #%s' % item.attachment_number if item.document_type == RECAPDocument.ATTACHMENT else '', best_case_name(item.docket_entry.docket), ) try: fave = Favorite.objects.get(recap_doc_id=item.pk, user=request.user) except (ObjectDoesNotExist, TypeError): # Not favorited or anonymous user favorite_form = FavoriteForm(initial={ 'recap_doc_id': item.pk, 'name': trunc(title, 100, ellipsis='...'), }) else: favorite_form = FavoriteForm(instance=fave) return render( request, 'recap_document.html', { 'document': item, 'title': title, 'favorite_form': favorite_form, 'private': True, # Always True for RECAP docs. })
def __init__(self, item):
    """Build a search-index document from an Opinion ``item``.

    Copies the indexed fields off the opinion, its cluster, and the
    cluster's docket/court onto ``self``; attribute names mirror the search
    schema's field names (hence the camelCase).
    """
    self.id = item.pk
    self.docket_id = item.cluster.docket.pk
    self.cluster_id = item.cluster.pk
    self.court_id = item.cluster.docket.court.pk
    # Docket
    if item.cluster.docket.date_argued is not None:
        self.dateArgued = datetime.combine(
            item.cluster.docket.date_argued,
            time(),
        )
    if item.cluster.docket.date_reargued is not None:
        self.dateReargued = datetime.combine(
            item.cluster.docket.date_reargued,
            time(),
        )
    if item.cluster.docket.date_reargument_denied is not None:
        self.dateReargumentDenied = datetime.combine(
            item.cluster.docket.date_reargument_denied,
            time(),
        )
    self.docketNumber = item.cluster.docket.docket_number
    # Court
    self.court = item.cluster.docket.court.full_name
    self.court_citation_string = item.cluster.docket.court.citation_string
    # Cluster
    self.caseName = best_case_name(item.cluster)
    self.caseNameShort = item.cluster.case_name_short
    self.sibling_ids = [sibling.pk for sibling in item.siblings.all()]
    self.panel_ids = [judge.pk for judge in item.cluster.panel.all()]
    self.non_participating_judge_ids = [
        judge.pk for judge in
        item.cluster.non_participating_judges.all()
    ]
    self.judge = item.cluster.judges
    if item.cluster.date_filed is not None:
        self.dateFiled = datetime.combine(
            item.cluster.date_filed, time()
        )  # Midnight, PST
    self.lexisCite = item.cluster.lexis_cite
    self.citation = [cite for cite in
                     item.cluster.citation_list if cite]  # Nuke '' and None
    self.neutralCite = item.cluster.neutral_cite
    self.scdb_id = item.cluster.scdb_id
    self.source = item.cluster.source
    self.attorney = item.cluster.attorneys
    self.suitNature = item.cluster.nature_of_suit
    self.citeCount = item.cluster.citation_count
    self.status = item.cluster.get_precedential_status_display()
    # Opinion
    self.cites = [opinion.pk for opinion in item.opinions_cited.all()]
    self.author_id = getattr(item.author, 'pk', None)
    # self.per_curiam = item.per_curiam
    self.joined_by_ids = [judge.pk for judge in item.joined_by.all()]
    self.type = item.type
    self.download_url = item.download_url or None
    self.local_path = unicode(item.local_path)
    try:
        self.absolute_url = item.cluster.get_absolute_url()
    except NoReverseMatch:
        # Bug fix: the original read `self.docket.court_id`, but no
        # `docket` attribute is ever set on self, so the error path itself
        # raised AttributeError. self.court_id was assigned above.
        raise InvalidDocumentError(
            "Unable to save to index due to missing absolute_url "
            "(court_id: %s, item.pk: %s). Might the court have in_use set "
            "to False?" % (self.court_id, item.pk)
        )
    # Load the document text using a template for cleanup and concatenation
    text_template = loader.get_template('indexes/opinion_text.txt')
    context = {'item': item,
               'citation_string': item.cluster.citation_string}
    self.text = text_template.render(context).translate(null_map)
    # Faceting fields
    self.status_exact = item.cluster.get_precedential_status_display()
    self.court_exact = item.cluster.docket.court.pk
def view_opinion(request, pk, _): """Using the cluster ID, return the cluster of opinions. We also test if the cluster ID is a favorite for the user, and send data if needed. If it's a favorite, we send the bound form for the favorite so it can populate the form on the page. If it is not a favorite, we send the unbound form. """ # Look up the court, cluster, title and favorite information cluster = get_object_or_404(OpinionCluster, pk=pk) title = ', '.join([s for s in [ trunc(best_case_name(cluster), 100, ellipsis="..."), cluster.citation_string, ] if s.strip()]) has_downloads = False for sub_opinion in cluster.sub_opinions.all(): if sub_opinion.local_path or sub_opinion.download_url: has_downloads = True break get_string = search_utils.make_get_string(request) try: fave = Favorite.objects.get(cluster_id=cluster.pk, user=request.user) except (ObjectDoesNotExist, TypeError): # Not favorited or anonymous user favorite_form = FavoriteForm(initial={ 'cluster_id': cluster.pk, 'name': trunc(best_case_name(cluster), 100, ellipsis='...'), }) else: favorite_form = FavoriteForm(instance=fave) if not is_bot(request): # Get the citing results from Solr for speed. Only do this for humans # to save on disk usage. conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r') q = { 'q': 'cites:({ids})'.format( ids=' OR '.join([str(pk) for pk in (cluster.sub_opinions .values_list('pk', flat=True))]) ), 'rows': 5, 'start': 0, 'sort': 'citeCount desc', 'caller': 'view_opinion', } citing_clusters = conn.raw_query(**q).execute() else: citing_clusters = None return render(request, 'view_opinion.html', { 'title': title, 'cluster': cluster, 'has_downloads': has_downloads, 'favorite_form': favorite_form, 'get_string': get_string, 'private': cluster.blocked, 'citing_clusters': citing_clusters, 'top_authorities': cluster.authorities[:5], })
stderr=subprocess.STDOUT ) except subprocess.CalledProcessError, e: print 'avconv failed command: %s\nerror code: %s\noutput: %s\n' % \ (avconv_command, e.returncode, e.output) print traceback.format_exc() raise # Have to do this last because otherwise the mp3 hasn't yet been generated. set_mp3_meta_data(af, path_to_tmp_location) af.duration = eyed3.load(path_to_tmp_location).info.time_secs with open(path_to_tmp_location, 'r') as mp3: try: cf = ContentFile(mp3.read()) file_name = trunc(best_case_name(af).lower(), 72) + '_cl.mp3' af.file_with_date = af.docket.date_argued af.local_path_mp3.save(file_name, cf, save=False) except: msg = "Unable to save mp3 to audio_file in scraper.tasks.process_" \ "audio_file for item: %s\nTraceback:\n%s" % \ (af.pk, traceback.format_exc()) print msg ErrorLog(log_level='CRITICAL', court=af.docket.court, message=msg).save() af.processing_complete = True af.save() os.remove(path_to_tmp_location)
def as_search_dict(self):
    """Create a dict that can be ingested by Solr.

    Flattens this Opinion, its cluster, and the cluster's docket/court into
    the flat field names the search schema expects; None values are removed
    at the end by nuke_nones.
    """
    # IDs
    out = {
        'id': self.pk,
        'docket_id': self.cluster.docket.pk,
        'cluster_id': self.cluster.pk,
        'court_id': self.cluster.docket.court.pk
    }
    # Opinion
    out.update({
        'cites': [opinion.pk for opinion in self.opinions_cited.all()],
        'author_id': getattr(self.author, 'pk', None),
        # 'per_curiam': self.per_curiam,
        'joined_by_ids': [judge.pk for judge in self.joined_by.all()],
        'type': self.type,
        'download_url': self.download_url or None,
        # NOTE(review): `unicode` means this is Python 2 code.
        'local_path': unicode(self.local_path),
    })
    # Cluster
    out.update({
        'caseName': best_case_name(self.cluster),
        'caseNameShort': self.cluster.case_name_short,
        'sibling_ids': [sibling.pk for sibling in self.siblings.all()],
        'panel_ids': [judge.pk for judge in self.cluster.panel.all()],
        'non_participating_judge_ids': [
            judge.pk for judge in
            self.cluster.non_participating_judges.all()
        ],
        'judge': self.cluster.judges,
        'lexisCite': self.cluster.lexis_cite,
        'citation': [
            cite for cite in self.cluster.citation_list
            if cite],  # Nuke '' and None
        'neutralCite': self.cluster.neutral_cite,
        'scdb_id': self.cluster.scdb_id,
        'source': self.cluster.source,
        'attorney': self.cluster.attorneys,
        'suitNature': self.cluster.nature_of_suit,
        'citeCount': self.cluster.citation_count,
        'status': self.cluster.get_precedential_status_display(),
        'status_exact': self.cluster.get_precedential_status_display(),
    })
    if self.cluster.date_filed is not None:
        out['dateFiled'] = datetime.combine(
            self.cluster.date_filed, time()
        )  # Midnight, PST
    try:
        out['absolute_url'] = self.cluster.get_absolute_url()
    except NoReverseMatch:
        # An unresolvable URL would produce an unlinkable search result.
        raise InvalidDocumentError(
            "Unable to save to index due to missing absolute_url "
            "(court_id: %s, item.pk: %s). Might the court have in_use set "
            "to False?"
            % (self.cluster.docket.court_id, self.pk)
        )
    # Docket
    docket = {'docketNumber': self.cluster.docket.docket_number}
    if self.cluster.docket.date_argued is not None:
        docket['dateArgued'] = datetime.combine(
            self.cluster.docket.date_argued,
            time(),
        )
    if self.cluster.docket.date_reargued is not None:
        docket['dateReargued'] = datetime.combine(
            self.cluster.docket.date_reargued,
            time(),
        )
    if self.cluster.docket.date_reargument_denied is not None:
        docket['dateReargumentDenied'] = datetime.combine(
            self.cluster.docket.date_reargument_denied,
            time(),
        )
    out.update(docket)
    court = {
        'court': self.cluster.docket.court.full_name,
        'court_citation_string': self.cluster.docket.court.citation_string,
        'court_exact': self.cluster.docket.court_id,  # For faceting
    }
    out.update(court)
    # Load the document text using a template for cleanup and concatenation
    out['text'] = loader.get_template('indexes/opinion_text.txt').render({
        'item': self,
        'citation_string': self.cluster.citation_string
    }).translate(null_map)
    return nuke_nones(out)
def as_search_dict(self):
    """Create a dict that can be ingested by Solr.

    Search results are presented as Dockets, but they're indexed as
    RECAPDocument's, which are then grouped back together in search results
    to form Dockets.
    """
    # IDs
    out = {
        'id': self.pk,
        'docket_entry_id': self.docket_entry.pk,
        'docket_id': self.docket_entry.docket.pk,
        'court_id': self.docket_entry.docket.court.pk,
        # assigned_to / referred_to may be null on the docket.
        'assigned_to_id': getattr(
            self.docket_entry.docket.assigned_to, 'pk', None),
        'referred_to_id': getattr(
            self.docket_entry.docket.referred_to, 'pk', None)
    }
    # RECAPDocument
    out.update({
        'document_type': self.get_document_type_display(),
        'document_number': self.document_number,
        'attachment_number': self.attachment_number,
        'is_available': self.is_available,
        'page_count': self.page_count,
    })
    if hasattr(self.filepath_local, 'path'):
        out['filepath_local'] = self.filepath_local.path
    # Docket Entry
    out['description'] = self.docket_entry.description
    if self.docket_entry.entry_number is not None:
        out['entry_number'] = self.docket_entry.entry_number
    if self.docket_entry.date_filed is not None:
        out['entry_date_filed'] = datetime.combine(
            self.docket_entry.date_filed, time()
        )
    # Docket
    out.update({
        'docketNumber': self.docket_entry.docket.docket_number,
        'caseName': best_case_name(self.docket_entry.docket),
        'suitNature': self.docket_entry.docket.nature_of_suit,
        'cause': self.docket_entry.docket.cause,
        'juryDemand': self.docket_entry.docket.jury_demand,
        'jurisdictionType': self.docket_entry.docket.jurisdiction_type,
    })
    if self.docket_entry.docket.date_argued is not None:
        out['dateArgued'] = datetime.combine(
            self.docket_entry.docket.date_argued, time()
        )
    if self.docket_entry.docket.date_filed is not None:
        out['dateFiled'] = datetime.combine(
            self.docket_entry.docket.date_filed, time()
        )
    if self.docket_entry.docket.date_terminated is not None:
        out['dateTerminated'] = datetime.combine(
            self.docket_entry.docket.date_terminated, time()
        )
    try:
        out['absolute_url'] = self.docket_entry.docket.get_absolute_url()
    except NoReverseMatch:
        # An unresolvable URL would produce an unlinkable search result.
        raise InvalidDocumentError(
            "Unable to save to index due to missing absolute_url: %s"
            % self.pk
        )
    # Judges: prefer the related Person's full name; fall back to the raw
    # string scraped off the docket.
    if self.docket_entry.docket.assigned_to is not None:
        out['assignedTo'] = self.docket_entry.docket.assigned_to.name_full
    elif self.docket_entry.docket.assigned_to_str is not None:
        out['assignedTo'] = self.docket_entry.docket.assigned_to_str
    if self.docket_entry.docket.referred_to is not None:
        out['referredTo'] = self.docket_entry.docket.referred_to.name_full
    elif self.docket_entry.docket.referred_to_str is not None:
        out['referredTo'] = self.docket_entry.docket.referred_to_str
    # Court
    out.update({
        'court': self.docket_entry.docket.court.full_name,
        'court_exact': self.docket_entry.docket.court_id,  # For faceting
        'court_citation_string':
            self.docket_entry.docket.court.citation_string
    })
    # Render the searchable text via a template for cleanup/concatenation.
    text_template = loader.get_template('indexes/dockets_text.txt')
    out['text'] = text_template.render({'item': self}).translate(null_map)
    return nuke_nones(out)