def core_docket_data(request, pk):
    """Gather the core data for a docket, party, or IDB page.

    Returns the docket plus a template context dict shared by the
    docket-related views.
    """
    docket = get_object_or_404(Docket, pk=pk)

    # Title is "<case name>, <docket number>", skipping blank pieces.
    title_parts = [
        trunc(best_case_name(docket), 100, ellipsis="..."),
        docket.docket_number,
    ]
    title = ', '.join(part for part in title_parts if part.strip())

    try:
        favorite = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user: send an unbound form.
        favorite_form = FavoriteForm(initial={
            'docket_id': docket.pk,
            'name': trunc(best_case_name(docket), 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=favorite)

    has_alert = False
    if request.user.is_authenticated:
        has_alert = DocketAlert.objects.filter(
            docket=docket, user=request.user).exists()

    context = {
        'docket': docket,
        'title': title,
        'favorite_form': favorite_form,
        'has_alert': has_alert,
        'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
        'private': docket.blocked,
    }
    return docket, context
def core_docket_data(request, pk):
    """Gather the core data for a docket, party, or IDB page.

    Returns a (docket, context) pair used by several docket views.
    """
    docket = get_object_or_404(Docket, pk=pk)
    case_name = trunc(best_case_name(docket), 100, ellipsis="...")
    title = ', '.join(
        s for s in (case_name, docket.docket_number) if s.strip()
    )

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
        favorite_form = FavoriteForm(instance=fave)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(initial={
            'docket_id': docket.pk,
            'name': trunc(best_case_name(docket), 100, ellipsis='...'),
        })

    if request.user.is_authenticated:
        has_alert = DocketAlert.objects.filter(
            docket=docket, user=request.user).exists()
    else:
        has_alert = False

    return docket, {
        'docket': docket,
        'title': title,
        'favorite_form': favorite_form,
        'has_alert': has_alert,
        'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
        'private': docket.blocked,
    }
def view_audio_file(request, pk, _):
    """Using the ID, return the oral argument page.

    We also test if the item is a favorite and send data as such.
    """
    audio = get_object_or_404(Audio, pk=pk)
    title = trunc(audio.case_name, 100)
    get_string = search_utils.make_get_string(request)

    try:
        favorite = Favorite.objects.get(audio_id=audio.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(initial={
            'audio_id': audio.pk,
            'name': trunc(best_case_name(audio.docket), 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=favorite)

    context = {
        'title': title,
        'af': audio,
        'favorite_form': favorite_form,
        'get_string': get_string,
        'private': audio.blocked,
    }
    return render(request, 'oral_argument.html', context)
def view_audio_file(request, pk, _):
    """Using the ID, return the oral argument page.

    We also test if the item is a favorite and send data as such.
    """
    af = get_object_or_404(Audio, pk=pk)
    try:
        fave = Favorite.objects.get(audio_id=af.pk, user=request.user)
        favorite_form = FavoriteForm(instance=fave)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user: build an unbound form instead.
        favorite_form = FavoriteForm(
            initial={
                'audio_id': af.pk,
                'name': trunc(best_case_name(af.docket), 100, ellipsis='...'),
            })

    return render(
        request,
        'oral_argument.html',
        {
            'title': trunc(af.case_name, 100),
            'af': af,
            'favorite_form': favorite_form,
            'get_string': search_utils.make_get_string(request),
            'private': af.blocked,
        })
def view_opinion(request, pk, _):
    """Using the cluster ID, return the cluster of opinions.

    We also test if the cluster ID is a favorite for the user, and send data
    if needed. If it's a favorite, we send the bound form for the favorite so
    it can populate the form on the page. If it is not a favorite, we send the
    unbound form.
    """
    # Look up the court, cluster, title and favorite information
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = '%s, %s' % (
        trunc(best_case_name(cluster), 100),
        cluster.citation_string,
    )
    get_string = search_utils.make_get_string(request)

    try:
        fave = Favorite.objects.get(
            cluster_id=cluster.pk,
            user=request.user,
        )
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                'cluster_id': cluster.pk,
                'name': trunc(best_case_name(cluster), 100, ellipsis='...'),
            }
        )
    else:
        favorite_form = FavoriteForm(instance=fave)

    # Get the citing results from Solr for speed.
    sub_opinion_pks = cluster.sub_opinions.values_list('pk', flat=True)
    ids_str = ' OR '.join(str(pk) for pk in sub_opinion_pks)
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    solr_params = {
        'q': 'cites:({ids})'.format(ids=ids_str),
        'rows': 5,
        'start': 0,
        'sort': 'citeCount desc',
        'caller': 'view_opinion',
    }
    citing_clusters = conn.raw_query(**solr_params).execute()

    return render_to_response(
        'view_opinion.html',
        {
            'title': title,
            'cluster': cluster,
            'favorite_form': favorite_form,
            'get_string': get_string,
            'private': cluster.blocked,
            'citing_clusters': citing_clusters,
            'top_authorities': cluster.authorities[:5],
        },
        RequestContext(request)
    )
def view_parties(request, docket_id, slug):
    """Show the parties and attorneys tab on the docket."""
    docket = get_object_or_404(Docket, pk=docket_id)
    name_and_number = [
        trunc(best_case_name(docket), 100, ellipsis="..."),
        docket.docket_number,
    ]
    title = ', '.join(s for s in name_and_number if s.strip())

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
        favorite_form = FavoriteForm(instance=fave)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                'docket_id': docket.pk,
                'name': trunc(best_case_name(docket), 100, ellipsis='...'),
            })

    # We work with this data at the level of party_types so that we can group
    # the parties by this field. From there, we do a whole mess of prefetching,
    # which reduces the number of queries needed for this down to four instead
    # of potentially thousands (good times!)
    firm_prefetch = Prefetch(
        'attorney__organizations',
        queryset=AttorneyOrganization.objects.filter(
            attorney_organization_associations__docket=docket).distinct(),
        to_attr='firms_in_docket',
    )
    role_prefetch = Prefetch(
        'party__roles',
        queryset=Role.objects.filter(docket=docket).order_by(
            'attorney_id', 'role', 'date_action'
        ).select_related('attorney').prefetch_related(firm_prefetch),
    )
    party_types = docket.party_types.select_related('party').prefetch_related(
        role_prefetch).order_by('name', 'party__name')

    # groupby relies on the 'name' ordering applied above.
    parties = []
    for type_name, group in groupby(party_types, lambda x: x.name):
        parties.append({
            'party_type_name': type_name,
            'party_type_objects': list(group),
        })

    return render(
        request, 'docket_parties.html', {
            'docket': docket,
            'title': title,
            'parties': parties,
            'favorite_form': favorite_form,
            'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
            'private': docket.blocked,
        })
def view_opinion(request, pk, _):
    """Using the cluster ID, return the cluster of opinions.

    We also test if the cluster ID is a favorite for the user, and send data
    if needed. If it's a favorite, we send the bound form for the favorite so
    it can populate the form on the page. If it is not a favorite, we send the
    unbound form.
    """
    # Look up the court, cluster, title and favorite information
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = ", ".join(
        s
        for s in (
            trunc(best_case_name(cluster), 100, ellipsis="..."),
            cluster.citation_string,
        )
        if s.strip()
    )

    # A download link is shown if any sub-opinion has a local file or URL.
    has_downloads = any(
        op.local_path or op.download_url
        for op in cluster.sub_opinions.all()
    )

    get_string = make_get_string(request)
    try:
        fave = Favorite.objects.get(cluster_id=cluster.pk, user=request.user)
        favorite_form = FavoriteForm(instance=fave)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                "cluster_id": cluster.pk,
                "name": trunc(best_case_name(cluster), 100, ellipsis="..."),
            })

    citing_clusters = get_citing_clusters_with_cache(cluster, is_bot(request))
    authorities = cluster.authorities_with_data
    return render(
        request,
        "view_opinion.html",
        {
            "title": title,
            "cluster": cluster,
            "has_downloads": has_downloads,
            "favorite_form": favorite_form,
            "get_string": get_string,
            "private": cluster.blocked,
            "citing_clusters": citing_clusters,
            "top_authorities": authorities[:5],
            "authorities_count": len(authorities),
        },
    )
def view_recap_document(request, docket_id=None, doc_num=None, att_num=None,
                        slug=''):
    """This view can either load an attachment or a regular document,
    depending on the URL pattern that is matched.
    """
    item = get_object_or_404(
        RECAPDocument,
        docket_entry__docket__id=docket_id,
        document_number=doc_num,
        attachment_number=att_num,
    )
    title = make_rd_title(item)

    # Open Graph crawlers get a thumbnail generated on demand; refresh so the
    # new thumbnail fields are visible on this instance.
    if is_og_bot(request):
        make_thumb_if_needed(item)
        item.refresh_from_db()

    try:
        fave = Favorite.objects.get(recap_doc_id=item.pk, user=request.user)
        favorite_form = FavoriteForm(instance=fave)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(initial={
            'recap_doc_id': item.pk,
            'name': trunc(title, 100, ellipsis='...'),
        })

    context = {
        'document': item,
        'title': title,
        'favorite_form': favorite_form,
        'private': True,  # Always True for RECAP docs.
    }
    return render(request, 'recap_document.html', context)
def save(self, index=True, force_commit=False, *args, **kwargs):
    """Save the cluster, regenerating its slug.

    :param index: If True, enqueue a task to (re)index this cluster in the
        search engine after saving.
    :param force_commit: Passed through to the indexing task.
    """
    self.slug = slugify(trunc(best_case_name(self), 75))
    super(OpinionCluster, self).save(*args, **kwargs)
    if not index:
        return
    # Imported here to avoid a circular import at module load time.
    from cl.search.tasks import add_or_update_cluster
    add_or_update_cluster.delay(self.pk, force_commit)
def core_docket_data(
    request: HttpRequest,
    pk: int,
) -> Tuple[Docket, Dict[str, Union[bool, str, Docket, FavoriteForm]]]:
    """Gather the core data for a docket, party, or IDB page."""
    docket = get_object_or_404(Docket, pk=pk)

    try:
        favorite = Favorite.objects.get(docket_id=docket.pk,
                                        user=request.user)
        favorite_form = FavoriteForm(instance=favorite)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                "docket_id": docket.pk,
                "name": trunc(best_case_name(docket), 100, ellipsis="..."),
            }
        )

    context = {
        "docket": docket,
        "title": make_docket_title(docket),
        "favorite_form": favorite_form,
        "has_alert": user_has_alert(request.user, docket),
        "timezone": COURT_TIMEZONES.get(docket.court_id, "US/Eastern"),
        "private": docket.blocked,
    }
    return docket, context
def view_recap_document(request, docket_id=None, doc_num=None, att_num=None,
                        slug=''):
    """This view can either load an attachment or a regular document,
    depending on the URL pattern that is matched.
    """
    item = get_object_or_404(
        RECAPDocument,
        docket_entry__docket__id=docket_id,
        document_number=doc_num,
        attachment_number=att_num,
    )

    # Title is "<description – >Document #<n><, Attachment #<m>> in <case>".
    description_prefix = '%s – ' % item.description if item.description else ''
    if item.document_type == RECAPDocument.ATTACHMENT:
        attachment_suffix = ', Attachment #%s' % item.attachment_number
    else:
        attachment_suffix = ''
    title = '%sDocument #%s%s in %s' % (
        description_prefix,
        item.document_number,
        attachment_suffix,
        best_case_name(item.docket_entry.docket),
    )

    try:
        fave = Favorite.objects.get(recap_doc_id=item.pk, user=request.user)
        favorite_form = FavoriteForm(instance=fave)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(initial={
            'recap_doc_id': item.pk,
            'name': trunc(title, 100, ellipsis='...'),
        })

    return render(request, 'recap_document.html', {
        'document': item,
        'title': title,
        'favorite_form': favorite_form,
        'private': True,  # Always True for RECAP docs.
    })
def make_pdf_path(instance, filename, thumbs=False):
    """Build a storage path for a PDF (or its thumbnail) owned by *instance*.

    :param instance: A RECAPDocument, ClaimHistory, or LASCPDF instance.
    :param filename: The name of the file being stored.
    :param thumbs: If True, store under the "-thumbnails" variant of the root
        directory (RECAP/claim paths only).
    :return: The joined storage path.
    :raises ValueError: If *instance* is not one of the supported models.
    """
    # Imported here to avoid circular imports at module load time.
    from cl.search.models import ClaimHistory, RECAPDocument
    from cl.lasc.models import LASCPDF

    # Use isinstance rather than `type(x) == T` comparisons: it is the
    # idiomatic type check and also tolerates model subclasses/proxies.
    if isinstance(instance, RECAPDocument):
        root = "recap"
        court_id = instance.docket_entry.docket.court_id
        pacer_case_id = instance.docket_entry.docket.pacer_case_id
    elif isinstance(instance, ClaimHistory):
        root = "claim"
        court_id = instance.claim.docket.court_id
        pacer_case_id = instance.pacer_case_id
    elif isinstance(instance, LASCPDF):
        # LASC PDFs use a fixed directory layout and return immediately.
        slug = slugify(trunc(filename, 40))
        root = "/us/state/ca/lasc/%s/" % instance.docket_number
        file_name = "gov.ca.lasc.%s.%s.%s.pdf" % (
            instance.docket_number,
            instance.document_id,
            slug,
        )
        return os.path.join(root, file_name)
    else:
        raise ValueError("Unknown model type in make_pdf_path "
                         "function: %s" % type(instance))

    if thumbs:
        root = root + "-thumbnails"
    return os.path.join(root, get_bucket_name(court_id, pacer_case_id),
                        filename)
def save(self, *args, **kwargs):
    """Regenerate the slug and validate RECAP rows before saving."""
    self.slug = slugify(trunc(best_case_name(self), 75))
    # source == 1 — presumably the RECAP source constant; verify against the
    # Docket source choices.
    if self.source == 1 and not self.pacer_case_id:
        raise ValidationError("pacer_case_id cannot be Null or empty in "
                              "RECAP documents.")
    super(Docket, self).save(*args, **kwargs)
def view_docket(request, pk, slug):
    """Show a docket page with its (filterable, paginated) entries."""
    docket = get_object_or_404(Docket, pk=pk)

    if not is_bot(request):
        # Bump the view counter without touching date_modified, then restore
        # a concrete int on the instance (F() leaves an expression behind).
        with suppress_autotime(docket, ['date_modified']):
            cached_count = docket.view_count
            docket.view_count = F('view_count') + 1
            docket.save()
        docket.view_count = cached_count + 1

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
        favorite_form = FavoriteForm(instance=fave)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                'docket_id': docket.pk,
                'name': trunc(best_case_name(docket), 100, ellipsis='...'),
            })

    de_list = docket.docket_entries.all().prefetch_related('recap_documents')
    form = DocketEntryFilterForm(request.GET)
    if form.is_valid():
        cd = form.cleaned_data
        # Map form fields onto ORM lookups to keep the filtering declarative.
        lookups = (
            ('entry_gte', 'entry_number__gte'),
            ('entry_lte', 'entry_number__lte'),
            ('filed_after', 'date_filed__gte'),
            ('filed_before', 'date_filed__lte'),
        )
        for field, lookup in lookups:
            if cd.get(field):
                de_list = de_list.filter(**{lookup: cd[field]})
        if cd.get('order_by') == DocketEntryFilterForm.DESCENDING:
            de_list = de_list.order_by('-entry_number')

    paginator = Paginator(de_list, 100, orphans=5)
    page = request.GET.get('page')
    try:
        docket_entries = paginator.page(page)
    except PageNotAnInteger:
        docket_entries = paginator.page(1)
    except EmptyPage:
        docket_entries = paginator.page(paginator.num_pages)

    return render(
        request, 'view_docket.html', {
            'docket': docket,
            'parties': docket.parties.exists(),  # Needed to show/hide parties tab.
            'docket_entries': docket_entries,
            'form': form,
            'favorite_form': favorite_form,
            'get_string': make_get_string(request),
            'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
            'private': docket.blocked,
        })
def make_objects(self, item, court, sha1_hash, content):
    """Takes the meta data from the scraper and associates it with objects.

    Returns the created objects.

    :param item: The scraped item dict.
    :param court: The Court the item belongs to.
    :param sha1_hash: The SHA1 of the downloaded content.
    :param content: The raw binary content of the opinion file.
    :return: A (docket, opinion, cluster, error) tuple; error is True when
        the binary could not be saved to disk.
    """
    blocked = item['blocked_statuses']
    if blocked is not None:
        date_blocked = date.today()
    else:
        date_blocked = None

    case_name_short = (item.get('case_name_shorts') or
                       self.cnt.make_case_name_short(item['case_names']))
    docket = Docket(
        docket_number=item.get('docket_numbers', ''),
        case_name=item['case_names'],
        case_name_short=case_name_short,
        court=court,
        blocked=blocked,
        date_blocked=date_blocked,
        source=Docket.SCRAPER,
    )
    cluster = OpinionCluster(
        judges=item.get('judges', ''),
        date_filed=item['case_dates'],
        case_name=item['case_names'],
        case_name_short=case_name_short,
        source='C',
        precedential_status=item['precedential_statuses'],
        nature_of_suit=item.get('nature_of_suit', ''),
        blocked=blocked,
        date_blocked=date_blocked,
        federal_cite_one=item.get('west_citations', ''),
        state_cite_one=item.get('west_state_citations', ''),
        neutral_cite=item.get('neutral_citations', ''),
    )
    opinion = Opinion(
        type='010combined',
        sha1=sha1_hash,
        download_url=item['download_urls'],
    )

    error = False
    try:
        cf = ContentFile(content)
        extension = get_extension(content)
        file_name = trunc(item['case_names'].lower(), 75) + extension
        opinion.file_with_date = cluster.date_filed
        opinion.local_path.save(file_name, cf, save=False)
    except Exception:
        # Was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt; narrow it to Exception. This is deliberately
        # broad best-effort handling: log and flag the error instead of
        # aborting the scrape.
        msg = ('Unable to save binary to disk. Deleted '
               'item: %s.\n %s' %
               (item['case_names'], traceback.format_exc()))
        logger.critical(msg.encode('utf-8'))
        ErrorLog(log_level='CRITICAL', court=court, message=msg).save()
        error = True

    return docket, opinion, cluster, error
def make_docket_title(docket):
    """Return "<case name>, <docket number>" for a docket, skipping blanks."""
    parts = [
        trunc(best_case_name(docket), 100, ellipsis="..."),
        docket.docket_number,
    ]
    return ", ".join(part for part in parts if part.strip())
def make_objects(self, item, court, sha1_hash, content):
    """Associate scraped oral-argument metadata with Docket/Audio objects.

    :param item: The scraped item dict.
    :param court: The Court the item belongs to.
    :param sha1_hash: The SHA1 of the downloaded content.
    :param content: The raw binary content of the audio file.
    :return: A (docket, audio_file, error) tuple; error is True when the
        binary could not be saved to disk.
    """
    blocked = item["blocked_statuses"]
    if blocked:
        date_blocked = date.today()
    else:
        date_blocked = None

    case_name_short = item.get(
        "case_name_shorts"
    ) or self.cnt.make_case_name_short(item["case_names"])

    docket = Docket(
        docket_number=item.get("docket_numbers", ""),
        case_name=item["case_names"],
        case_name_short=case_name_short,
        court=court,
        blocked=blocked,
        date_blocked=date_blocked,
        date_argued=item["case_dates"],
        source=Docket.SCRAPER,
    )
    audio_file = Audio(
        judges=item.get("judges", ""),
        source="C",
        case_name=item["case_names"],
        case_name_short=case_name_short,
        sha1=sha1_hash,
        download_url=item["download_urls"],
        blocked=blocked,
        date_blocked=date_blocked,
    )

    error = False
    try:
        cf = ContentFile(content)
        extension = get_extension(content)
        if extension not in [".mp3", ".wma"]:
            # Fall back to the extension in the URL when sniffing fails.
            extension = (
                "." + item["download_urls"].lower().rsplit(".", 1)[1]
            )
        # See bitbucket issue #215 for why this must be
        # lower-cased.
        file_name = trunc(item["case_names"].lower(), 75) + extension
        audio_file.file_with_date = docket.date_argued
        audio_file.local_path_original_file.save(file_name, cf, save=False)
    except Exception:
        # Was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt; narrow it to Exception while keeping the
        # deliberate best-effort behavior of logging and flagging the error.
        msg = (
            "Unable to save binary to disk. Deleted audio file: %s.\n "
            "%s" % (item["case_names"], traceback.format_exc())
        )
        logger.critical(msg.encode("utf-8"))
        ErrorLog(log_level="CRITICAL", court=court, message=msg).save()
        error = True

    return docket, audio_file, error
def cluster_visualizations(request, pk, slug):
    """Show the visualizations tab for an opinion cluster."""
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = '%s, %s' % (
        trunc(best_case_name(cluster), 100),
        cluster.citation_string
    )
    return render(request, 'view_opinion_visualizations.html', {
        'title': title,
        'cluster': cluster,
        'private': cluster.blocked or cluster.has_private_authority,
    })
def send_docket_alert(d_pk, since):
    """Send an alert for a given docket

    :param d_pk: The docket PK that was modified
    :param since: If we run alerts, notify users about items *since* this
    time.
    :return: None
    """
    email_addresses = User.objects.filter(
        docket_alerts__docket_id=d_pk,
    ).distinct().values_list('email', flat=True)
    if email_addresses:
        # We have an alert for this docket. Proceed.
        docket = Docket.objects.get(pk=d_pk)
        new_des = DocketEntry.objects.filter(date_created__gte=since,
                                             docket=docket)
        if new_des.count() > 0:
            # Notify every user that's subscribed to this alert.
            case_name = trunc(best_case_name(docket), 100, ellipsis='...')
            subject_template = loader.get_template('docket_alert_subject.txt')
            subject = subject_template.render({
                'docket': docket,
                'count': new_des.count(),
                'case_name': case_name,
            }).strip()  # Remove newlines that editors can insist on adding.
            email_context = {'new_des': new_des, 'docket': docket}
            txt_template = loader.get_template('docket_alert_email.txt')
            html_template = loader.get_template('docket_alert_email.html')
            messages = []
            for email_address in email_addresses:
                msg = EmailMultiAlternatives(
                    subject=subject,
                    body=txt_template.render(email_context),
                    from_email=settings.DEFAULT_ALERTS_EMAIL,
                    to=[email_address],
                    headers={'X-Entity-Ref-ID': 'docket.alert:%s' % d_pk})
                msg.attach_alternative(html_template.render(email_context),
                                       "text/html")
                messages.append(msg)

            # Add a bcc to the first message in the list so that we get a
            # copy.
            messages[0].bcc = ['*****@*****.**']
            connection = get_connection()
            connection.send_messages(messages)
            tally_stat('alerts.docket.alerts.sent', inc=len(email_addresses))
            DocketAlert.objects.filter(docket=docket).update(
                date_last_hit=now())

    # Work completed, clear the semaphore
    r = redis.StrictRedis(host=settings.REDIS_HOST,
                          port=settings.REDIS_PORT,
                          db=settings.REDIS_DATABASES['ALERTS'])
    r.delete(make_alert_key(d_pk))
def process_audio_file(pk):
    """Given the key to an audio file, extract its content and add the
    related meta data to the database.

    Re-encodes the original file to a 22050Hz/48kbps MP3, sets its ID3
    metadata, stores the result on the Audio object, and records the
    duration.

    :param pk: The primary key of the Audio object to process.
    """
    af = Audio.objects.get(pk=pk)
    tmp_path = os.path.join("/tmp", "audio_" + uuid.uuid4().hex + ".mp3")
    av_path = get_audio_binary()
    av_command = [
        av_path,
        "-i",
        af.local_path_original_file.path,
        "-ar",
        "22050",  # sample rate (audio samples/s) of 22050Hz
        "-ab",
        "48k",  # constant bit rate (sample resolution) of 48kbps
        tmp_path,
    ]
    try:
        _ = subprocess.check_output(av_command, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        print(
            "%s failed command: %s\nerror code: %s\noutput: %s\n%s"
            % (
                av_path,
                av_command,
                e.returncode,
                e.output,
                traceback.format_exc(),
            )
        )
        raise

    set_mp3_meta_data(af, tmp_path)
    try:
        # Bug fix: the MP3 is binary, so it must be opened in "rb" mode.
        # Opening with "r" attempts text decoding and raises
        # UnicodeDecodeError on Python 3.
        with open(tmp_path, "rb") as mp3:
            cf = ContentFile(mp3.read())
            file_name = trunc(best_case_name(af).lower(), 72) + "_cl.mp3"
            af.file_with_date = af.docket.date_argued
            af.local_path_mp3.save(file_name, cf, save=False)
    except Exception:
        # Was a bare `except:`; narrowed to Exception so SystemExit and
        # KeyboardInterrupt propagate. Best-effort: log and continue.
        msg = (
            "Unable to save mp3 to audio_file in scraper.tasks."
            "process_audio_file for item: %s\n"
            "Traceback:\n"
            "%s" % (af.pk, traceback.format_exc())
        )
        print(msg)
        ErrorLog.objects.create(
            log_level="CRITICAL", court=af.docket.court, message=msg
        )

    af.duration = eyed3.load(tmp_path).info.time_secs
    af.processing_complete = True
    af.save()
    # NOTE(review): tmp_path is never removed — looks like a /tmp leak;
    # consider deleting it after the duration is read. TODO confirm nothing
    # downstream relies on the temp file.
def save(self, *args, **kwargs):
    """Fill in derived fields (title, slug, dates) and save the map."""
    # Note that the title needs to be made first, so that the slug can be
    # generated from it.
    if not self.title:
        self.title = trunc(self.make_title(), 200, ellipsis=u'…')

    first_publication = self.published is True and self.date_published is None
    if first_publication:
        # First time shared.
        self.date_published = now()

    just_deleted = (self.deleted is True and
                    self.__original_deleted != self.deleted)
    if just_deleted:
        # Item was just deleted.
        self.date_deleted = now()

    if self.pk is None:
        # First time being saved.
        self.slug = slugify(trunc(self.title, 75))
        # If we could, we'd add clusters and json here, but you can't do
        # that kind of thing until the first object has been saved.

    super(SCOTUSMap, self).save(*args, **kwargs)
    self.__original_deleted = self.deleted
def save(self, *args, **kwargs):
    """Populate derived fields before saving.

    Title must be computed before the slug, which is derived from it.
    """
    if not self.title:
        # Note that the title needs to be made first, so that the slug can
        # be generated from it.
        self.title = trunc(self.make_title(), 200, ellipsis=u"…")
    if self.published is True and self.date_published is None:
        # First time shared.
        self.date_published = now()
    if self.deleted is True and self.__original_deleted != self.deleted:
        # Item was just deleted.
        self.date_deleted = now()
    if self.pk is None:
        # First time being saved. If we could, we'd add clusters and json
        # here, but you can't do that kind of thing until the first object
        # has been saved.
        self.slug = slugify(trunc(self.title, 75))
    super(SCOTUSMap, self).save(*args, **kwargs)
    self.__original_deleted = self.deleted
def make_objects(self, item, court, sha1_hash, content):
    """Associate scraped oral-argument metadata with Docket/Audio objects.

    :param item: The scraped item dict.
    :param court: The Court the item belongs to.
    :param sha1_hash: The SHA1 of the downloaded content.
    :param content: The raw binary content of the audio file.
    :return: A (docket, audio_file, error) tuple; error is True when the
        binary could not be saved to disk.
    """
    blocked = item['blocked_statuses']
    if blocked:
        date_blocked = date.today()
    else:
        date_blocked = None

    case_name_short = (item.get('case_name_shorts') or
                       self.cnt.make_case_name_short(item['case_names']))
    docket = Docket(
        docket_number=item.get('docket_numbers', ''),
        case_name=item['case_names'],
        case_name_short=case_name_short,
        court=court,
        blocked=blocked,
        date_blocked=date_blocked,
        date_argued=item['case_dates'],
        source=Docket.SCRAPER,
    )
    audio_file = Audio(
        judges=item.get('judges', ''),
        source='C',
        case_name=item['case_names'],
        case_name_short=case_name_short,
        sha1=sha1_hash,
        download_url=item['download_urls'],
        blocked=blocked,
        date_blocked=date_blocked,
    )

    error = False
    try:
        cf = ContentFile(content)
        extension = get_extension(content)
        if extension not in ['.mp3', '.wma']:
            # Fall back to the extension in the URL when sniffing fails.
            extension = '.' + item['download_urls'].lower().rsplit('.', 1)[1]
        # See bitbucket issue #215 for why this must be
        # lower-cased.
        file_name = trunc(item['case_names'].lower(), 75) + extension
        audio_file.file_with_date = docket.date_argued
        audio_file.local_path_original_file.save(file_name, cf, save=False)
    except Exception:
        # Was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt; narrow it to Exception while keeping the
        # deliberate best-effort behavior of logging and flagging the error.
        msg = 'Unable to save binary to disk. Deleted audio file: %s.\n ' \
              '%s' % (item['case_names'], traceback.format_exc())
        logger.critical(msg.encode('utf-8'))
        ErrorLog(log_level='CRITICAL', court=court, message=msg).save()
        error = True

    return docket, audio_file, error
def send_docket_alert(d_pk, since):
    """Send an alert for a given docket

    :param d_pk: The docket PK that was modified
    :param since: If we run alerts, notify users about items *since* this
    time.
    :return: None
    """
    email_addresses = (
        User.objects.filter(docket_alerts__docket_id=d_pk)
        .distinct()
        .values_list("email", flat=True)
    )
    if email_addresses:
        # We have an alert for this docket. Proceed.
        docket = Docket.objects.get(pk=d_pk)
        new_des = DocketEntry.objects.filter(date_created__gte=since,
                                             docket=docket)
        if new_des.count() > 0:
            # Notify every user that's subscribed to this alert.
            case_name = trunc(best_case_name(docket), 100, ellipsis="...")
            subject_template = loader.get_template("docket_alert_subject.txt")
            subject = subject_template.render({
                "docket": docket,
                "count": new_des.count(),
                "case_name": case_name,
            }).strip()  # Remove newlines that editors can insist on adding.
            email_context = {"new_des": new_des, "docket": docket}
            txt_template = loader.get_template("docket_alert_email.txt")
            html_template = loader.get_template("docket_alert_email.html")

            messages = []
            for email_address in email_addresses:
                msg = EmailMultiAlternatives(
                    subject=subject,
                    body=txt_template.render(email_context),
                    from_email=settings.DEFAULT_ALERTS_EMAIL,
                    to=[email_address],
                    headers={"X-Entity-Ref-ID": "docket.alert:%s" % d_pk},
                )
                msg.attach_alternative(html_template.render(email_context),
                                       "text/html")
                messages.append(msg)

            # Add a bcc to the first message in the list so that we get a
            # copy.
            messages[0].bcc = ["*****@*****.**"]
            get_connection().send_messages(messages)
            tally_stat("alerts.docket.alerts.sent", inc=len(email_addresses))
            DocketAlert.objects.filter(docket=docket).update(
                date_last_hit=now())

    # Work completed, clear the semaphore
    r = make_redis_interface("ALERTS")
    r.delete(make_alert_key(d_pk))
def view_recap_document(request, docket_id=None, doc_num=None, att_num=None,
                        slug=""):
    """This view can either load an attachment or a regular document,
    depending on the URL pattern that is matched.
    """
    item = get_object_or_404(
        RECAPDocument,
        docket_entry__docket__id=docket_id,
        document_number=doc_num,
        attachment_number=att_num,
    )

    # Check if the user has requested automatic redirection to the document.
    # If it's available from CourtListener, send them to the local copy;
    # otherwise, send them to PACER.
    want_redirect = request.GET.get("redirectToDownload", False)
    if want_redirect:
        if item.is_available:
            return redirect(item.filepath_local)
        return redirect(item.pacer_url)

    title = make_rd_title(item)
    if is_og_bot(request):
        # Open Graph crawlers get a thumbnail; refresh so the new thumbnail
        # fields are visible on this instance.
        make_thumb_if_needed(item)
        item.refresh_from_db()

    try:
        fave = Favorite.objects.get(recap_doc_id=item.pk, user=request.user)
        favorite_form = FavoriteForm(instance=fave)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(initial={
            "recap_doc_id": item.pk,
            "name": trunc(title, 100, ellipsis="..."),
        })

    return render(
        request,
        "recap_document.html",
        {
            "document": item,
            "title": title,
            "favorite_form": favorite_form,
            "private": True,  # Always True for RECAP docs.
        },
    )
def view_docket(request, pk, _):
    """Show a docket page with its (filterable, paginated) entries."""
    docket = get_object_or_404(Docket, pk=pk)
    if not is_bot(request):
        # Atomic view-count bump via an F() expression.
        docket.view_count = F('view_count') + 1
        docket.save()

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
        favorite_form = FavoriteForm(instance=fave)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                'docket_id': docket.pk,
                'name': trunc(best_case_name(docket), 100, ellipsis='...'),
            })

    de_list = docket.docket_entries.all()
    form = DocketEntryFilterForm(request.GET)
    if form.is_valid():
        cd = form.cleaned_data
        # Declarative mapping of form fields to ORM lookups.
        for field, lookup in (
            ('entry_gte', 'entry_number__gte'),
            ('entry_lte', 'entry_number__lte'),
            ('filed_after', 'date_filed__gte'),
            ('filed_before', 'date_filed__lte'),
        ):
            if cd.get(field):
                de_list = de_list.filter(**{lookup: cd[field]})
        if cd.get('order_by') == DocketEntryFilterForm.DESCENDING:
            de_list = de_list.order_by('-entry_number')

    paginator = Paginator(de_list, 500, orphans=25)
    page = request.GET.get('page')
    try:
        docket_entries = paginator.page(page)
    except PageNotAnInteger:
        docket_entries = paginator.page(1)
    except EmptyPage:
        docket_entries = paginator.page(paginator.num_pages)

    return render(
        request, 'view_docket.html', {
            'docket': docket,
            'docket_entries': docket_entries,
            'form': form,
            'favorite_form': favorite_form,
            'get_string': make_get_string(request),
            'private': docket.blocked,
        })
def do_docket_number(data: Dict[str, Any]) -> str:
    """Extract the docket number

    :param data: The full json data dict
    :return: The docket number
    """
    docket_number = data["docket_number"]
    if not docket_number:
        return ""
    return trunc(
        docket_number,
        length=5000,
        ellipsis="...",
    )
def cluster_visualizations(
    request: HttpRequest, pk: int, slug: str
) -> HttpResponse:
    """Show the visualizations tab for an opinion cluster."""
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = "%s, %s" % (
        trunc(best_case_name(cluster), 100),
        cluster.citation_string,
    )
    context = {
        "title": title,
        "cluster": cluster,
        "private": cluster.blocked or cluster.has_private_authority,
    }
    return render(request, "view_opinion_visualizations.html", context)
def view_docket(request, pk, _):
    """Show a docket page with its (filterable, paginated) entries."""
    docket = get_object_or_404(Docket, pk=pk)
    if not is_bot(request):
        # Atomic view-count bump via an F() expression.
        docket.view_count = F('view_count') + 1
        docket.save()

    try:
        favorite = Favorite.objects.get(docket_id=docket.pk,
                                        user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(initial={
            'docket_id': docket.pk,
            'name': trunc(best_case_name(docket), 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=favorite)

    de_list = docket.docket_entries.all()
    form = DocketEntryFilterForm(request.GET)
    if form.is_valid():
        cd = form.cleaned_data
        if cd.get('entry_gte'):
            de_list = de_list.filter(entry_number__gte=cd['entry_gte'])
        if cd.get('entry_lte'):
            de_list = de_list.filter(entry_number__lte=cd['entry_lte'])
        if cd.get('filed_after'):
            de_list = de_list.filter(date_filed__gte=cd['filed_after'])
        if cd.get('filed_before'):
            de_list = de_list.filter(date_filed__lte=cd['filed_before'])
        if cd.get('order_by') == DocketEntryFilterForm.DESCENDING:
            de_list = de_list.order_by('-entry_number')

    paginator = Paginator(de_list, 500, orphans=25)
    try:
        docket_entries = paginator.page(request.GET.get('page'))
    except PageNotAnInteger:
        docket_entries = paginator.page(1)
    except EmptyPage:
        docket_entries = paginator.page(paginator.num_pages)

    return render(request, 'view_docket.html', {
        'docket': docket,
        'docket_entries': docket_entries,
        'form': form,
        'favorite_form': favorite_form,
        'get_string': make_get_string(request),
        'private': docket.blocked,
    })
def view_authorities(request, pk, slug):
    """Show the authorities cited by an opinion cluster."""
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = '%s, %s' % (
        trunc(best_case_name(cluster), 100),
        cluster.citation_string,
    )
    return render_to_response(
        'view_opinion_authorities.html',
        {
            'title': title,
            'cluster': cluster,
            'private': cluster.blocked or cluster.has_private_authority,
            'authorities': cluster.authorities.order_by('case_name'),
        },
        RequestContext(request))
def view_authorities(request, pk, slug):
    """Show the authorities cited by an opinion cluster."""
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    context = {
        'title': '%s, %s' % (
            trunc(best_case_name(cluster), 100),
            cluster.citation_string
        ),
        'cluster': cluster,
        'private': cluster.blocked or cluster.has_private_authority,
        'authorities': cluster.authorities.order_by('case_name'),
    }
    return render_to_response(
        'view_opinion_authorities.html',
        context,
        RequestContext(request)
    )
def make_objects(
    item: Dict[str, Any],
    court: Court,
    sha1_hash: str,
    content: str,
) -> Tuple[Docket, Audio]:
    """Build (unsaved) Docket and Audio objects from a scraped item."""
    blocked = item["blocked_statuses"]
    date_blocked = date.today() if blocked else None

    case_name_short = item.get("case_name_shorts") or cnt.make_case_name_short(
        item["case_names"]
    )

    docket = Docket(
        docket_number=item.get("docket_numbers", ""),
        case_name=item["case_names"],
        case_name_short=case_name_short,
        court=court,
        blocked=blocked,
        date_blocked=date_blocked,
        date_argued=item["case_dates"],
        source=item.get("source") or Docket.SCRAPER,
    )
    audio_file = Audio(
        judges=item.get("judges", ""),
        source=item.get("cluster_source") or "C",
        case_name=item["case_names"],
        case_name_short=case_name_short,
        sha1=sha1_hash,
        download_url=item["download_urls"],
        blocked=blocked,
        date_blocked=date_blocked,
    )

    cf = ContentFile(content)
    extension = get_extension(content)
    if extension not in [".mp3", ".wma"]:
        # Fall back to the extension found in the download URL.
        extension = "." + item["download_urls"].lower().rsplit(".", 1)[1]
    file_name = trunc(item["case_names"].lower(), 75) + extension
    audio_file.file_with_date = docket.date_argued
    audio_file.local_path_original_file.save(file_name, cf, save=False)

    return docket, audio_file
def test_trunc(self) -> None:
    """Does trunc give us the results we expect?"""

    class TestType(TypedDict, total=False):
        length: int
        result: str
        ellipsis: str

    s = "Henry wants apple."
    tests: Tuple[TestType, ...] = (
        # Straightforward truncation at a word boundary
        {"length": 13, "result": "Henry wants"},
        # Boundary conditions around a word's length
        {"length": 4, "result": "Henr"},
        {"length": 5, "result": "Henry"},
        {"length": 6, "result": "Henry"},
        # The ellipsis counts toward the measured length
        {"length": 12, "ellipsis": "...", "result": "Henry..."},
        # A custom ellipsis string is honored
        {"length": 15, "ellipsis": "....", "result": "Henry wants...."},
        # Mid-word cut when there is no space to break on
        {"length": 2, "result": "He"},
        # Mid-word cut combined with an ellipsis
        {"length": 6, "ellipsis": "...", "result": "Hen..."},
        # Input shorter than the limit comes back untouched
        {"length": 50, "result": s},
    )
    for case in tests:
        expected = case["result"]
        limit = case["length"]
        actual = trunc(
            s=s,
            length=limit,
            ellipsis=case.get("ellipsis", None),
        )
        self.assertEqual(
            actual,
            expected,
            msg="Failed with dict: %s.\n%s != %s" % (case, actual, expected),
        )
        self.assertTrue(
            len(actual) <= limit,
            msg="Failed with dict: %s.\n%s is longer than %s"
            % (case, actual, limit),
        )
def view_authorities(request, pk, slug):
    """Render the authorities page for an opinion cluster."""
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    page_title = "%s, %s" % (
        trunc(best_case_name(cluster), 100),
        cluster.citation_string,
    )
    context = {
        "title": page_title,
        "cluster": cluster,
        # Private if the cluster is blocked or cites a private authority.
        "private": cluster.blocked or cluster.has_private_authority,
        "authorities_with_data": cluster.authorities_with_data,
    }
    return render(request, "view_opinion_authorities.html", context)
def view_recap_document(
    request: HttpRequest,
    docket_id: Optional[int] = None,
    doc_num: Optional[int] = None,
    att_num: Optional[int] = None,
    slug: str = "",
) -> HttpResponse:
    """This view can either load an attachment or a regular document,
    depending on the URL pattern that is matched.
    """
    matches = RECAPDocument.objects.filter(
        docket_entry__docket__id=docket_id,
        document_number=doc_num,
        attachment_number=att_num,
    ).order_by("pk")
    try:
        # Take the oldest match when duplicates exist.
        rd = matches[0]
    except IndexError:
        raise Http404("No RECAPDocument matches the given query.")

    title = make_rd_title(rd)
    rd = make_thumb_if_needed(request, rd)

    try:
        fave = Favorite.objects.get(recap_doc_id=rd.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Either the user is anonymous (TypeError) or has not favorited
        # this document yet; offer an unbound, prefilled form.
        favorite_form = FavoriteForm(
            initial={
                "recap_doc_id": rd.pk,
                "name": trunc(title, 100, ellipsis="..."),
            }
        )
    else:
        favorite_form = FavoriteForm(instance=fave)

    context = {
        "rd": rd,
        "title": title,
        "favorite_form": favorite_form,
        "private": True,  # Always True for RECAP docs.
    }
    return render(request, "recap_document.html", context)
def process_audio_file(self, pk) -> None:
    """Given the key to an audio file, extract its content and add the
    related meta data to the database.

    :param self: A Celery task object
    :param pk: Audio file pk
    :return: None
    """
    audio = Audio.objects.get(pk=pk)
    response = convert_and_clean_audio(audio)
    response.raise_for_status()
    payload = response.json()

    # Decode the converted mp3 and attach it with a date-based filename.
    mp3_content = ContentFile(base64.b64decode(payload["audio_b64"]))
    file_name = trunc(best_case_name(audio).lower(), 72) + "_cl.mp3"
    audio.file_with_date = audio.docket.date_argued
    audio.local_path_mp3.save(file_name, mp3_content, save=False)
    audio.duration = payload["duration"]
    audio.processing_complete = True
    audio.save()
def view_recap_document(request, docket_id=None, doc_num=None, att_num=None,
                        slug=''):
    """This view can either load an attachment or a regular document,
    depending on the URL pattern that is matched.
    """
    item = get_object_or_404(
        RECAPDocument,
        docket_entry__docket__id=docket_id,
        document_number=doc_num,
        attachment_number=att_num,
    )

    # Assemble a title of the form:
    #   "<description> – Document #N[, Attachment #M] in <case name>"
    if item.description:
        description_prefix = '%s – ' % item.description
    else:
        description_prefix = ''
    if item.document_type == RECAPDocument.ATTACHMENT:
        attachment_suffix = ', Attachment #%s' % item.attachment_number
    else:
        attachment_suffix = ''
    title = '%sDocument #%s%s in %s' % (
        description_prefix,
        item.document_number,
        attachment_suffix,
        best_case_name(item.docket_entry.docket),
    )

    try:
        fave = Favorite.objects.get(recap_doc_id=item.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Anonymous user (TypeError) or not yet favorited; send an unbound
        # form prefilled with this document's data.
        favorite_form = FavoriteForm(initial={
            'recap_doc_id': item.pk,
            'name': trunc(title, 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=fave)

    return render(
        request,
        'recap_document.html',
        {
            'document': item,
            'title': title,
            'favorite_form': favorite_form,
            'private': True,  # Always True for RECAP docs.
        })
def process_audio_file(pk):
    """Given the key to an audio file, extract its content and add the
    related meta data to the database.

    Re-encodes the original file to a 22050Hz / 48kbps mp3 with avconv,
    stamps ID3 metadata, stores the result on the Audio record, and records
    the duration.

    :param pk: Primary key of the Audio record to process.
    :raises subprocess.CalledProcessError: if avconv fails (re-raised after
        logging the command output).
    """
    af = Audio.objects.get(pk=pk)
    tmp_path = os.path.join('/tmp', 'audio_' + uuid.uuid4().hex + '.mp3')
    avconv_command = [
        'avconv', '-i', af.local_path_original_file.path,
        '-ar', '22050',  # sample rate (audio samples/s) of 22050Hz
        '-ab', '48k',    # constant bit rate (sample resolution) of 48kbps
        tmp_path,
    ]
    try:
        _ = subprocess.check_output(
            avconv_command,
            stderr=subprocess.STDOUT,
        )
    except subprocess.CalledProcessError as e:
        print('avconv failed command: %s\nerror code: %s\noutput: %s\n%s' %
              (avconv_command, e.returncode, e.output,
               traceback.format_exc()))
        raise

    try:
        set_mp3_meta_data(af, tmp_path)
        try:
            # Bug fix: read in binary mode. Text mode ('r') corrupts the
            # mp3 bytes and raises UnicodeDecodeError on Python 3.
            with open(tmp_path, 'rb') as mp3:
                cf = ContentFile(mp3.read())
            file_name = trunc(best_case_name(af).lower(), 72) + '_cl.mp3'
            af.file_with_date = af.docket.date_argued
            af.local_path_mp3.save(file_name, cf, save=False)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; keep the deliberate best-effort behavior of
            # logging and continuing.
            msg = ("Unable to save mp3 to audio_file in scraper.tasks."
                   "process_audio_file for item: %s\n"
                   "Traceback:\n"
                   "%s" % (af.pk, traceback.format_exc()))
            print(msg)
            ErrorLog.objects.create(log_level='CRITICAL',
                                    court=af.docket.court,
                                    message=msg)
        af.duration = eyed3.load(tmp_path).info.time_secs
        af.processing_complete = True
        af.save()
    finally:
        # Bug fix: the temp file was never removed, leaking into /tmp on
        # every run. (The sibling version of this task does remove it.)
        os.remove(tmp_path)
def test_trunc(self):
    """Does trunc give us the results we expect?"""
    s = 'Henry wants apple.'
    tests = (
        # Straightforward truncation at a word boundary
        {'length': 13, 'result': 'Henry wants'},
        # Boundary conditions around a word's length
        {'length': 4, 'result': 'Henr'},
        {'length': 5, 'result': 'Henry'},
        {'length': 6, 'result': 'Henry'},
        # The ellipsis counts toward the measured length
        {'length': 12, 'ellipsis': '...', 'result': 'Henry...'},
        # A custom ellipsis string is honored
        {'length': 15, 'ellipsis': '....', 'result': 'Henry wants....'},
        # Mid-word cut when there is no space to break on
        {'length': 2, 'result': 'He'},
        # Mid-word cut combined with an ellipsis
        {'length': 6, 'ellipsis': '...', 'result': 'Hen...'},
        # Input shorter than the limit comes back untouched
        {'length': 50, 'result': s},
    )
    for case in tests:
        expected = case['result']
        limit = case['length']
        actual = trunc(
            s=s,
            length=limit,
            ellipsis=case.get('ellipsis', None),
        )
        self.assertEqual(
            actual,
            expected,
            msg='Failed with dict: %s.\n%s != %s' % (case, actual, expected),
        )
        self.assertTrue(
            len(actual) <= limit,
            msg="Failed with dict: %s.\n%s is longer than %s"
                % (case, actual, limit),
        )
def save(self, *args, **kwargs):
    """Persist the Person, refreshing the slug and validating first.

    The slug is regenerated from ``name_full`` (truncated to 158 chars so
    the slugified value fits its field) on every save, then ``full_clean``
    runs model validation before delegating to the normal Django save.
    """
    self.slug = slugify(trunc(self.name_full, 158))
    self.full_clean()
    super(Person, self).save(*args, **kwargs)
# NOTE(review): fragment — this chunk begins mid-call (the tail of a
# subprocess.check_output(...) invocation) inside an older, Python 2
# version of process_audio_file; the `def` line, the command setup, and
# the opening `try:` are outside this chunk, so it is documented in
# place rather than rewritten.
            stderr=subprocess.STDOUT
        )
    except subprocess.CalledProcessError, e:
        # avconv exited non-zero: dump the command, exit code, and captured
        # output before re-raising so the caller sees the failure.
        print 'avconv failed command: %s\nerror code: %s\noutput: %s\n' % \
            (avconv_command, e.returncode, e.output)
        print traceback.format_exc()
        raise

    # Have to do this last because otherwise the mp3 hasn't yet been generated.
    set_mp3_meta_data(af, path_to_tmp_location)
    af.duration = eyed3.load(path_to_tmp_location).info.time_secs

    # NOTE(review): 'r' opens the mp3 in text mode — works on Python 2 only;
    # a Python 3 port must use 'rb'.
    with open(path_to_tmp_location, 'r') as mp3:
        try:
            cf = ContentFile(mp3.read())
            file_name = trunc(best_case_name(af).lower(), 72) + '_cl.mp3'
            af.file_with_date = af.docket.date_argued
            af.local_path_mp3.save(file_name, cf, save=False)
        except:
            # Best-effort: log the failure to the ErrorLog table and keep
            # going so the record is still marked processed below.
            msg = "Unable to save mp3 to audio_file in scraper.tasks.process_" \
                  "audio_file for item: %s\nTraceback:\n%s" % \
                  (af.pk, traceback.format_exc())
            print msg
            ErrorLog(log_level='CRITICAL', court=af.docket.court,
                     message=msg).save()

    af.processing_complete = True
    af.save()
    # Clean up the temp encoding output.
    os.remove(path_to_tmp_location)
def download_and_save():
    """This function is run in many threads simultaneously. Each thread
    runs so long as there are items in the queue. Once an item is found,
    it's downloaded and saved.

    The number of items that can be concurrently saved is determined by
    the number of threads that are running this function.
    """
    while True:
        item = queue.get()
        logger.info("%s: Attempting to add item at: %s" %
                    (threading.current_thread().name, item['url']))
        try:
            msg, r = get_binary_content(
                item['url'],
                {},
            )
        except Exception:
            # Bug fix: the original fell through here and then hit a
            # NameError on `msg`; skip to the next queue item instead.
            logger.info("%s: Unable to get item at: %s" %
                        (threading.current_thread().name, item['url']))
            queue.task_done()
            continue
        if msg:
            logger.warn(msg)
            queue.task_done()
            continue

        sha1_hash = hashlib.sha1(r.content).hexdigest()
        if Audio.objects.filter(sha1=sha1_hash).exists():
            # Simpsons did it! Try the next one.
            logger.info("%s: Item already exists, moving to next item." %
                        threading.current_thread().name)
            queue.task_done()
            continue

        # New item, onwards!
        logger.info('%s: Adding new document found at: %s' %
                    (threading.current_thread().name, item['url']))
        audio_file = Audio(
            source='H',
            sha1=sha1_hash,
            case_name=item['case_name'],
            download_url=item['url'],
            processing_complete=False,
        )
        if item['judges']:
            audio_file.judges = item['judges']
        court = Court.objects.get(pk=item['court_code'])
        docket = Docket(
            case_name=item['case_name'],
            court=court,
            date_argued=item['date_argued'],
        )
        if item['docket_number']:
            # Bug fix: docket numbers belong on the docket. The original set
            # audio_file.docket.docket_number before any docket was attached,
            # which raised AttributeError on None.
            docket.docket_number = item['docket_number']

        # Make and associate the file object
        try:
            cf = ContentFile(r.content)
            extension = get_extension(r.content)
            if extension not in ['.mp3', '.wma']:
                extension = '.' + item['url'].rsplit('.', 1)[1]
            # See bitbucket issue #215 for why this must be
            # lower-cased.
            file_name = trunc(item['case_name'].lower(), 75) + extension
            audio_file.local_path_original_file.save(file_name, cf,
                                                     save=False)
        except Exception:
            msg = 'Unable to save binary. Deleted document: %s.\n%s' % \
                (item['case_name'], traceback.format_exc())
            logger.critical(msg)
            queue.task_done()
            # Bug fix: don't fall through and save a record whose binary
            # could not be stored.
            continue

        docket.save()
        audio_file.docket = docket
        audio_file.save(index=False)
        random_delay = random.randint(0, 3600)
        process_audio_file.apply_async(
            (audio_file.pk,),
            countdown=random_delay,
        )
        logger.info("%s: Successfully added audio file %s: %s" %
                    (threading.current_thread().name,
                     audio_file.pk,
                     audio_file.case_name))
        # Bug fix: mark the unit of work done on the success path too, so
        # queue.join() can eventually return.
        queue.task_done()
def view_opinion(request, pk, _): """Using the cluster ID, return the cluster of opinions. We also test if the cluster ID is a favorite for the user, and send data if needed. If it's a favorite, we send the bound form for the favorite so it can populate the form on the page. If it is not a favorite, we send the unbound form. """ # Look up the court, cluster, title and favorite information cluster = get_object_or_404(OpinionCluster, pk=pk) title = ', '.join([s for s in [ trunc(best_case_name(cluster), 100, ellipsis="..."), cluster.citation_string, ] if s.strip()]) has_downloads = False for sub_opinion in cluster.sub_opinions.all(): if sub_opinion.local_path or sub_opinion.download_url: has_downloads = True break get_string = search_utils.make_get_string(request) try: fave = Favorite.objects.get(cluster_id=cluster.pk, user=request.user) except (ObjectDoesNotExist, TypeError): # Not favorited or anonymous user favorite_form = FavoriteForm(initial={ 'cluster_id': cluster.pk, 'name': trunc(best_case_name(cluster), 100, ellipsis='...'), }) else: favorite_form = FavoriteForm(instance=fave) if not is_bot(request): # Get the citing results from Solr for speed. Only do this for humans # to save on disk usage. conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r') q = { 'q': 'cites:({ids})'.format( ids=' OR '.join([str(pk) for pk in (cluster.sub_opinions .values_list('pk', flat=True))]) ), 'rows': 5, 'start': 0, 'sort': 'citeCount desc', 'caller': 'view_opinion', } citing_clusters = conn.raw_query(**q).execute() else: citing_clusters = None return render(request, 'view_opinion.html', { 'title': title, 'cluster': cluster, 'has_downloads': has_downloads, 'favorite_form': favorite_form, 'get_string': get_string, 'private': cluster.blocked, 'citing_clusters': citing_clusters, 'top_authorities': cluster.authorities[:5], })
def merge_cases_simple(new, target_id):
    """Add `new` to the database, merging with target_id

    Merging is done by picking the best fields from each item.
    """
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # !! THIS CODE IS OUT OF DATE AND UNMAINTAINED. FEEL FREE TO FIX IT, BUT !!
    # !! DO NOT TRUST IT IN ITS CURRENT STATE.                               !!
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    target = OpinionCluster.objects.get(pk=target_id)
    print "Merging %s with" % new.case_name
    print " %s" % target.case_name

    # Mark the source as "lawbox + original" ('L' prefix), keeping the
    # pre-merge value for the download-URL decision below.
    cached_source = target.source  # Original value is needed below.
    if target.source == 'C':
        target.source = 'LC'
    elif target.source == 'R':
        target.source = 'LR'
    elif target.source == 'CR':
        target.source = 'LCR'

    # Add the URL if it's not a court one, replacing public.resource.org's
    # info in some cases.
    if cached_source == 'R':
        target.download_url = new.download_url

    # Recreate the slug from the new case name (this changes the URL, but the
    # old will continue working)
    target.slug = slugify(trunc(new.case_name, 75))

    # Take the case name from the new item; they tend to be pretty good
    target.case_name = new.case_name

    # Add the docket number if the old doesn't exist, but keep the old if one
    # does.
    if not target.docket.docket_number:
        target.docket.docket_number = new.docket.docket_number

    # Get the citations from the new item (ditch the old).
    target.federal_cite_one = new.federal_cite_one
    target.federal_cite_two = new.federal_cite_two
    target.federal_cite_three = new.federal_cite_three
    target.state_cite_one = new.state_cite_one
    target.state_cite_two = new.state_cite_two
    target.state_cite_three = new.state_cite_three
    target.state_cite_regional = new.state_cite_regional
    target.specialty_cite_one = new.specialty_cite_one
    target.scotus_early_cite = new.scotus_early_cite
    target.lexis_cite = new.lexis_cite
    target.westlaw_cite = new.westlaw_cite
    target.neutral_cite = new.neutral_cite

    # Add judge information if lacking. New is dirty, but better than none.
    if not target.judges:
        target.judges = new.judges

    # Add the text.
    target.html_lawbox, blocked = anonymize(new.html)
    if blocked:
        target.blocked = True
        target.date_blocked = now()

    target.extracted_by_ocr = False  # No longer true for any LB case.
    # NOTE(review): no target.save() appears within this chunk — presumably
    # the function continues past this view; confirm against the full file.
# NOTE(review): fragment — this chunk begins mid-function (the `def` line
# and the preceding `if item['judges']:` guard are outside this view), so
# it is documented in place rather than rewritten. It appears to repair an
# existing Audio record (`af`) and its docket from scraped `item` data.
af.judges = item['judges']
if item['docket_number']:
    af.docket.docket_number = item['docket_number']
court = Court.objects.get(pk=item['court_code'])
docket.court = court

# Fix the files. First save the location of the old files.
original_local_path = af.local_path_original_file.path
original_mp3_path = af.local_path_mp3.path

# Create a new file with the contents of the old and a corrected
# name. This is only in memory for the moment.
cf = ContentFile(af.local_path_original_file.read())
extension = '.' + af.local_path_original_file.path.rsplit('.', 1)[1]
file_name = trunc(item['case_name'].lower(), 75) + extension
af.local_path_original_file.save(file_name, cf, save=False)

# Create a new mp3 file with the new contents
cf = ContentFile(af.local_path_mp3.read())
file_name = trunc(af.case_name.lower(), 72) + '_cl.mp3'
af.local_path_mp3.save(file_name, cf, save=False)

# Save things so they can be referenced in a sec.
docket.save()
af.save(index=False)

# Update the ID3 information and duration data.
new_mp3_path = af.local_path_mp3.path
logger.info("Updating mpr at: %s" % new_mp3_path)
set_mp3_meta_data(af, new_mp3_path)
def __unicode__(self):
    """Debug-friendly representation: pk plus a truncated description."""
    short_description = trunc(self.description, 50, ellipsis="...")
    return "<DocketEntry:%s ---> %s >" % (self.pk, short_description)