示例#1
0
def core_docket_data(request, pk):
    """Gather the core data for a docket, party, or IDB page."""
    docket = get_object_or_404(Docket, pk=pk)

    # Title is "case name, docket number", omitting any blank component.
    title_parts = [
        trunc(best_case_name(docket), 100, ellipsis="..."),
        docket.docket_number,
    ]
    title = ', '.join(part for part in title_parts if part.strip())

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(initial={
            'docket_id': docket.pk,
            'name': trunc(best_case_name(docket), 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=fave)

    # Anonymous users can never have a docket alert.
    if request.user.is_authenticated:
        has_alert = DocketAlert.objects.filter(
            docket=docket, user=request.user).exists()
    else:
        has_alert = False

    return docket, {
        'docket': docket,
        'title': title,
        'favorite_form': favorite_form,
        'has_alert': has_alert,
        'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
        'private': docket.blocked,
    }
示例#2
0
def core_docket_data(request, pk):
    """Gather the core data for a docket, party, or IDB page."""
    docket = get_object_or_404(Docket, pk=pk)
    case_name = trunc(best_case_name(docket), 100, ellipsis="...")
    # Join the non-blank pieces: "case name, docket number".
    title = ', '.join(
        s for s in (case_name, docket.docket_number) if s.strip()
    )

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        initial = {
            'docket_id': docket.pk,
            'name': trunc(best_case_name(docket), 100, ellipsis='...'),
        }
        favorite_form = FavoriteForm(initial=initial)
    else:
        favorite_form = FavoriteForm(instance=fave)

    has_alert = False
    if request.user.is_authenticated:
        has_alert = DocketAlert.objects.filter(
            docket=docket, user=request.user,
        ).exists()

    context = {
        'docket': docket,
        'title': title,
        'favorite_form': favorite_form,
        'has_alert': has_alert,
        'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
        'private': docket.blocked,
    }
    return docket, context
示例#3
0
def view_opinion(request, pk, _):
    """Using the cluster ID, return the cluster of opinions.

    We also test if the cluster ID is a favorite for the user, and send data
    if needed. If it's a favorite, we send the bound form for the favorite so
    it can populate the form on the page. If it is not a favorite, we send the
    unbound form.
    """
    # Look up the court, cluster, title and favorite information
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = '%s, %s' % (
        trunc(best_case_name(cluster), 100),
        cluster.citation_string,
    )
    get_string = search_utils.make_get_string(request)

    try:
        fave = Favorite.objects.get(
            cluster_id=cluster.pk,
            user=request.user,
        )
        favorite_form = FavoriteForm(instance=fave)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                'cluster_id': cluster.pk,
                'name': trunc(best_case_name(cluster), 100, ellipsis='...'),
            }
        )

    # Get the citing results from Solr for speed.
    # Query: opinions whose "cites" field matches any of this cluster's
    # sub-opinion PKs; top 5 ordered by citation count.
    # NOTE(review): the list comp below rebinds `pk` (Python 2 listcomp scope
    # leak); harmless here only because `pk` is unused afterwards.
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    q = {
        'q': 'cites:({ids})'.format(
            ids=' OR '.join([str(pk) for pk in
                             (cluster.sub_opinions
                              .values_list('pk', flat=True))])
        ),
        'rows': 5,
        'start': 0,
        'sort': 'citeCount desc',
        'caller': 'view_opinion',
    }
    citing_clusters = conn.raw_query(**q).execute()

    return render_to_response(
        'view_opinion.html',
        {
            'title': title,
            'cluster': cluster,
            'favorite_form': favorite_form,
            'get_string': get_string,
            'private': cluster.blocked,
            'citing_clusters': citing_clusters,
            'top_authorities': cluster.authorities[:5],
        },
        RequestContext(request)
    )
示例#4
0
def view_parties(request, docket_id, slug):
    """Show the parties and attorneys tab on the docket.

    :param request: The HTTP request.
    :param docket_id: The PK of the docket whose parties are shown.
    :param slug: The docket slug from the URL (not used in the body).
    """
    docket = get_object_or_404(Docket, pk=docket_id)
    # Title is "case name, docket number", skipping blank components.
    title = ', '.join([
        s for s in [
            trunc(best_case_name(docket), 100, ellipsis="..."),
            docket.docket_number,
        ] if s.strip()
    ])
    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                'docket_id': docket.pk,
                'name': trunc(best_case_name(docket), 100, ellipsis='...'),
            })
    else:
        favorite_form = FavoriteForm(instance=fave)

    # We work with this data at the level of party_types so that we can group
    # the parties by this field. From there, we do a whole mess of prefetching,
    # which reduces the number of queries needed for this down to four instead
    # of potentially thousands (good times!)
    party_types = docket.party_types.select_related('party').prefetch_related(
        Prefetch(
            'party__roles',
            queryset=Role.objects.filter(docket=docket).order_by(
                'attorney_id', 'role',
                'date_action').select_related('attorney').prefetch_related(
                    Prefetch(
                        'attorney__organizations',
                        queryset=AttorneyOrganization.objects.filter(
                            attorney_organization_associations__docket=docket).
                        distinct(),
                        to_attr='firms_in_docket',
                    )))).order_by('name', 'party__name')

    # groupby requires its input sorted by the grouping key; the queryset is
    # ordered by 'name' above. Distinct loop names avoid rebinding (and thus
    # shadowing) the `party_types` queryset mid-iteration.
    parties = []
    for type_name, type_group in groupby(party_types, lambda x: x.name):
        parties.append({
            'party_type_name': type_name,
            'party_type_objects': list(type_group),
        })

    return render(
        request, 'docket_parties.html', {
            'docket': docket,
            'title': title,
            'parties': parties,
            'favorite_form': favorite_form,
            'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
            'private': docket.blocked,
        })
示例#5
0
def view_opinion(request, pk, _):
    """Using the cluster ID, return the cluster of opinions.

    We also test if the cluster ID is a favorite for the user, and send data
    if needed. If it's a favorite, we send the bound form for the favorite so
    it can populate the form on the page. If it is not a favorite, we send the
    unbound form.
    """
    # Look up the cluster and build the "case name, citation" title.
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title_parts = [
        trunc(best_case_name(cluster), 100, ellipsis="..."),
        cluster.citation_string,
    ]
    title = ", ".join(s for s in title_parts if s.strip())

    # Offer a download link when any sub-opinion has a file available.
    has_downloads = any(
        sub.local_path or sub.download_url
        for sub in cluster.sub_opinions.all()
    )
    get_string = make_get_string(request)

    try:
        fave = Favorite.objects.get(cluster_id=cluster.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        initial = {
            "cluster_id": cluster.pk,
            "name": trunc(best_case_name(cluster), 100, ellipsis="..."),
        }
        favorite_form = FavoriteForm(initial=initial)
    else:
        favorite_form = FavoriteForm(instance=fave)

    citing_clusters = get_citing_clusters_with_cache(cluster, is_bot(request))

    authorities = cluster.authorities_with_data
    return render(
        request,
        "view_opinion.html",
        {
            "title": title,
            "cluster": cluster,
            "has_downloads": has_downloads,
            "favorite_form": favorite_form,
            "get_string": get_string,
            "private": cluster.blocked,
            "citing_clusters": citing_clusters,
            "top_authorities": authorities[:5],
            "authorities_count": len(authorities),
        },
    )
示例#6
0
def upload_free_opinion_to_ia(self, rd_pk):
    """Upload a free PACER opinion PDF to the Internet Archive.

    Bound task: on transient failures it re-queues itself via ``self.retry``
    until ``self.max_retries`` is exhausted, then gives up silently (a later
    cron run picks the document up again).

    :param rd_pk: The PK of the RECAPDocument whose local file is uploaded.
    """
    rd = RECAPDocument.objects.get(pk=rd_pk)
    d = rd.docket_entry.docket
    file_name = get_document_filename(
        d.court_id,
        d.pacer_case_id,
        rd.document_number,
        0,  # Attachment number is zero for all free opinions.
    )
    bucket_name = get_bucket_name(d.court_id, d.pacer_case_id)
    try:
        responses = upload_to_ia(
            identifier=bucket_name,
            files=rd.filepath_local.path,
            metadata={
                'title': best_case_name(d),
                'collection': settings.IA_COLLECTIONS,
                'contributor': '<a href="https://free.law">Free Law Project</a>',
                'court': d.court_id,
                'language': 'eng',
                'mediatype': 'texts',
                'description': "This item represents a case in PACER, "
                               "the U.S. Government's website for "
                               "federal case data. If you wish to see "
                               "the entire case, please consult PACER "
                               "directly.",
                'licenseurl': 'https://www.usa.gov/government-works',
            },
        )
    except (OverloadedException, ExpatError) as exc:
        # Overloaded: IA wants us to slow down.
        # ExpatError: The syntax of the XML file that's supposed to be returned
        #             by IA is bad (or something).
        if self.request.retries == self.max_retries:
            # Give up for now. It'll get done next time cron is run.
            return
        raise self.retry(exc=exc)
    except HTTPError as exc:
        if exc.response.status_code in [
            HTTP_403_FORBIDDEN,    # Can't access bucket, typically.
            HTTP_400_BAD_REQUEST,  # Corrupt PDF, typically.
        ]:
            # Permanent failure: retrying won't help, so hand the bad
            # response back to the caller for inspection.
            return [exc.response]
        if self.request.retries == self.max_retries:
            # This exception is also raised when the endpoint is overloaded,
            # but doesn't get caught by the OverloadedException handler above
            # due to multiple processes running at the same time. Just give
            # up for now.
            return
        raise self.retry(exc=exc)
    except (requests.Timeout, requests.RequestException) as exc:
        logger.warning("Timeout or unknown RequestException. Unable to upload "
                       "to IA. Trying again if retries not exceeded: %s" % rd)
        if self.request.retries == self.max_retries:
            # Give up for now. It'll get done next time cron is run.
            return
        raise self.retry(exc=exc)
    if all(r.ok for r in responses):
        # Every response succeeded; record the public IA download URL.
        rd.filepath_ia = "https://archive.org/download/%s/%s" % (
            bucket_name, file_name)
        rd.save(do_extraction=False, index=False)
示例#7
0
    def save(self, *args, **kwargs):
        """Refresh the slug from the case name, validate, then save."""
        name = best_case_name(self)
        self.slug = slugify(trunc(name, 75))
        # Source 1 is a RECAP docket, which must carry a PACER case ID.
        if self.source == 1 and not self.pacer_case_id:
            raise ValidationError(
                "pacer_case_id cannot be Null or empty in RECAP documents.")
        super(Docket, self).save(*args, **kwargs)
示例#8
0
 def caption(self):
     """Build a display caption: case name, one citation, and a parenthetical.

     Exactly one citation is appended, chosen by the precedence order of the
     elif chain below. A neutral cite returns early because it takes no
     parenthetical court/year suffix. The &nbsp; entities imply the result is
     rendered as HTML.
     """
     caption = best_case_name(self)
     if self.neutral_cite:
         caption += ", %s" % self.neutral_cite
         return caption  # neutral cites lack the parentheses, so we're done here.
     elif self.federal_cite_one:
         caption += ", %s" % self.federal_cite_one
     elif self.specialty_cite_one:
         caption += ", %s" % self.specialty_cite_one
     elif self.state_cite_regional:
         caption += ", %s" % self.state_cite_regional
     elif self.state_cite_one:
         caption += ", %s" % self.state_cite_one
     elif self.westlaw_cite and self.lexis_cite:
         # If both WL and LEXIS
         caption += ", %s, %s" % (self.westlaw_cite, self.lexis_cite)
     elif self.westlaw_cite:
         # If only WL
         caption += ", %s" % self.westlaw_cite
     elif self.lexis_cite:
         # If only LEXIS
         caption += ", %s" % self.lexis_cite
     elif self.docket.docket_number:
         caption += ", %s" % self.docket.docket_number
     # Parenthetical: "(<court>&nbsp;<year>)"; the court is omitted for
     # SCOTUS, and its internal spaces become non-breaking.
     caption += ' ('
     if self.docket.court.citation_string != 'SCOTUS':
         caption += re.sub(' ', '&nbsp;', self.docket.court.citation_string)
         caption += '&nbsp;'
     caption += '%s)' % self.date_filed.isoformat().split('-')[
         0]  # b/c strftime f's up before 1900.
     return caption
示例#9
0
    def save(self, *args, **kwargs):
        """Slugify the case name and enforce the RECAP pacer_case_id rule."""
        self.slug = slugify(trunc(best_case_name(self), 75))
        is_recap = self.source == 1
        if is_recap and not self.pacer_case_id:
            raise ValidationError(
                "pacer_case_id cannot be Null or empty in "
                "RECAP documents.")
        super(Docket, self).save(*args, **kwargs)
示例#10
0
def view_recap_document(request, docket_id=None, doc_num=None, att_num=None,
                        slug=''):
    """This view can either load an attachment or a regular document,
    depending on the URL pattern that is matched.
    """
    item = get_object_or_404(
        RECAPDocument,
        docket_entry__docket__id=docket_id,
        document_number=doc_num,
        attachment_number=att_num,
    )

    # Assemble the title piecewise; optional parts collapse to ''.
    if item.description:
        description_part = '%s &ndash; ' % item.description
    else:
        description_part = ''
    if item.document_type == RECAPDocument.ATTACHMENT:
        attachment_part = ', Attachment #%s' % item.attachment_number
    else:
        attachment_part = ''
    title = '%sDocument #%s%s in %s' % (
        description_part,
        item.document_number,
        attachment_part,
        best_case_name(item.docket_entry.docket),
    )

    try:
        fave = Favorite.objects.get(recap_doc_id=item.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(initial={
            'recap_doc_id': item.pk,
            'name': trunc(title, 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=fave)

    context = {
        'document': item,
        'title': title,
        'favorite_form': favorite_form,
        'private': True,  # Always True for RECAP docs.
    }
    return render(request, 'recap_document.html', context)
示例#11
0
def upload_recap_json(self, pk):
    """Make a JSON object for a RECAP docket and upload it to IA

    :param pk: The PK of the docket to serialize and upload.
    """
    d, json_str = generate_ia_json(pk)

    file_name = get_docket_filename(d.court_id, d.pacer_case_id, 'json')
    bucket_name = get_bucket_name(d.court_id, d.pacer_case_id)
    responses = upload_to_ia(
        self,
        identifier=bucket_name,
        files={file_name: StringIO(json_str)},
        title=best_case_name(d),
        collection=settings.IA_COLLECTIONS,
        court_id=d.court_id,
        source_url='https://www.courtlistener.com%s' % d.get_absolute_url(),
        media_type='texts',
        description="This item represents a case in PACER, the U.S. "
        "Government's website for federal case data. This "
        "information is uploaded quarterly. To see our most "
        "recent version please use the source url parameter, "
        "linked below. To see the canonical source for this data, "
        "please consult PACER directly.",
    )
    # A None result means no responses came back; treat it as a failure.
    if responses is None:
        increment_failure_count(d)
        return

    if all(r.ok for r in responses):
        # Success: clear the failure bookkeeping and record the IA URL.
        d.ia_upload_failure_count = None
        d.ia_date_first_changed = None
        d.ia_needs_upload = False
        d.filepath_ia_json = "https://archive.org/download/%s/%s" % (
            bucket_name, file_name)
        d.save()
    else:
        increment_failure_count(d)
示例#12
0
    def save(self, index=True, force_commit=False, *args, **kwargs):
        """Save the cluster, refresh its slug, and optionally re-index it."""
        self.slug = slugify(trunc(best_case_name(self), 75))
        super(OpinionCluster, self).save(*args, **kwargs)
        if not index:
            return
        # Deferred import (presumably avoids a circular dependency between
        # the models and the search tasks -- confirm).
        from cl.search.tasks import add_or_update_cluster

        add_or_update_cluster.delay(self.pk, force_commit)
示例#13
0
def core_docket_data(
    request: HttpRequest,
    pk: int,
) -> Tuple[Docket, Dict[str, Union[bool, str, Docket, FavoriteForm]]]:
    """Gather the core data for a docket, party, or IDB page."""
    docket = get_object_or_404(Docket, pk=pk)
    title = make_docket_title(docket)

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        initial = {
            "docket_id": docket.pk,
            "name": trunc(best_case_name(docket), 100, ellipsis="..."),
        }
        favorite_form = FavoriteForm(initial=initial)
    else:
        favorite_form = FavoriteForm(instance=fave)

    has_alert = user_has_alert(request.user, docket)

    context = {
        "docket": docket,
        "title": title,
        "favorite_form": favorite_form,
        "has_alert": has_alert,
        "timezone": COURT_TIMEZONES.get(docket.court_id, "US/Eastern"),
        "private": docket.blocked,
    }
    return docket, context
示例#14
0
def view_audio_file(request, pk, _):
    """Using the ID, return the oral argument page.

    We also test if the item is a favorite and send data as such.
    """
    af = get_object_or_404(Audio, pk=pk)
    title = trunc(af.case_name, 100)
    get_string = search_utils.make_get_string(request)

    try:
        fave = Favorite.objects.get(audio_id=af.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        initial = {
            'audio_id': af.pk,
            'name': trunc(best_case_name(af.docket), 100, ellipsis='...'),
        }
        favorite_form = FavoriteForm(initial=initial)
    else:
        favorite_form = FavoriteForm(instance=fave)

    context = {
        'title': title,
        'af': af,
        'favorite_form': favorite_form,
        'get_string': get_string,
        'private': af.blocked,
    }
    return render(request, 'oral_argument.html', context)
示例#15
0
def upload_pdf_to_ia(self, rd_pk):
    """Upload a RECAP document's PDF to the Internet Archive.

    :param rd_pk: The PK of the RECAPDocument whose local file is uploaded.
    """
    rd = RECAPDocument.objects.get(pk=rd_pk)
    d = rd.docket_entry.docket
    file_name = get_document_filename(
        d.court_id,
        d.pacer_case_id,
        rd.document_number,
        rd.attachment_number or 0,  # Falls back to 0 when not an attachment.
    )
    bucket_name = get_bucket_name(d.court_id, d.pacer_case_id)
    responses = upload_to_ia(
        self,
        identifier=bucket_name,
        files=rd.filepath_local.path,
        title=best_case_name(d),
        collection=settings.IA_COLLECTIONS,
        court_id=d.court_id,
        source_url='https://www.courtlistener.com%s' % rd.get_absolute_url(),
        media_type='texts',
        description="This item represents a case in PACER, the U.S. "
        "Government's website for federal case data. If you wish "
        "to see the entire case, please consult PACER directly.",
    )
    # A None result means no responses came back; treat it as a failure.
    if responses is None:
        increment_failure_count(rd)
        return

    if all(r.ok for r in responses):
        # Success: reset the failure counter and record the IA URL.
        rd.ia_upload_failure_count = None
        rd.filepath_ia = "https://archive.org/download/%s/%s" % (bucket_name,
                                                                 file_name)
        rd.save()
    else:
        increment_failure_count(rd)
示例#16
0
def view_audio_file(request, pk, _):
    """Using the ID, return the oral argument page.

    We also test if the item is a favorite and send data as such.
    """
    af = get_object_or_404(Audio, pk=pk)

    try:
        fave = Favorite.objects.get(audio_id=af.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(initial={
            'audio_id': af.pk,
            'name': trunc(best_case_name(af.docket), 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=fave)

    return render(request, 'oral_argument.html', {
        'title': trunc(af.case_name, 100),
        'af': af,
        'favorite_form': favorite_form,
        'get_string': search_utils.make_get_string(request),
        'private': af.blocked,
    })
示例#17
0
 def caption(self):
     """Build a display caption: case name, one citation, and a parenthetical.

     Exactly one citation is appended, chosen by the precedence order of the
     elif chain below. A neutral cite returns early because it takes no
     parenthetical court/year suffix. The &nbsp; entities imply the result is
     rendered as HTML.
     """
     caption = best_case_name(self)
     if self.neutral_cite:
         caption += ", %s" % self.neutral_cite
         return caption  # neutral cites lack the parentheses, so we're done here.
     elif self.federal_cite_one:
         caption += ", %s" % self.federal_cite_one
     elif self.federal_cite_two:
         caption += ", %s" % self.federal_cite_two
     elif self.federal_cite_three:
         caption += ", %s" % self.federal_cite_three
     elif self.specialty_cite_one:
         caption += ", %s" % self.specialty_cite_one
     elif self.state_cite_regional:
         caption += ", %s" % self.state_cite_regional
     elif self.state_cite_one:
         caption += ", %s" % self.state_cite_one
     elif self.westlaw_cite and self.lexis_cite:
         # If both WL and LEXIS
         caption += ", %s, %s" % (self.westlaw_cite, self.lexis_cite)
     elif self.westlaw_cite:
         # If only WL
         caption += ", %s" % self.westlaw_cite
     elif self.lexis_cite:
         # If only LEXIS
         caption += ", %s" % self.lexis_cite
     elif self.docket.docket_number:
         caption += ", %s" % self.docket.docket_number
     # Parenthetical: "(<court>&nbsp;<year>)"; the court is omitted for
     # SCOTUS, and its internal spaces become non-breaking.
     caption += ' ('
     if self.docket.court.citation_string != 'SCOTUS':
         caption += re.sub(' ', '&nbsp;', self.docket.court.citation_string)
         caption += '&nbsp;'
     caption += '%s)' % self.date_filed.isoformat().split('-')[0]  # b/c strftime f's up before 1900.
     return caption
示例#18
0
def upload_audio_to_ia(self, af_pk):
    """Upload an oral argument audio file to the Internet Archive.

    :param af_pk: The PK of the Audio object whose original file is uploaded.
    """
    af = Audio.objects.get(pk=af_pk)
    d = af.docket
    # The filename embeds court, docket number, argument date, and the
    # original file's extension (text after the last dot).
    file_name = make_af_filename(
        d.court_id, d.docket_number, d.date_argued,
        af.local_path_original_file.path.rsplit('.', 1)[1])
    bucket_name = get_bucket_name(d.court_id, slugify(d.docket_number))
    responses = upload_to_ia(
        self,
        identifier=bucket_name,
        files={file_name: af.local_path_original_file.path},
        title=best_case_name(d),
        collection=settings.IA_OA_COLLECTIONS,
        court_id=d.court_id,
        source_url='https://www.courtlistener.com%s' % af.get_absolute_url(),
        media_type='audio',
        description='This item represents an oral argument audio file as '
        'scraped from a U.S. Government website by Free Law '
        'Project.',
    )
    # A None result means no responses came back; treat it as a failure.
    if responses is None:
        increment_failure_count(af)
        return

    if all(r.ok for r in responses):
        # Success: reset the failure counter and record the IA URL.
        af.ia_upload_failure_count = None
        af.filepath_ia = "https://archive.org/download/%s/%s" % (bucket_name,
                                                                 file_name)
        af.save()
    else:
        increment_failure_count(af)
示例#19
0
def upload_free_opinion_to_ia(self, rd_pk):
    """Upload a free PACER opinion PDF to the Internet Archive.

    Bound task: on transient failures it re-queues itself via ``self.retry``
    until ``self.max_retries`` is exhausted, then gives up silently (a later
    cron run picks the document up again).

    :param rd_pk: The PK of the RECAPDocument whose local file is uploaded.
    """
    rd = RECAPDocument.objects.get(pk=rd_pk)
    d = rd.docket_entry.docket
    file_name = get_document_filename(
        d.court_id,
        d.pacer_case_id,
        rd.document_number,
        0,  # Attachment number is zero for all free opinions.
    )
    bucket_name = get_bucket_name(d.court_id, d.pacer_case_id)
    try:
        responses = upload_to_ia(
            identifier=bucket_name,
            files=rd.filepath_local.path,
            metadata={
                'title': best_case_name(d),
                'collection': settings.IA_COLLECTIONS,
                'contributor': '<a href="https://free.law">Free Law Project</a>',
                'court': d.court_id,
                'language': 'eng',
                'mediatype': 'texts',
                'description': "This item represents a case in PACER, "
                               "the U.S. Government's website for "
                               "federal case data. If you wish to see "
                               "the entire case, please consult PACER "
                               "directly.",
                'licenseurl': 'https://www.usa.gov/government-works',
            },
        )
    except (OverloadedException, ExpatError) as exc:
        # Overloaded: IA wants us to slow down.
        # ExpatError: The syntax of the XML file that's supposed to be returned
        #             by IA is bad (or something).
        if self.request.retries == self.max_retries:
            # Give up for now. It'll get done next time cron is run.
            return
        raise self.retry(exc=exc)
    except HTTPError as exc:
        if exc.response.status_code in [
            HTTP_403_FORBIDDEN,    # Can't access bucket, typically.
            HTTP_400_BAD_REQUEST,  # Corrupt PDF, typically.
        ]:
            # Permanent failure: retrying won't help, so hand the bad
            # response back to the caller for inspection.
            return [exc.response]
        if self.request.retries == self.max_retries:
            # This exception is also raised when the endpoint is overloaded,
            # but doesn't get caught by the OverloadedException handler above
            # due to multiple processes running at the same time. Just give
            # up for now.
            return
        raise self.retry(exc=exc)
    except (requests.Timeout, requests.RequestException) as exc:
        logger.warning("Timeout or unknown RequestException. Unable to upload "
                       "to IA. Trying again if retries not exceeded: %s" % rd)
        if self.request.retries == self.max_retries:
            # Give up for now. It'll get done next time cron is run.
            return
        raise self.retry(exc=exc)
    if all(r.ok for r in responses):
        # Every response succeeded; record the public IA download URL.
        rd.filepath_ia = "https://archive.org/download/%s/%s" % (
            bucket_name, file_name)
        rd.save(do_extraction=False, index=False)
示例#20
0
def upload_recap_json(self, pk):
    """Make a JSON object for a RECAP docket and upload it to IA

    :param pk: The PK of the docket to serialize and upload.
    """
    # This is a pretty highly optimized query that uses only 13 hits to the DB
    # when generating a docket JSON rendering, regardless of how many related
    # objects the docket has such as docket entries, parties, etc.
    ds = Docket.objects.filter(pk=pk).select_related(
        'originating_court_information',
    ).prefetch_related(
        'panel',
        'parties__attorneys__roles',
        'parties__party_types__criminal_complaints',
        'parties__party_types__criminal_counts',
        # Django appears to have a bug where you can't defer a field on a
        # queryset where you prefetch the values. If you try to, it crashes.
        # We should be able to just do the prefetch below like the ones above
        # and then do the defer statement at the end, but that throws an error.
        Prefetch(
            'docket_entries__recap_documents',
            queryset=RECAPDocument.objects.all().defer('plain_text')
        )
    )
    d = ds[0]
    renderer = JSONRenderer()
    json_str = renderer.render(
        IADocketSerializer(d).data,
        accepted_media_type='application/json; indent=2',
    )

    file_name = get_docket_filename(d.court_id, d.pacer_case_id, 'json')
    bucket_name = get_bucket_name(d.court_id, d.pacer_case_id)
    responses = upload_to_ia(
        self,
        identifier=bucket_name,
        files={file_name: StringIO(json_str)},
        title=best_case_name(d),
        collection=settings.IA_COLLECTIONS,
        court_id=d.court_id,
        source_url='https://www.courtlistener.com%s' % d.get_absolute_url(),
        media_type='texts',
        description="This item represents a case in PACER, the U.S. "
                    "Government's website for federal case data. This "
                    "information is uploaded quarterly. To see our most "
                    "recent version please use the source url parameter, "
                    "linked below. To see the canonical source for this data, "
                    "please consult PACER directly.",
    )
    # A None result means no responses came back; treat it as a failure.
    if responses is None:
        increment_failure_count(d)
        return

    if all(r.ok for r in responses):
        # Success: clear failure bookkeeping and record the IA URL.
        d.ia_upload_failure_count = None
        d.ia_date_first_changed = None
        d.filepath_ia_json = "https://archive.org/download/%s/%s" % (
            bucket_name, file_name)
        # NOTE(review): marking the docket as needing upload immediately
        # after a successful upload looks surprising -- confirm this isn't
        # meant to clear the flag instead.
        mark_ia_upload_needed(d)
        d.save()
    else:
        increment_failure_count(d)
示例#21
0
def view_docket(request, pk, slug):
    """Show the docket page: filtered, paginated docket entries.

    Also increments the docket's view counter (skipped for bots) and builds
    the favorite form for the page.
    """
    docket = get_object_or_404(Docket, pk=pk)
    if not is_bot(request):
        # suppress_autotime presumably keeps auto_now from bumping
        # date_modified for this bookkeeping write -- confirm.
        with suppress_autotime(docket, ['date_modified']):
            cached_count = docket.view_count
            # F() makes the increment happen in the database, avoiding a
            # read-modify-write race between concurrent page views.
            docket.view_count = F('view_count') + 1
            docket.save()
            # Put a plain integer back on the in-memory object so later code
            # sees a number instead of an unevaluated F() expression.
            docket.view_count = cached_count + 1

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                'docket_id': docket.pk,
                'name': trunc(best_case_name(docket), 100, ellipsis='...'),
            })
    else:
        favorite_form = FavoriteForm(instance=fave)

    de_list = docket.docket_entries.all().prefetch_related('recap_documents')
    # Narrow and order the entry list according to the user's filter form.
    form = DocketEntryFilterForm(request.GET)
    if form.is_valid():
        cd = form.cleaned_data
        if cd.get('entry_gte'):
            de_list = de_list.filter(entry_number__gte=cd['entry_gte'])
        if cd.get('entry_lte'):
            de_list = de_list.filter(entry_number__lte=cd['entry_lte'])
        if cd.get('filed_after'):
            de_list = de_list.filter(date_filed__gte=cd['filed_after'])
        if cd.get('filed_before'):
            de_list = de_list.filter(date_filed__lte=cd['filed_before'])
        if cd.get('order_by') == DocketEntryFilterForm.DESCENDING:
            de_list = de_list.order_by('-entry_number')

    # 100 entries per page; up to 5 orphans get folded into the prior page.
    paginator = Paginator(de_list, 100, orphans=5)
    page = request.GET.get('page')
    try:
        docket_entries = paginator.page(page)
    except PageNotAnInteger:
        # Non-numeric ?page= values fall back to the first page.
        docket_entries = paginator.page(1)
    except EmptyPage:
        # Out-of-range pages fall back to the last page.
        docket_entries = paginator.page(paginator.num_pages)

    return render(
        request,
        'view_docket.html',
        {
            'docket': docket,
            'parties':
            docket.parties.exists(),  # Needed to show/hide parties tab.
            'docket_entries': docket_entries,
            'form': form,
            'favorite_form': favorite_form,
            'get_string': make_get_string(request),
            'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
            'private': docket.blocked,
        })
示例#22
0
def make_docket_title(docket):
    """Build a display title for a docket.

    Joins the truncated case name and the docket number, omitting any
    piece that is blank or whitespace-only.
    """
    pieces = [
        trunc(best_case_name(docket), 100, ellipsis="..."),
        docket.docket_number,
    ]
    return ", ".join(piece for piece in pieces if piece.strip())
示例#23
0
    def as_search_dict(self):
        """Create a dict that can be ingested by Solr"""
        docket = self.docket

        # IDs
        out = {
            'id': self.pk,
            'docket_id': self.docket_id,
            'court_id': docket.court_id,
        }

        # Docket: Solr wants datetimes, so bare dates become midnight.
        out['docketNumber'] = docket.docket_number
        if docket.date_argued is not None:
            out['dateArgued'] = datetime.combine(docket.date_argued, time())
        if docket.date_reargued is not None:
            out['dateReargued'] = datetime.combine(
                docket.date_reargued, time())
        if docket.date_reargument_denied is not None:
            out['dateReargumentDenied'] = datetime.combine(
                docket.date_reargument_denied, time())

        # Court
        out['court'] = docket.court.full_name
        out['court_citation_string'] = docket.court.citation_string
        out['court_exact'] = docket.court_id  # For faceting

        # Audio File
        out['caseName'] = best_case_name(self)
        out['panel_ids'] = [judge.pk for judge in self.panel.all()]
        out['judge'] = self.judges
        out['file_size_mp3'] = deepgetattr(self, 'local_path_mp3.size', None)
        out['duration'] = self.duration
        out['source'] = self.source
        out['download_url'] = self.download_url
        out['local_path'] = unicode(getattr(self, 'local_path_mp3', None))

        try:
            out['absolute_url'] = self.get_absolute_url()
        except NoReverseMatch:
            raise InvalidDocumentError(
                "Unable to save to index due to missing absolute_url: %s" %
                self.pk)

        # Full text rendered from the template, null-mapped chars removed.
        text_template = loader.get_template('indexes/audio_text.txt')
        out['text'] = text_template.render({'item': self}).translate(null_map)

        return normalize_search_dicts(out)
示例#24
0
    def as_search_dict(self):
        """Create a dict that can be ingested by Solr"""
        docket = self.docket

        # IDs
        out = {
            'id': self.pk,
            'docket_id': self.docket_id,
            'court_id': docket.court_id,
        }

        # Docket: bare dates become midnight datetimes for Solr.
        docket_data = {'docketNumber': docket.docket_number}
        date_pairs = [
            ('dateArgued', docket.date_argued),
            ('dateReargued', docket.date_reargued),
            ('dateReargumentDenied', docket.date_reargument_denied),
        ]
        for solr_key, date_value in date_pairs:
            if date_value is not None:
                docket_data[solr_key] = datetime.combine(date_value, time())
        out.update(docket_data)

        # Court
        out.update({
            'court': docket.court.full_name,
            'court_citation_string': docket.court.citation_string,
            'court_exact': docket.court_id,  # For faceting
        })

        # Audio File
        out.update({
            'caseName': best_case_name(self),
            'panel_ids': [judge.pk for judge in self.panel.all()],
            'judge': self.judges,
            'file_size_mp3': deepgetattr(self, 'local_path_mp3.size', None),
            'duration': self.duration,
            'source': self.source,
            'download_url': self.download_url,
            'local_path': unicode(getattr(self, 'local_path_mp3', None)),
        })
        try:
            out['absolute_url'] = self.get_absolute_url()
        except NoReverseMatch:
            raise InvalidDocumentError(
                "Unable to save to index due to missing absolute_url: %s"
                % self.pk)

        # Full text from the template, with null-mapped characters removed.
        template = loader.get_template('indexes/audio_text.txt')
        out['text'] = template.render({'item': self}).translate(null_map)

        # Strip None values before handing the doc to Solr.
        return nuke_nones(out)
示例#25
0
    def as_search_dict(self):
        """Create a dict that can be ingested by Solr"""
        docket = self.docket

        # IDs
        out = {
            "id": self.pk,
            "docket_id": self.docket_id,
            "court_id": docket.court_id,
        }

        # Docket dates, normalized via midnight_pst for Solr.
        out["docketNumber"] = docket.docket_number
        for solr_field, date_value in (
            ("dateArgued", docket.date_argued),
            ("dateReargued", docket.date_reargued),
            ("dateReargumentDenied", docket.date_reargument_denied),
        ):
            if date_value is not None:
                out[solr_field] = midnight_pst(date_value)

        # Court
        out["court"] = docket.court.full_name
        out["court_citation_string"] = docket.court.citation_string
        out["court_exact"] = docket.court_id  # For faceting

        # Audio File
        out["caseName"] = best_case_name(self)
        out["panel_ids"] = [judge.pk for judge in self.panel.all()]
        out["judge"] = self.judges
        out["file_size_mp3"] = deepgetattr(self, "local_path_mp3.size", None)
        out["duration"] = self.duration
        out["source"] = self.source
        out["download_url"] = self.download_url
        out["local_path"] = str(getattr(self, "local_path_mp3", None))

        try:
            out["absolute_url"] = self.get_absolute_url()
        except NoReverseMatch:
            raise InvalidDocumentError(
                "Unable to save to index due to missing absolute_url: %s"
                % self.pk
            )

        # Rendered full text with null-mapped characters stripped.
        text_template = loader.get_template("indexes/audio_text.txt")
        out["text"] = text_template.render({"item": self}).translate(null_map)

        return normalize_search_dicts(out)
示例#26
0
def set_mp3_meta_data(audio_obj, mp3_path):
    """Sets the meta data on the mp3 file to good values.

    :param audio_obj: an Audio object to clean up.
    :param mp3_path: the path to the mp3 to be converted.
    """
    court = audio_obj.docket.court

    # Load the file, delete the old tags and create a new one.
    audio_file = eyed3.load(mp3_path)

    # Undocumented API from eyed3.plugins.classic.ClassicPlugin#handleRemoves
    id3.Tag.remove(
        audio_file.tag.file_info.name,
        id3.ID3_ANY_VERSION,
        preserve_file_time=False,
    )
    audio_file.initTag()
    audio_file.tag.title = best_case_name(audio_obj)
    audio_file.tag.album = u"{court}, {year}".format(
        court=court.full_name,
        year=audio_obj.docket.date_argued.year,
    )
    audio_file.tag.artist = court.full_name
    audio_file.tag.artist_url = court.url
    audio_file.tag.audio_source_url = audio_obj.download_url
    audio_file.tag.comments.set(
        u"Argued: {date_argued}. Docket number: {docket_number}".format(
            date_argued=audio_obj.docket.date_argued.strftime("%Y-%m-%d"),
            docket_number=audio_obj.docket.docket_number,
        )
    )
    audio_file.tag.genre = u"Speech"
    audio_file.tag.publisher = u"Free Law Project"
    audio_file.tag.publisher_url = u"https://free.law"
    audio_file.tag.recording_date = audio_obj.docket.date_argued.strftime(
        "%Y-%m-%d")

    # Add images to the mp3. If it has a seal, use that for the Front Cover
    # and use the FLP logo for the Publisher Logo. If it lacks a seal, use the
    # Publisher logo for both the front cover and the Publisher logo.
    try:
        has_seal = seals_data[court.pk]["has_seal"]
    except AttributeError:
        # Unknown court in Seal Rookery.
        has_seal = False
    except KeyError:
        # Unknown court altogether (perhaps a test?)
        has_seal = False

    # ID3 frame IDs (see eyed3/id3/frames.py): 3 is "Front Cover", 14 is
    # "Publisher logo".
    flp_image_frames = [3, 14]
    if has_seal:
        # Open in binary mode: PNG data read in text mode fails on Python 3
        # and is corrupted on Windows.
        seal_path = os.path.join(seals_root, "512", "%s.png" % court.pk)
        with open(seal_path, "rb") as f:
            audio_file.tag.images.set(
                3, f.read(), "image/png", u"Seal for %s" % court.short_name)
        flp_image_frames.remove(3)

    for frame in flp_image_frames:
        logo_path = os.path.join(settings.INSTALL_ROOT, "cl", "audio",
                                 "static", "png", "producer-300x300.png")
        with open(logo_path, "rb") as f:
            audio_file.tag.images.set(
                frame,
                f.read(),
                "image/png",
                u"Created for the public domain by Free Law Project",
            )

    audio_file.tag.save()
示例#27
0
def cluster_visualizations(request, pk, slug):
    """Render the visualizations tab for an opinion cluster."""
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = '%s, %s' % (
        trunc(best_case_name(cluster), 100),
        cluster.citation_string,
    )
    context = {
        'title': title,
        'cluster': cluster,
        'private': cluster.blocked or cluster.has_private_authority,
    }
    return render(request, 'view_opinion_visualizations.html', context)
示例#28
0
def cluster_visualizations(request, pk, slug):
    """Render the visualization list for an opinion cluster."""
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    is_private = cluster.blocked or cluster.has_private_authority
    return render(request, 'view_opinion_visualizations.html', {
        'title': '%s, %s' % (trunc(best_case_name(cluster), 100),
                             cluster.citation_string),
        'cluster': cluster,
        'private': is_private,
    })
示例#29
0
def send_docket_alert(d_pk, since):
    """Send an alert for a given docket

    :param d_pk: The docket PK that was modified
    :param since: If we run alerts, notify users about items *since* this time.
    :return: The dict that was passed in as data is simply passed through. The
    next task in the chain needs the same information.
    """
    email_addresses = User.objects.filter(
        docket_alerts__docket_id=d_pk, ).distinct().values_list('email',
                                                                flat=True)
    if email_addresses:
        # We have an alert for this docket. Proceed.
        docket = Docket.objects.get(pk=d_pk)
        new_des = DocketEntry.objects.filter(date_created__gte=since,
                                             docket=docket)

        # Cache the count so the guard and the template context don't each
        # issue their own COUNT query.
        new_count = new_des.count()
        if new_count > 0:
            # Notify every user that's subscribed to this alert.
            case_name = trunc(best_case_name(docket), 100, ellipsis='...')
            subject_template = loader.get_template('docket_alert_subject.txt')
            subject = subject_template.render({
                'docket': docket,
                'count': new_count,
                'case_name': case_name,
            }).strip()  # Remove newlines that editors can insist on adding.
            email_context = {'new_des': new_des, 'docket': docket}
            txt_template = loader.get_template('docket_alert_email.txt')
            html_template = loader.get_template('docket_alert_email.html')
            messages = []
            for email_address in email_addresses:
                msg = EmailMultiAlternatives(
                    subject=subject,
                    body=txt_template.render(email_context),
                    from_email=settings.DEFAULT_ALERTS_EMAIL,
                    to=[email_address],
                    headers={'X-Entity-Ref-ID': 'docket.alert:%s' % d_pk})
                html = html_template.render(email_context)
                msg.attach_alternative(html, "text/html")
                messages.append(msg)

            # Add a bcc to the first message in the list so that we get a copy.
            messages[0].bcc = ['*****@*****.**']
            connection = get_connection()
            connection.send_messages(messages)
            tally_stat('alerts.docket.alerts.sent', inc=len(email_addresses))

        DocketAlert.objects.filter(docket=docket).update(date_last_hit=now())

    # Work completed, clear the semaphore
    r = redis.StrictRedis(host=settings.REDIS_HOST,
                          port=settings.REDIS_PORT,
                          db=settings.REDIS_DATABASES['ALERTS'])
    r.delete(make_alert_key(d_pk))
示例#30
0
def process_audio_file(pk):
    """Given the key to an audio file, extract its content and add the related
    meta data to the database.

    :param pk: The primary key of the Audio object to process.
    """
    af = Audio.objects.get(pk=pk)
    tmp_path = os.path.join("/tmp", "audio_" + uuid.uuid4().hex + ".mp3")
    av_path = get_audio_binary()
    av_command = [
        av_path,
        "-i",
        af.local_path_original_file.path,
        "-ar",
        "22050",  # sample rate (audio samples/s) of 22050Hz
        "-ab",
        "48k",  # constant bit rate (sample resolution) of 48kbps
        tmp_path,
    ]
    try:
        _ = subprocess.check_output(av_command, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        print(
            "%s failed command: %s\nerror code: %s\noutput: %s\n%s"
            % (
                av_path,
                av_command,
                e.returncode,
                e.output,
                traceback.format_exc(),
            )
        )
        raise

    set_mp3_meta_data(af, tmp_path)
    try:
        # Binary mode is required: reading an mp3 with mode "r" raises
        # UnicodeDecodeError on Python 3.
        with open(tmp_path, "rb") as mp3:
            cf = ContentFile(mp3.read())
            file_name = trunc(best_case_name(af).lower(), 72) + "_cl.mp3"
            af.file_with_date = af.docket.date_argued
            af.local_path_mp3.save(file_name, cf, save=False)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit still
        # propagate; the failure is logged and processing continues.
        msg = (
            "Unable to save mp3 to audio_file in scraper.tasks."
            "process_audio_file for item: %s\n"
            "Traceback:\n"
            "%s" % (af.pk, traceback.format_exc())
        )
        print(msg)
        ErrorLog.objects.create(
            log_level="CRITICAL", court=af.docket.court, message=msg
        )

    af.duration = eyed3.load(tmp_path).info.time_secs
    af.processing_complete = True
    af.save()
示例#31
0
def make_rd_title(rd):
    """Build the display title for a RECAP document."""
    de = rd.docket_entry
    d = de.docket
    if rd.description:
        desc = "%s &ndash; " % rd.description
    else:
        desc = ""
    if rd.document_type == RECAPDocument.ATTACHMENT:
        att_part = ", Att. #%s" % rd.attachment_number
    else:
        att_part = ""
    if d.docket_number:
        docket_part = ", %s" % d.docket_number
    else:
        docket_part = ""
    return "{desc}#{doc_num}{att_num} in {case_name} ({court}{docket_number})".format(
        desc=desc,
        doc_num=rd.document_number,
        att_num=att_part,
        case_name=best_case_name(d),
        court=d.court.citation_string,
        docket_number=docket_part,
    )
示例#32
0
def make_rd_title(rd):
    """Compose the display title for a RECAP document."""
    d = rd.docket_entry.docket
    desc = '%s &ndash; ' % rd.description if rd.description else ''
    if rd.document_type == RECAPDocument.ATTACHMENT:
        attachment = ', Att. #%s' % rd.attachment_number
    else:
        attachment = ''
    docket_num = ', %s' % d.docket_number if d.docket_number else ''
    return '{desc}#{doc_num}{att_num} in {case_name} ({court}{docket_number})'.format(
        desc=desc,
        doc_num=rd.document_number,
        att_num=attachment,
        case_name=best_case_name(d),
        court=d.court.citation_string,
        docket_number=docket_num,
    )
示例#33
0
def send_docket_alert(d_pk, since):
    """Send an alert for a given docket

    :param d_pk: The docket PK that was modified
    :param since: If we run alerts, notify users about items *since* this time.
    :return: None
    """
    email_addresses = (User.objects.filter(
        docket_alerts__docket_id=d_pk).distinct().values_list("email",
                                                              flat=True))
    if email_addresses:
        # We have an alert for this docket. Proceed.
        docket = Docket.objects.get(pk=d_pk)
        new_des = DocketEntry.objects.filter(date_created__gte=since,
                                             docket=docket)

        # Cache the count so the guard and the template context don't each
        # issue their own COUNT query.
        new_count = new_des.count()
        if new_count > 0:
            # Notify every user that's subscribed to this alert.
            case_name = trunc(best_case_name(docket), 100, ellipsis="...")
            subject_template = loader.get_template("docket_alert_subject.txt")
            subject = subject_template.render({
                "docket": docket,
                "count": new_count,
                "case_name": case_name,
            }).strip()  # Remove newlines that editors can insist on adding.
            email_context = {"new_des": new_des, "docket": docket}
            txt_template = loader.get_template("docket_alert_email.txt")
            html_template = loader.get_template("docket_alert_email.html")
            messages = []
            for email_address in email_addresses:
                msg = EmailMultiAlternatives(
                    subject=subject,
                    body=txt_template.render(email_context),
                    from_email=settings.DEFAULT_ALERTS_EMAIL,
                    to=[email_address],
                    headers={"X-Entity-Ref-ID": "docket.alert:%s" % d_pk},
                )
                html = html_template.render(email_context)
                msg.attach_alternative(html, "text/html")
                messages.append(msg)

            # Add a bcc to the first message in the list so that we get a copy.
            messages[0].bcc = ["*****@*****.**"]
            connection = get_connection()
            connection.send_messages(messages)
            tally_stat("alerts.docket.alerts.sent", inc=len(email_addresses))

        DocketAlert.objects.filter(docket=docket).update(date_last_hit=now())

    # Work completed, clear the semaphore
    r = make_redis_interface("ALERTS")
    r.delete(make_alert_key(d_pk))
示例#34
0
def view_docket(request, pk, _):
    """Display a docket with its filterable, paginated entries."""
    docket = get_object_or_404(Docket, pk=pk)
    if not is_bot(request):
        # Count human page views only, atomically via an F() expression.
        docket.view_count = F('view_count') + 1
        docket.save()

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Missing favorites raise DoesNotExist; anonymous users, TypeError.
        favorite_form = FavoriteForm(initial={
            'docket_id': docket.pk,
            'name': trunc(best_case_name(docket), 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=fave)

    de_list = docket.docket_entries.all()
    form = DocketEntryFilterForm(request.GET)
    if form.is_valid():
        cd = form.cleaned_data
        # Map each form field to its queryset lookup; apply only those set.
        range_filters = (
            ('entry_gte', 'entry_number__gte'),
            ('entry_lte', 'entry_number__lte'),
            ('filed_after', 'date_filed__gte'),
            ('filed_before', 'date_filed__lte'),
        )
        for field, lookup in range_filters:
            if cd.get(field):
                de_list = de_list.filter(**{lookup: cd[field]})
        if cd.get('order_by') == DocketEntryFilterForm.DESCENDING:
            de_list = de_list.order_by('-entry_number')

    paginator = Paginator(de_list, 500, orphans=25)
    try:
        docket_entries = paginator.page(request.GET.get('page'))
    except PageNotAnInteger:
        docket_entries = paginator.page(1)
    except EmptyPage:
        docket_entries = paginator.page(paginator.num_pages)

    return render(
        request, 'view_docket.html', {
            'docket': docket,
            'docket_entries': docket_entries,
            'form': form,
            'favorite_form': favorite_form,
            'get_string': make_get_string(request),
            'private': docket.blocked,
        })
示例#35
0
def cluster_visualizations(
    request: HttpRequest, pk: int, slug: str
) -> HttpResponse:
    """Render the visualizations tab for an opinion cluster."""
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    truncated_name = trunc(best_case_name(cluster), 100)
    context = {
        "title": "%s, %s" % (truncated_name, cluster.citation_string),
        "cluster": cluster,
        "private": cluster.blocked or cluster.has_private_authority,
    }
    return render(request, "view_opinion_visualizations.html", context)
示例#36
0
    def __init__(self, item):
        """Flatten an audio item into Solr document attributes."""
        docket = item.docket
        self.id = item.pk
        self.docket_id = item.docket_id

        # Docket dates: Solr wants datetimes, so promote dates to midnight.
        if docket.date_argued is not None:
            self.dateArgued = datetime.combine(docket.date_argued, time())
        if docket.date_reargued is not None:
            self.dateReargued = datetime.combine(docket.date_reargued, time())
        if docket.date_reargument_denied is not None:
            self.dateReargumentDenied = datetime.combine(
                docket.date_reargument_denied, time())
        self.docketNumber = docket.docket_number

        # Court
        self.court = docket.court.full_name
        self.court_id = docket.court_id
        self.court_citation_string = docket.court.citation_string

        # Audio file
        self.caseName = best_case_name(item)
        self.panel_ids = [judge.pk for judge in item.panel.all()]
        self.judge = item.judges
        self.file_size_mp3 = deepgetattr(item, 'local_path_mp3.size', None)
        self.duration = item.duration
        self.source = item.source
        self.download_url = item.download_url
        self.local_path = unicode(getattr(item, 'local_path_mp3', None))

        try:
            self.absolute_url = item.get_absolute_url()
        except NoReverseMatch:
            raise InvalidDocumentError(
                "Unable to save to index due to missing absolute_url: %s"
                % item.pk)

        # Full text blob with null-mapped characters stripped.
        text_template = loader.get_template('indexes/audio_text.txt')
        self.text = text_template.render(
            {'item': item}).translate(null_map)

        # For faceting
        self.court_exact = docket.court_id
示例#37
0
def view_docket(request, pk, _):
    """Render a docket page with filtered, paginated docket entries."""
    docket = get_object_or_404(Docket, pk=pk)

    # Count human page views only.
    if not is_bot(request):
        docket.view_count = F('view_count') + 1
        docket.save()

    # Favorite form: bound to an existing favorite when one exists,
    # otherwise pre-filled for creating a new one. Anonymous users raise
    # TypeError on the lookup and get the blank form too.
    try:
        existing_fave = Favorite.objects.get(
            docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        favorite_form = FavoriteForm(initial={
            'docket_id': docket.pk,
            'name': trunc(best_case_name(docket), 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=existing_fave)

    entries = docket.docket_entries.all()
    form = DocketEntryFilterForm(request.GET)
    if form.is_valid():
        cd = form.cleaned_data
        if cd.get('entry_gte'):
            entries = entries.filter(entry_number__gte=cd['entry_gte'])
        if cd.get('entry_lte'):
            entries = entries.filter(entry_number__lte=cd['entry_lte'])
        if cd.get('filed_after'):
            entries = entries.filter(date_filed__gte=cd['filed_after'])
        if cd.get('filed_before'):
            entries = entries.filter(date_filed__lte=cd['filed_before'])
        if cd.get('order_by') == DocketEntryFilterForm.DESCENDING:
            entries = entries.order_by('-entry_number')

    paginator = Paginator(entries, 500, orphans=25)
    requested_page = request.GET.get('page')
    try:
        docket_entries = paginator.page(requested_page)
    except PageNotAnInteger:
        docket_entries = paginator.page(1)
    except EmptyPage:
        docket_entries = paginator.page(paginator.num_pages)

    context = {
        'docket': docket,
        'docket_entries': docket_entries,
        'form': form,
        'favorite_form': favorite_form,
        'get_string': make_get_string(request),
        'private': docket.blocked,
    }
    return render(request, 'view_docket.html', context)
示例#38
0
def view_authorities(request, pk, slug):
    """Show the authorities cited by an opinion cluster."""
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = '%s, %s' % (
        trunc(best_case_name(cluster), 100),
        cluster.citation_string,
    )
    return render_to_response(
        'view_opinion_authorities.html',
        {
            'title': title,
            'cluster': cluster,
            'private': cluster.blocked or cluster.has_private_authority,
            'authorities': cluster.authorities.order_by('case_name'),
        },
        RequestContext(request),
    )
示例#39
0
def view_authorities(request, pk, slug):
    """List the authorities an opinion cluster cites, ordered by case name."""
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    context = {
        'title': '%s, %s' % (
            trunc(best_case_name(cluster), 100),
            cluster.citation_string,
        ),
        'cluster': cluster,
        'private': cluster.blocked or cluster.has_private_authority,
        'authorities': cluster.authorities.order_by('case_name'),
    }
    return render_to_response(
        'view_opinion_authorities.html', context, RequestContext(request))
示例#40
0
def filter_by_matching_antecedent(
    opinion_candidates: Iterable[Opinion],
    antecedent_guess: Optional[str],
) -> Optional[Opinion]:
    """Return the one opinion whose case name contains the antecedent guess.

    Returns None when there is no guess, no match, or more than one
    distinct match (i.e. the match is ambiguous).
    """
    if not antecedent_guess:
        return None

    needle = strip_punct(antecedent_guess)
    # De-duplicate while filtering; accept only an unambiguous match.
    matches = {
        o for o in opinion_candidates if needle in best_case_name(o.cluster)
    }
    if len(matches) == 1:
        return matches.pop()
    return None
示例#41
0
def view_authorities(request, pk, slug):
    """Show the authorities cited by an opinion cluster."""
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = "%s, %s" % (
        trunc(best_case_name(cluster), 100),
        cluster.citation_string,
    )
    return render(
        request,
        "view_opinion_authorities.html",
        {
            "title": title,
            "cluster": cluster,
            "private": cluster.blocked or cluster.has_private_authority,
            "authorities_with_data": cluster.authorities_with_data,
        },
    )
示例#42
0
def process_audio_file(self, pk) -> None:
    """Given the key to an audio file, extract its content and add the related
    meta data to the database.

    :param self: A Celery task object
    :param pk: Audio file pk
    :return: None
    """
    audio = Audio.objects.get(pk=pk)

    # Convert via the audio microservice; fail loudly on HTTP errors.
    response = convert_and_clean_audio(audio)
    response.raise_for_status()
    payload = response.json()

    mp3_content = ContentFile(base64.b64decode(payload["audio_b64"]))
    file_name = trunc(best_case_name(audio).lower(), 72) + "_cl.mp3"
    audio.file_with_date = audio.docket.date_argued
    audio.local_path_mp3.save(file_name, mp3_content, save=False)
    audio.duration = payload["duration"]
    audio.processing_complete = True
    audio.save()
示例#43
0
    def __init__(self, item):
        """Build a Solr document from an Audio item."""
        self.id = item.pk
        self.docket_id = item.docket_id

        # Docket: Solr wants datetimes, so promote bare dates to midnight.
        for attr, d in (
                ('dateArgued', item.docket.date_argued),
                ('dateReargued', item.docket.date_reargued),
                ('dateReargumentDenied',
                 item.docket.date_reargument_denied)):
            if d is not None:
                setattr(self, attr, datetime.combine(d, time()))
        self.docketNumber = item.docket.docket_number

        # Court
        self.court = item.docket.court.full_name
        self.court_id = item.docket.court_id
        self.court_citation_string = item.docket.court.citation_string

        # Audio file
        self.caseName = best_case_name(item)
        self.panel_ids = [judge.pk for judge in item.panel.all()]
        self.judge = item.judges
        self.file_size_mp3 = deepgetattr(item, 'local_path_mp3.size', None)
        self.duration = item.duration
        self.source = item.source
        self.download_url = item.download_url
        self.local_path = unicode(getattr(item, 'local_path_mp3', None))

        try:
            self.absolute_url = item.get_absolute_url()
        except NoReverseMatch:
            raise InvalidDocumentError(
                "Unable to save to index due to missing absolute_url: %s" %
                item.pk)

        # Full text blob with null-mapped characters stripped.
        self.text = loader.get_template('indexes/audio_text.txt').render(
            {'item': item}).translate(null_map)

        # For faceting
        self.court_exact = item.docket.court_id
示例#44
0
def process_audio_file(pk):
    """Given the key to an audio file, extract its content and add the related
    meta data to the database.

    :param pk: The primary key of the Audio object to process.
    """
    af = Audio.objects.get(pk=pk)
    tmp_path = os.path.join('/tmp', 'audio_' + uuid.uuid4().hex + '.mp3')
    avconv_command = [
        'avconv', '-i', af.local_path_original_file.path,
        '-ar', '22050',  # sample rate (audio samples/s) of 22050Hz
        '-ab', '48k',    # constant bit rate (sample resolution) of 48kbps
        tmp_path
    ]
    try:
        _ = subprocess.check_output(
            avconv_command,
            stderr=subprocess.STDOUT
        )
    except subprocess.CalledProcessError as e:
        print('avconv failed command: %s\nerror code: %s\noutput: %s\n%s' %
              (avconv_command, e.returncode, e.output, traceback.format_exc()))
        raise

    set_mp3_meta_data(af, tmp_path)
    try:
        # Binary mode: mp3 data is not text; mode 'r' raises
        # UnicodeDecodeError on Python 3 and corrupts on Windows.
        with open(tmp_path, 'rb') as mp3:
            cf = ContentFile(mp3.read())
            file_name = trunc(best_case_name(af).lower(), 72) + '_cl.mp3'
            af.file_with_date = af.docket.date_argued
            af.local_path_mp3.save(file_name, cf, save=False)
    except Exception:
        # Narrowed from a bare except so system-exiting exceptions propagate;
        # the failure is logged and processing continues.
        msg = ("Unable to save mp3 to audio_file in scraper.tasks."
               "process_audio_file for item: %s\n"
               "Traceback:\n"
               "%s" % (af.pk, traceback.format_exc()))
        print(msg)
        ErrorLog.objects.create(log_level='CRITICAL', court=af.docket.court,
                                message=msg)

    af.duration = eyed3.load(tmp_path).info.time_secs
    af.processing_complete = True
    af.save()
示例#45
0
def view_recap_document(request,
                        docket_id=None,
                        doc_num=None,
                        att_num=None,
                        slug=''):
    """This view can either load an attachment or a regular document,
    depending on the URL pattern that is matched.
    """
    item = get_object_or_404(
        RECAPDocument,
        docket_entry__docket__id=docket_id,
        document_number=doc_num,
        attachment_number=att_num,
    )

    # Assemble the page title piece by piece.
    description_part = ''
    if item.description:
        description_part = '%s &ndash; ' % item.description
    attachment_part = ''
    if item.document_type == RECAPDocument.ATTACHMENT:
        attachment_part = ', Attachment #%s' % item.attachment_number
    title = '%sDocument #%s%s in %s' % (
        description_part,
        item.document_number,
        attachment_part,
        best_case_name(item.docket_entry.docket),
    )

    # The favorite lookup raises TypeError for anonymous users.
    try:
        fave = Favorite.objects.get(recap_doc_id=item.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user: serve an unbound form.
        favorite_form = FavoriteForm(initial={
            'recap_doc_id': item.pk,
            'name': trunc(title, 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=fave)

    context = {
        'document': item,
        'title': title,
        'favorite_form': favorite_form,
        'private': True,  # Always True for RECAP docs.
    }
    return render(request, 'recap_document.html', context)
示例#46
0
    def __init__(self, item):
        """Build a Solr search document from an Opinion *item*.

        Flattens the opinion together with its related cluster, docket, and
        court into flat attributes matching the Solr schema.

        :param item: the Opinion instance to index.
        :raises InvalidDocumentError: if the cluster's absolute URL cannot be
            reversed (e.g. the court may have in_use set to False).
        """
        self.id = item.pk
        self.docket_id = item.cluster.docket.pk
        self.cluster_id = item.cluster.pk
        self.court_id = item.cluster.docket.court.pk

        # Docket. Dates are stored as naive datetimes at midnight.
        if item.cluster.docket.date_argued is not None:
            self.dateArgued = datetime.combine(
                item.cluster.docket.date_argued,
                time(),
            )
        if item.cluster.docket.date_reargued is not None:
            self.dateReargued = datetime.combine(
                item.cluster.docket.date_reargued,
                time(),
            )
        if item.cluster.docket.date_reargument_denied is not None:
            self.dateReargumentDenied = datetime.combine(
                item.cluster.docket.date_reargument_denied,
                time(),
            )
        self.docketNumber = item.cluster.docket.docket_number

        # Court
        self.court = item.cluster.docket.court.full_name
        self.court_citation_string = item.cluster.docket.court.citation_string

        # Cluster
        self.caseName = best_case_name(item.cluster)
        self.caseNameShort = item.cluster.case_name_short
        self.sibling_ids = [sibling.pk for sibling in item.siblings.all()]
        self.panel_ids = [judge.pk for judge in item.cluster.panel.all()]
        self.non_participating_judge_ids = [
            judge.pk for judge in
            item.cluster.non_participating_judges.all()
        ]
        self.judge = item.cluster.judges
        if item.cluster.date_filed is not None:
            self.dateFiled = datetime.combine(
                item.cluster.date_filed,
                time()
            )  # Midnight, PST
        self.lexisCite = item.cluster.lexis_cite
        self.citation = [cite for cite in
                         item.cluster.citation_list if cite]  # Nuke '' and None
        self.neutralCite = item.cluster.neutral_cite
        self.scdb_id = item.cluster.scdb_id
        self.source = item.cluster.source
        self.attorney = item.cluster.attorneys
        self.suitNature = item.cluster.nature_of_suit
        self.citeCount = item.cluster.citation_count
        self.status = item.cluster.get_precedential_status_display()

        # Opinion
        self.cites = [opinion.pk for opinion in item.opinions_cited.all()]
        self.author_id = getattr(item.author, 'pk', None)
        # self.per_curiam = item.per_curiam
        self.joined_by_ids = [judge.pk for judge in item.joined_by.all()]
        self.type = item.type
        self.download_url = item.download_url or None
        self.local_path = unicode(item.local_path)

        try:
            self.absolute_url = item.cluster.get_absolute_url()
        except NoReverseMatch:
            # Bug fix: this instance has no `docket` attribute (only
            # `docket_id`), so `self.docket.court_id` raised AttributeError
            # inside the error path. Use the related docket directly, as the
            # sibling as_search_dict implementation does.
            raise InvalidDocumentError(
                "Unable to save to index due to missing absolute_url "
                "(court_id: %s, item.pk: %s). Might the court have in_use set "
                "to False?" % (item.cluster.docket.court_id, item.pk)
            )

        # Load the document text using a template for cleanup and concatenation
        text_template = loader.get_template('indexes/opinion_text.txt')
        context = {'item': item, 'citation_string': item.cluster.citation_string}
        self.text = text_template.render(context).translate(null_map)

        # Faceting fields
        self.status_exact = item.cluster.get_precedential_status_display()
        self.court_exact = item.cluster.docket.court.pk
示例#47
0
def view_opinion(request, pk, _):
    """Using the cluster ID, return the cluster of opinions.

    We also test if the cluster ID is a favorite for the user, and send data
    if needed. If it's a favorite, we send the bound form for the favorite so
    it can populate the form on the page. If it is not a favorite, we send the
    unbound form.
    """
    # Look up the court, cluster, title and favorite information
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = ', '.join([s for s in [
        trunc(best_case_name(cluster), 100, ellipsis="..."),
        cluster.citation_string,
    ] if s.strip()])
    has_downloads = False
    for sub_opinion in cluster.sub_opinions.all():
        if sub_opinion.local_path or sub_opinion.download_url:
            has_downloads = True
            break
    get_string = search_utils.make_get_string(request)

    # The favorite lookup raises TypeError for anonymous users.
    try:
        fave = Favorite.objects.get(cluster_id=cluster.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(initial={
            'cluster_id': cluster.pk,
            'name': trunc(best_case_name(cluster), 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=fave)

    if not is_bot(request):
        # Get the citing results from Solr for speed. Only do this for humans
        # to save on disk usage.
        conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
        # Bug fix: the comprehension previously reused `pk` as its loop
        # variable, shadowing the view's `pk` argument (and, under Python 2
        # list-comprehension scoping, rebinding it in the enclosing scope).
        sub_opinion_pks = cluster.sub_opinions.values_list('pk', flat=True)
        q = {
            'q': 'cites:({ids})'.format(
                ids=' OR '.join([str(sub_pk) for sub_pk in sub_opinion_pks])
            ),
            'rows': 5,
            'start': 0,
            'sort': 'citeCount desc',
            'caller': 'view_opinion',
        }
        citing_clusters = conn.raw_query(**q).execute()
    else:
        citing_clusters = None

    return render(request, 'view_opinion.html', {
        'title': title,
        'cluster': cluster,
        'has_downloads': has_downloads,
        'favorite_form': favorite_form,
        'get_string': get_string,
        'private': cluster.blocked,
        'citing_clusters': citing_clusters,
        'top_authorities': cluster.authorities[:5],
    })
示例#48
0
            stderr=subprocess.STDOUT
        )
    except subprocess.CalledProcessError, e:
        print 'avconv failed command: %s\nerror code: %s\noutput: %s\n' % \
              (avconv_command, e.returncode, e.output)
        print traceback.format_exc()
        raise

    # Have to do this last because otherwise the mp3 hasn't yet been generated.
    set_mp3_meta_data(af, path_to_tmp_location)

    af.duration = eyed3.load(path_to_tmp_location).info.time_secs

    with open(path_to_tmp_location, 'r') as mp3:
        try:
            cf = ContentFile(mp3.read())
            file_name = trunc(best_case_name(af).lower(), 72) + '_cl.mp3'
            af.file_with_date = af.docket.date_argued
            af.local_path_mp3.save(file_name, cf, save=False)
        except:
            msg = "Unable to save mp3 to audio_file in scraper.tasks.process_" \
                  "audio_file for item: %s\nTraceback:\n%s" % \
                  (af.pk, traceback.format_exc())
            print msg
            ErrorLog(log_level='CRITICAL', court=af.docket.court,
                     message=msg).save()

    af.processing_complete = True
    af.save()
    os.remove(path_to_tmp_location)
示例#49
0
    def as_search_dict(self):
        """Create a dict that can be ingested by Solr.

        Flattens this opinion together with its related cluster, docket, and
        court into the flat field names used by the Solr schema.

        :returns: a dict of Solr field name -> value, passed through
            ``nuke_nones`` before returning.
        :raises InvalidDocumentError: if the cluster's absolute URL cannot be
            reversed (the court may have in_use set to False).
        """
        # IDs
        out = {
            'id': self.pk,
            'docket_id': self.cluster.docket.pk,
            'cluster_id': self.cluster.pk,
            'court_id': self.cluster.docket.court.pk
        }

        # Opinion
        out.update({
            'cites': [opinion.pk for opinion in self.opinions_cited.all()],
            'author_id': getattr(self.author, 'pk', None),
            # 'per_curiam': self.per_curiam,
            'joined_by_ids': [judge.pk for judge in self.joined_by.all()],
            'type': self.type,
            'download_url': self.download_url or None,
            'local_path': unicode(self.local_path),
        })

        # Cluster
        out.update({
            'caseName': best_case_name(self.cluster),
            'caseNameShort': self.cluster.case_name_short,
            'sibling_ids': [sibling.pk for sibling in self.siblings.all()],
            'panel_ids': [judge.pk for judge in self.cluster.panel.all()],
            'non_participating_judge_ids': [
                judge.pk for judge in
                    self.cluster.non_participating_judges.all()
            ],
            'judge': self.cluster.judges,
            'lexisCite': self.cluster.lexis_cite,
            'citation': [
                cite for cite in
                    self.cluster.citation_list if cite],  # Nuke '' and None
            'neutralCite': self.cluster.neutral_cite,
            'scdb_id': self.cluster.scdb_id,
            'source': self.cluster.source,
            'attorney': self.cluster.attorneys,
            'suitNature': self.cluster.nature_of_suit,
            'citeCount': self.cluster.citation_count,
            'status': self.cluster.get_precedential_status_display(),
            'status_exact': self.cluster.get_precedential_status_display(),
        })
        # Dates are indexed as naive datetimes at midnight.
        if self.cluster.date_filed is not None:
            out['dateFiled'] = datetime.combine(
                self.cluster.date_filed,
                time()
            )  # Midnight, PST
        try:
            out['absolute_url'] = self.cluster.get_absolute_url()
        except NoReverseMatch:
            raise InvalidDocumentError(
                "Unable to save to index due to missing absolute_url "
                "(court_id: %s, item.pk: %s). Might the court have in_use set "
                "to False?" % (self.cluster.docket.court_id, self.pk)
            )

        # Docket
        docket = {'docketNumber': self.cluster.docket.docket_number}
        if self.cluster.docket.date_argued is not None:
            docket['dateArgued'] = datetime.combine(
                self.cluster.docket.date_argued,
                time(),
            )
        if self.cluster.docket.date_reargued is not None:
            docket['dateReargued'] = datetime.combine(
                self.cluster.docket.date_reargued,
                time(),
            )
        if self.cluster.docket.date_reargument_denied is not None:
            docket['dateReargumentDenied'] = datetime.combine(
                self.cluster.docket.date_reargument_denied,
                time(),
            )
        out.update(docket)

        court = {
            'court': self.cluster.docket.court.full_name,
            'court_citation_string': self.cluster.docket.court.citation_string,
            'court_exact': self.cluster.docket.court_id,  # For faceting
        }
        out.update(court)

        # Load the document text using a template for cleanup and concatenation
        text_template = loader.get_template('indexes/opinion_text.txt')
        out['text'] = text_template.render({
            'item': self,
            'citation_string': self.cluster.citation_string
        }).translate(null_map)

        # nuke_nones presumably strips None-valued fields before indexing —
        # verify against its definition.
        return nuke_nones(out)
示例#50
0
    def as_search_dict(self):
        """Create a dict that can be ingested by Solr.

        Search results are presented as Dockets, but they're indexed as
        RECAPDocument's, which are then grouped back together in search results
        to form Dockets.

        :returns: a dict of Solr field name -> value, passed through
            ``nuke_nones`` before returning.
        :raises InvalidDocumentError: if the docket's absolute URL cannot be
            reversed.
        """
        # IDs
        out = {
            'id': self.pk,
            'docket_entry_id': self.docket_entry.pk,
            'docket_id': self.docket_entry.docket.pk,
            'court_id': self.docket_entry.docket.court.pk,
            # getattr handles a null assigned_to/referred_to relation.
            'assigned_to_id': getattr(
                self.docket_entry.docket.assigned_to, 'pk', None),
            'referred_to_id': getattr(
                self.docket_entry.docket.referred_to, 'pk', None)
        }

        # RECAPDocument
        out.update({
            'document_type': self.get_document_type_display(),
            'document_number': self.document_number,
            'attachment_number': self.attachment_number,
            'is_available': self.is_available,
            'page_count': self.page_count,
        })
        # filepath_local may be an empty FileField with no backing file.
        if hasattr(self.filepath_local, 'path'):
            out['filepath_local'] = self.filepath_local.path


        # Docket Entry
        out['description'] = self.docket_entry.description
        if self.docket_entry.entry_number is not None:
            out['entry_number'] = self.docket_entry.entry_number
        if self.docket_entry.date_filed is not None:
            # Dates are indexed as naive datetimes at midnight.
            out['entry_date_filed'] = datetime.combine(
                self.docket_entry.date_filed,
                time()
            )

        # Docket
        out.update({
            'docketNumber': self.docket_entry.docket.docket_number,
            'caseName': best_case_name(self.docket_entry.docket),
            'suitNature': self.docket_entry.docket.nature_of_suit,
            'cause': self.docket_entry.docket.cause,
            'juryDemand': self.docket_entry.docket.jury_demand,
            'jurisdictionType': self.docket_entry.docket.jurisdiction_type,
        })
        if self.docket_entry.docket.date_argued is not None:
            out['dateArgued'] = datetime.combine(
                self.docket_entry.docket.date_argued,
                time()
            )
        if self.docket_entry.docket.date_filed is not None:
            out['dateFiled'] = datetime.combine(
                self.docket_entry.docket.date_filed,
                time()
            )
        if self.docket_entry.docket.date_terminated is not None:
            out['dateTerminated'] = datetime.combine(
                self.docket_entry.docket.date_terminated,
                time()
            )
        try:
            out['absolute_url'] = self.docket_entry.docket.get_absolute_url()
        except NoReverseMatch:
            raise InvalidDocumentError(
                "Unable to save to index due to missing absolute_url: %s"
                % self.pk
            )

        # Judges: prefer the related Judge object's full name; fall back to
        # the free-text *_str field when no relation is set.
        if self.docket_entry.docket.assigned_to is not None:
            out['assignedTo'] = self.docket_entry.docket.assigned_to.name_full
        elif self.docket_entry.docket.assigned_to_str is not None:
            out['assignedTo'] = self.docket_entry.docket.assigned_to_str
        if self.docket_entry.docket.referred_to is not None:
            out['referredTo'] = self.docket_entry.docket.referred_to.name_full
        elif self.docket_entry.docket.referred_to_str is not None:
            out['referredTo'] = self.docket_entry.docket.referred_to_str

        # Court
        out.update({
            'court': self.docket_entry.docket.court.full_name,
            'court_exact': self.docket_entry.docket.court_id,  # For faceting
            'court_citation_string': self.docket_entry.docket.court.citation_string
        })

        # Render the full searchable text through a cleanup template.
        text_template = loader.get_template('indexes/dockets_text.txt')
        out['text'] = text_template.render({'item': self}).translate(null_map)

        # nuke_nones presumably strips None-valued fields before indexing —
        # verify against its definition.
        return nuke_nones(out)