Exemplo n.º 1
0
def citation(request, series_slug, volume_number_slug, page_number, case_id=None, pdf=False, db_case=None):
    """
        /<series_slug>/<volume_number>/<page_number>/                       -- show requested case (or list of cases, or case not found page).
        /<series_slug>/<volume_number>/<page_number>/<case_id>/             -- show requested case, using case_id to find one of multiple cases at this cite
    """

    # redirect if series slug or volume number slug is in the wrong format
    # (the redirect is skipped entirely when `pdf` is truthy; case_id is only
    # appended to the redirect URL args when one was supplied)
    if not pdf and (slugify(series_slug) != series_slug or slugify(volume_number_slug) != volume_number_slug):
        return HttpResponseRedirect(reverse(
            'citation',
            args=[slugify(series_slug), slugify(volume_number_slug), page_number] + ([case_id] if case_id else []),
            host='cite'))

    ### try to look up citation

    case = None
    resolved_case = None
    if case_id:
        try:
            # fetch the case document directly by its ID
            case = CaseDocument.get(id=case_id)
            # find resolver entries whose source is 'cap' and whose source_id
            # matches this case; keep the first hit if there is one
            resolved_cases = ResolveDocument.search().query("match", source='cap').query("match", source_id=case_id).execute()
            if resolved_cases:
                resolved_case = resolved_cases[0]
        except NotFoundError:
            # a NotFoundError raised anywhere in the lookup above becomes a 404
            raise Http404
Exemplo n.º 2
0
def volume(request, series_slug, volume_number_slug):
    """
        /<series_slug>/<volume_number>/ -- list all cases for the matching volumes (typically only one).

        Redirects to the slugified URL when either slug is not already in
        slugified form, and raises Http404 when no in-scope volume matches.
    """
    # send non-canonical slugs to their slugified URL
    clean_series = slugify(series_slug)
    clean_volume = slugify(volume_number_slug)
    if clean_series != series_slug or clean_volume != volume_number_slug:
        target = reverse('volume', args=[clean_series, clean_volume], host='cite')
        return HttpResponseRedirect(target)

    # in-scope volumes for this reporter slug / volume number
    volume_queryset = (
        VolumeMetadata.objects
        .select_related('reporter')
        .filter(volume_number_slug=volume_number_slug, reporter__short_name_slug=series_slug, out_of_scope=False)
        .order_by('-second_part_of')
    )
    vols = list(volume_queryset)
    if not vols:
        raise Http404

    # fetch the case documents for the volume(s), leaving out casebody data
    search = CaseDocument.search()
    search = search.filter("term", volume__volume_number_slug=volume_number_slug)
    search = search.filter("term", reporter__short_name_slug__raw=series_slug)
    search = search.sort('first_page').extra(size=10000)
    search = search.source({"excludes": "casebody_data.*"})
    hits = search.execute()

    # re-sort the hits by first_page using natural ordering
    ordered_cases = natsorted(hits, key=lambda hit: hit.first_page)

    # pair each volume with the cases that share its barcode
    volumes = []
    for vol in vols:
        vol_cases = [hit for hit in ordered_cases if hit.volume.barcode == vol.barcode]
        volumes.append((vol, vol_cases))

    return render(request, 'cite/volume.html', {"volumes": volumes})
Exemplo n.º 3
0
def get_toc_by_url():
    from capapi.documents import CaseDocument
    base_path = Path(__file__).parent.joinpath('templates/docs')
    toc_by_url = {
        '': {
            'parents': [],
            'children': [],
            'url': '',
            'meta': {
                'doc_link': '/',
            }
        },
    }

    # context variables for rendering markdown templates
    context = {
        'email': settings.DEFAULT_FROM_EMAIL,
        'news': get_data_from_lil_site(section="news"),
    }
    try:
        case = CaseDocument.get(id=settings.API_DOCS_CASE_ID)
Exemplo n.º 4
0
    def get_filter_query_params(self, request, view):
        """
            Post-process the query params produced by the parent class.

            - 'cite': string values are lower-cased, then passed through
              models.normalize_cite.
            - 'court' / 'jurisdiction': string values are lower-cased.
            - 'cites_to': all-digit values are treated as case IDs and replaced
              by the normalized cites of that case; other values are normalized
              with normalize_cite.
        """
        def lc_values(values):
            # lower-case every string value; non-string values are dropped
            return [
                value.lower() for value in values if isinstance(value, str)
            ]

        query_params = super().get_filter_query_params(request, view)

        if 'cite' in query_params:
            query_params['cite']['values'] = [
                models.normalize_cite(cite)
                for cite in lc_values(query_params['cite']['values'])
            ]

        if 'court' in query_params:
            query_params['court']['values'] = lc_values(
                query_params['court']['values'])

        if 'jurisdiction' in query_params:
            query_params['jurisdiction']['values'] = lc_values(
                query_params['jurisdiction']['values'])

        if 'cites_to' in query_params:
            # rebuild the value list from scratch, expanding case IDs to cites
            old_cites_to = query_params['cites_to']['values']
            query_params['cites_to']['values'] = []
            for cite in old_cites_to:
                # check if case id is passed in
                if cite.isdigit():
                    try:
                        case = CaseDocument.get(id=cite)
                        # add all citations relating to case
                        query_params['cites_to']['values'] += [
                            c['normalized_cite'] for c in case.citations
                        ]
                    except NotFoundError:
                        # an unknown case ID contributes no values
                        pass
                else:
                    # NOTE(review): this calls bare normalize_cite while the
                    # 'cite' branch calls models.normalize_cite -- confirm both
                    # names are in scope and refer to the same function
                    query_params['cites_to']['values'].append(
                        normalize_cite(cite))
Exemplo n.º 5
0
def random(request):
    """ Redirect to a randomly chosen case of at least 1,000 words. """
    # only the frontend_url field is needed from the matching document
    long_cases = CaseDocument.search().source(['frontend_url'])
    long_cases = long_cases.filter('range', analysis__word_count={'gte': 1000})

    # replace each hit's score with a random one, keeping the filter in place
    # (pass no `query` here if no filter was applied first; to weight by
    # pagerank, a field_value_factor function on analysis.pagerank.percentile
    # could be added to `functions`)
    scoring_functions = [SF('random_score')]
    long_cases.query = FunctionScore(
        query=long_cases.query,
        functions=scoring_functions,
        boost_mode='replace',
    )

    # take the first hit of the randomly scored results
    first_hit_search = long_cases[0]
    response = first_hit_search.execute()
    chosen = response[0]
    return HttpResponseRedirect(chosen.frontend_url)
Exemplo n.º 6
0
def export_cases_by_reporter(version_string, id):
    """
        Export all cases for the reporter with primary key `id`.

        Prints a warning and returns without exporting when the reporter has
        no cases. Otherwise hands the case search, an output path template
        under bulk_exports/<version_string>/by_reporter/ and the reporter to
        export_case_documents. The export is marked public only when the
        reporter has no in-scope case in a non-whitelisted jurisdiction.
    """
    reporter = Reporter.objects.get(pk=id)
    cases = CaseDocument.raw_search().filter("term", reporter__id=id)
    if not cases.count():
        warning = "WARNING: Reporter '{}' contains NO CASES."
        print(warning.format(reporter.full_name))
        return

    # "{subfolder}" and "{case_format}" are left as literal placeholders here
    slug = reporter.short_name_slug
    file_name = "%s_{case_format}_%s.zip" % (slug, version_string)
    out_path = Path(
        "bulk_exports", version_string, "by_reporter", "{subfolder}",
        slug, file_name)

    has_restricted_case = reporter.case_metadatas.in_scope().filter(
        jurisdiction__whitelisted=False).exists()
    export_case_documents(cases, out_path, reporter, public=not has_restricted_case)
Exemplo n.º 7
0
def export_cases_by_jurisdiction(version_string, id):
    """
        Export all cases for the jurisdiction with primary key `id`.

        Prints a warning and returns without exporting when the jurisdiction
        has no cases. Otherwise hands the case search, an output path template
        under bulk_exports/<version_string>/by_jurisdiction/ and the
        jurisdiction to export_case_documents. The export is public exactly
        when the jurisdiction is whitelisted.
    """
    jurisdiction = Jurisdiction.objects.get(pk=id)
    cases = CaseDocument.raw_search().filter("term", jurisdiction__id=id)
    if not cases.count():
        warning = "WARNING: Jurisdiction '{}' contains NO CASES."
        print(warning.format(jurisdiction.name))
        return

    # "{subfolder}" and "{case_format}" are left as literal placeholders here
    slug = jurisdiction.slug
    file_name = "%s_{case_format}_%s.zip" % (slug, version_string)
    out_path = Path(
        "bulk_exports", version_string, "by_jurisdiction", "{subfolder}",
        slug, file_name)

    export_case_documents(cases, out_path, jurisdiction, public=jurisdiction.whitelisted)
Exemplo n.º 8
0
def test_CaseDocumentSerializerWithCasebody(api_request_factory, case_factory,
                                            elasticsearch):
    """ CaseDocumentSerializerWithCasebody includes 'casebody' for one or many documents. """
    # create all three cases first, then load their documents
    created_cases = [case_factory() for _ in range(3)]
    case_documents = []
    for created in created_cases:
        case_documents.append(CaseDocument.get(created.id))

    # build the serializer context around a request for the case list
    request = api_request_factory.get(api_reverse("cases-list"))
    request.accepted_renderer = None
    serializer_context = {'request': Request(request)}
    serializer_class = serializers.CaseDocumentSerializerWithCasebody

    # can get single case data
    single = serializer_class(case_documents[0], context=serializer_context)
    assert 'casebody' in single.data

    # can get multiple cases' data
    multiple = serializer_class(
        case_documents, many=True, context=serializer_context)
    assert len(multiple.data) == 3
    for serialized_case in multiple.data:
        assert 'casebody' in serialized_case
Exemplo n.º 9
0
    case2.volume.volume_number = "124"
    case2.volume.save()
    CaseMetadata.update_frontend_urls(["124 Test 456", "123 Test 456"])
    case1.refresh_from_db()
    case2.refresh_from_db()

    assert case1.frontend_url == "/test/123/456/"
    assert case2.frontend_url == "/test/124/456/"


@pytest.mark.django_db
def test_set_duplicate(reset_sequences, case, elasticsearch):
    """ set_duplicate updates the volume and its cases disappear from the elasticsearch index. """

    def indexed_cases_in_volume():
        # number of indexed case documents carrying this volume's barcode
        return CaseDocument.search().filter(
            "term", volume__barcode=case.volume.barcode).count()

    # any volume other than the one the case belongs to
    duplicate_of = VolumeMetadata.objects.exclude(pk=case.volume.pk).first()

    # before: the case is indexed under its volume
    update_elasticsearch_from_queue()
    assert indexed_cases_in_volume() == 1

    case.volume.set_duplicate(duplicate_of)
    assert case.volume.duplicate_of == duplicate_of

    # after: nothing is indexed under that volume any more
    update_elasticsearch_from_queue()
    assert indexed_cases_in_volume() == 0


@pytest.mark.django_db
def test_set_reporter(reset_sequences, case, elasticsearch, reporter):
    volume = case.volume

    # collect element [1] of each case's frontend_url split on '/', for every
    # case in the volume (presumably the reporter short-name segment of a URL
    # shaped like "/<reporter>/<volume>/<page>/" -- TODO confirm)
    old_frontend_url_rep_shorts = set([
        case.frontend_url.split('/')[1]
        for case in volume.case_metadatas.all()
    ])
Exemplo n.º 10
0
            'meta': {
                'doc_link': '/',
            }
        },
    }

    # context variables for rendering markdown templates
    context = {
        'email': settings.DEFAULT_FROM_EMAIL,
        'news': get_data_from_lil_site(section="news"),
    }
    try:
        case = CaseDocument.get(id=settings.API_DOCS_CASE_ID)
    except NotFoundError:
        try:
            case = CaseDocument.search()[0].execute()[0]
        except NotFoundError:
            case = None
    context['case_id'] = case.id if case else 1
    context['case_url'] = reverse('cases-detail',
                                  args=[context['case_id']],
                                  host='api')
    context['case_cite'] = case.citations[0].cite if case else "123 U.S. 456"

    def path_string_to_title(string):
        """ Turn a path fragment into a title: separators become spaces, words are title-cased, 'Api'/'Cap' are upper-cased. """
        spaced = string.replace('-', ' ').replace('_', ' ')
        titled = spaced.title()
        # restore the acronyms that title-casing turned into 'Api' / 'Cap'
        with_api = titled.replace('Api', 'API')
        return with_api.replace('Cap', 'CAP')

    for path in iter_docs():
        if not (path.suffix == '.md' or path.is_dir()):
            continue
Exemplo n.º 11
0
        body_cache__text=", some text, ".join(legitimate_cites +
                                              illegitimate_cites),
        decision_date=datetime(2000, 1, 1))
    fabfile.extract_all_citations()
    update_elasticsearch_from_queue()

    # check extracted cites
    cites = list(ExtractedCitation.objects.all())
    cite_set = set(c.cite for c in cites)
    normalized_cite_set = set(c.normalized_cite for c in cites)
    assert cite_set == set(legitimate_cites)
    assert normalized_cite_set == legitimate_cites_normalized
    assert all(c.cited_by_id == case.pk for c in cites)
    assert set(
        c['cite']
        for c in CaseDocument.get(id=case.pk).extractedcitations) == cite_set
    assert set(c['normalized_cite'] for c in CaseDocument.get(
        id=case.pk).extractedcitations) == normalized_cite_set

    # remove a cite and add a cite --
    # make sure IDs of unchanged cites are still the same
    removed_cite_str = legitimate_cites[0]
    added_cite_str = '123 F. Supp. 456'
    case.body_cache.text = case.body_cache.text.replace(
        removed_cite_str, '') + ', some text, ' + added_cite_str
    case.body_cache.save()
    fabfile.extract_all_citations()
    new_cites = list(ExtractedCitation.objects.all())
    removed_cite = next(c for c in cites if c.cite == removed_cite_str)
    added_cite = next(c for c in new_cites if c.cite == added_cite_str)
    assert {(c.id, c.cite)
Exemplo n.º 12
0
            if resolved_cases:
                resolved_case = resolved_cases[0]
        except NotFoundError:
            raise Http404
    else:
        full_cite = "%s %s %s" % (volume_number_slug, series_slug.replace('-', ' ').title(), page_number)
        normalized_cite = re.sub(r'[^0-9a-z]', '', full_cite.lower())
        resolved = ResolveDocument.search().filter("term", citations__normalized_cite=normalized_cite).execute()
        resolved_by_source = None

        if resolved:
            resolved_by_source = group_by(resolved, lambda r: r.source)
            if 'cap' in resolved_by_source:
                if len(resolved_by_source['cap']) == 1:
                    resolved_case = resolved_by_source['cap'][0]
                    case = CaseDocument.get(resolved_case['source_id'])
            else:
                cap_candidates = {
                    c.id: c
                    for r in resolved
                    for cite in r.citations
                    for c in CaseDocument.search().filter("term", citations__normalized_cite=cite.normalized_cite).execute()
                }
                if cap_candidates:
                    resolved_by_source['cap_guess'] = cap_candidates.values()

        if not case:
            reporter = Reporter.objects.filter(short_name_slug=slugify(series_slug)).first()
            if reporter:
                series = reporter.short_name
                full_cite = f'{volume_number_slug} {series} {page_number}'