示例#1
0
def import_law_box_case(case_path):
    """Build an unsaved Document from the case file at ``case_path``.

    The file is read, handed to ``get_html_from_raw_text`` and the resulting
    HTML trees are mined for citations, judges, court, filing date, docket
    number and case name.

    :param case_path: Path of the file to import. It is also stored on the
        document as ``download_url`` and passed to the extraction helpers.
    :return: A ``Document`` with ``citation`` (a ``Citation``) and ``docket``
        (a ``Docket``) attached. This function itself never calls ``save()``;
        persisting the objects is left to the caller.
    """
    # Read inside a context manager so the file handle is always closed,
    # instead of relying on garbage collection to release it.
    with open(case_path) as case_file:
        raw_text = case_file.read()

    # The fourth value returned by the helper is not used here; body_text is
    # rebuilt from the <p> nodes further down.
    (clean_html_tree, complete_html_tree, clean_html_str,
     _body_text) = get_html_from_raw_text(raw_text)

    sha1 = hashlib.sha1(clean_html_str).hexdigest()
    citations = get_citations_from_tree(complete_html_tree, case_path)
    judges = get_judge(clean_html_tree, case_path)
    court = get_court_object(clean_html_tree, citations, case_path, judges)

    doc = Document(
        source='L',
        sha1=sha1,
        html=
        clean_html_str,  # we clear this field later, putting the value into html_lawbox.
        date_filed=get_date_filed(clean_html_tree,
                                  citations=citations,
                                  case_path=case_path,
                                  court=court),
        precedential_status=get_precedential_status(),
        judges=judges,
        download_url=case_path,
    )

    cite = Citation(docket_number=get_docket_number(
        clean_html_tree, case_path=case_path, court=court))

    docket = Docket(
        case_name=get_case_name(complete_html_tree, case_path),
        court=court,
    )

    # Necessary for dup_finder.
    path = '//p/text()'
    doc.body_text = ' '.join(clean_html_tree.xpath(path))

    # Add the dict of citations to the object as its attributes.
    citations_as_dict = map_citations_to_models(citations)
    for k, v in citations_as_dict.iteritems():
        setattr(cite, k, v)

    doc.citation = cite
    doc.docket = docket

    return doc
示例#2
0
def import_mayer(case_path):
    """Build an unsaved Document from the case file at ``case_path``.

    The file is read, handed to ``get_html_from_raw_text`` and the resulting
    HTML trees are mined for citations, judges, court, filing date, docket
    number and case name.

    :param case_path: Path of the file to import. It is also stored on the
        document as ``download_url`` and passed to the extraction helpers.
    :return: A ``Document`` with ``citation`` (a ``Citation``) and ``docket``
        (a ``Docket``) attached. This function itself never calls ``save()``;
        persisting the objects is left to the caller.
    """
    # The cleaned/complete trees and the cleaned HTML string used below must
    # come from get_html_from_raw_text; without this step every reference to
    # them raises NameError. The file is read in a context manager so the
    # handle is always closed.
    with open(case_path) as case_file:
        raw_text = case_file.read()

    # The fourth value returned by the helper is not used here; body_text is
    # rebuilt from the <p> nodes further down.
    (clean_html_tree, complete_html_tree, clean_html_str,
     _body_text) = get_html_from_raw_text(raw_text)

    sha1 = hashlib.sha1(clean_html_str).hexdigest()
    citations = get_citations_from_tree(complete_html_tree, case_path)
    judges = get_judge(clean_html_tree, case_path)
    court = get_court_object(clean_html_tree, citations, case_path, judges)

    doc = Document(
        source='L',
        sha1=sha1,
        html=clean_html_str,  # we clear this field later, putting the value into html_lawbox.
        date_filed=get_date_filed(clean_html_tree, citations=citations, case_path=case_path, court=court),
        precedential_status=get_precedential_status(),
        judges=judges,
        download_url=case_path,
    )

    cite = Citation()

    # Here the docket number is stored on the Docket rather than the Citation.
    docket = Docket(
        docket_number=get_docket_number(
            clean_html_tree,
            case_path=case_path,
            court=court
        ),
        case_name=get_case_name(complete_html_tree, case_path),
        court=court,
    )

    # Necessary for dup_finder.
    path = '//p/text()'
    doc.body_text = ' '.join(clean_html_tree.xpath(path))

    # Add the dict of citations to the object as its attributes.
    citations_as_dict = map_citations_to_models(citations)
    for k, v in citations_as_dict.iteritems():
        setattr(cite, k, v)

    doc.citation = cite
    doc.docket = docket

    return doc
示例#3
0
def create_stub(citations):
    """Create, save and return a placeholder Document for ``citations``.

    A fresh ``Citation`` receives every key/value produced by
    ``map_citations_to_models(citations)`` as an attribute. It is then
    attached to a ``Document`` flagged ``is_stub_document=True`` with no court
    and ``'!'`` as its sha1. The document is saved with ``index=False``
    before being returned.
    """
    stub_citation = Citation()

    # Copy each mapped citation value onto the Citation as an attribute.
    mapped_citations = map_citations_to_models(citations)
    for field_name, field_value in mapped_citations.iteritems():
        setattr(stub_citation, field_name, field_value)

    # TODO: We can use the court information in the citation here. Failure to
    #       do so will mean that our URLs will later change -- something we
    #       wish to avoid.
    placeholder = Document(
        citation=stub_citation,
        court=None,
        sha1='!',
        is_stub_document=True,
    )
    placeholder.save(index=False)
    return placeholder
示例#4
0
def citation_redirector(request, reporter, volume, page):
    """Take a citation URL and use it to redirect the user to the canonical page
    for that citation.

    This uses the same infrastructure as the thing that identifies citations in
    the text of opinions.

    :param request: The incoming request; used to build the RequestContext.
    :param reporter: Reporter portion of the citation.
    :param volume: Volume portion of the citation.
    :param page: Page portion of the citation.
    :return: A 404 response rendering the info page when no document matches,
        a permanent redirect to the document when exactly one matches, and a
        300 response listing the candidates when several match.
    """
    citation_str = " ".join([volume, reporter, page])
    try:
        citation = get_citations(citation_str)[0]
        citation_str = citation.base_citation()  # Corrects typos/variations.
        lookup_fields = [map_citations_to_models([citation]).keys()[0]]
    except IndexError:
        # Unable to disambiguate the citation. Try looking in *all* citation
        # fields.
        lookup_fields = [
            "neutral_cite",
            "federal_cite_one",
            "federal_cite_two",
            "federal_cite_three",
            "specialty_cite_one",
            "state_cite_regional",
            "state_cite_one",
            "state_cite_two",
            "state_cite_three",
            "westlaw_cite",
            "lexis_cite",
        ]

    # We were able to get a match, expand it if it's a federal/state match.
    if len(lookup_fields) == 1 and lookup_fields[0] == "federal_cite_one":
        lookup_fields = ["federal_cite_one", "federal_cite_two", "federal_cite_three"]
    elif len(lookup_fields) == 1 and lookup_fields[0] == "state_cite_one":
        lookup_fields = ["state_cite_one", "state_cite_two", "state_cite_three"]

    # OR together one equality test per candidate citation field.
    q = Q()
    for lookup_field in lookup_fields:
        q |= Q(**{"citation__" + lookup_field: citation_str})
    documents = Document.objects.filter(q)

    # Count once: re-evaluating count() in every branch repeats the work and,
    # if the result set changes between evaluations, can leave no branch
    # matching so that the view returns None.
    document_count = documents.count()

    # Show the correct page....
    if document_count == 0:
        # No results for an otherwise valid citation.
        response = render_to_response(
            "casepage/citation_redirect_info_page.html",
            {"none_found": True, "citation_str": citation_str, "private": True},
            RequestContext(request),
        )
        # The status is assigned after rendering rather than via a keyword.
        response.status_code = 404
        return response

    if document_count == 1:
        # Total success. Redirect to correct location.
        return HttpResponsePermanentRedirect(documents[0].get_absolute_url())

    # Multiple results. Show them.
    response = render_to_response(
        "casepage/citation_redirect_info_page.html",
        {"too_many": True, "citation_str": citation_str, "documents": documents, "private": True},
        RequestContext(request),
    )
    # The status is assigned after rendering rather than via a keyword.
    response.status_code = 300
    return response
示例#5
0
def citation_redirector(request, reporter, volume, page):
    """Take a citation URL and use it to redirect the user to the canonical page
    for that citation.

    This uses the same infrastructure as the thing that identifies citations in
    the text of opinions.

    :param request: The incoming request; used to build the RequestContext.
    :param reporter: Reporter portion of the citation.
    :param volume: Volume portion of the citation.
    :param page: Page portion of the citation.
    :return: A 404 response rendering the info page when no document matches,
        a permanent redirect to the document when exactly one matches, and a
        300 response listing the candidates when several match.
    """
    citation_str = " ".join([volume, reporter, page])
    try:
        citation = get_citations(citation_str)[0]
        citation_str = citation.base_citation()  # Corrects typos/variations.
        lookup_fields = [map_citations_to_models([citation]).keys()[0]]
    except IndexError:
        # Unable to disambiguate the citation. Try looking in *all* citation
        # fields.
        lookup_fields = [
            'neutral_cite', 'federal_cite_one', 'federal_cite_two',
            'federal_cite_three', 'specialty_cite_one', 'state_cite_regional',
            'state_cite_one', 'state_cite_two', 'state_cite_three',
            'westlaw_cite', 'lexis_cite'
        ]

    # We were able to get a match, expand it if it's a federal/state match.
    if len(lookup_fields) == 1 and lookup_fields[0] == 'federal_cite_one':
        lookup_fields = [
            'federal_cite_one', 'federal_cite_two', 'federal_cite_three'
        ]
    elif len(lookup_fields) == 1 and lookup_fields[0] == 'state_cite_one':
        lookup_fields = [
            'state_cite_one', 'state_cite_two', 'state_cite_three'
        ]

    # OR together one equality test per candidate citation field.
    q = Q()
    for lookup_field in lookup_fields:
        q |= Q(**{'citation__' + lookup_field: citation_str})
    documents = Document.objects.filter(q)

    # Evaluate the count a single time. Calling count() again in each branch
    # repeats the work and, should the result set change in between, could
    # leave every branch unmatched so that the view returns None.
    num_documents = documents.count()

    # Show the correct page....
    if num_documents == 0:
        # No results for an otherwise valid citation.
        response = render_to_response(
            'casepage/citation_redirect_info_page.html',
            {
                'none_found': True,
                'citation_str': citation_str,
                'private': True,
            },
            RequestContext(request),
        )
        # The status is assigned after rendering rather than via a keyword.
        response.status_code = 404
        return response

    if num_documents == 1:
        # Total success. Redirect to correct location.
        return HttpResponsePermanentRedirect(documents[0].get_absolute_url())

    # Multiple results. Show them.
    response = render_to_response(
        'casepage/citation_redirect_info_page.html',
        {
            'too_many': True,
            'citation_str': citation_str,
            'documents': documents,
            'private': True,
        },
        RequestContext(request),
    )
    # The status is assigned after rendering rather than via a keyword.
    response.status_code = 300
    return response