Python map_pacer_to_cl_id示例，cl.lib.pacer.map_pacer_to_cl_id Python示例

示例#1

0

显示文件

文件： scrape_pacer_free_opinions.py 项目： snorey/courtlistener

def get_next_date_range(court_id, span=7):
    """Work out which date window should be queried next for a court.

    The newest log row for the court that is not marked as failed is looked
    up. If that row is still in progress, (None, None) is returned. Otherwise
    the window starts one day after the row's date_queried and ends ``span``
    days after it.

    :param court_id: A PACER Court ID; it is converted with
    map_pacer_to_cl_id before the lookup.
    :param span: How many days after the last queried date the window ends.
    :return: A (start, end) tuple, or (None, None) while a scrape is still in
    progress.
    :raises PACERFreeDocumentLog.DoesNotExist: Re-raised when the court has no
    non-failed log row.
    """
    cl_court_id = map_pacer_to_cl_id(court_id)
    candidate_logs = PACERFreeDocumentLog.objects.filter(
        court_id=cl_court_id,
    ).exclude(
        status=PACERFreeDocumentLog.SCRAPE_FAILED,
    )
    try:
        newest_log = candidate_logs.latest('date_queried')
    except PACERFreeDocumentLog.DoesNotExist:
        # Say which court had no usable log row, then let the caller decide.
        print("FAILED ON: %s" % cl_court_id)
        raise

    if newest_log.status == PACERFreeDocumentLog.SCRAPE_IN_PROGRESS:
        return None, None

    anchor = newest_log.date_queried
    return anchor + timedelta(days=1), anchor + timedelta(days=span)

示例#2

0

显示文件

def mark_court_in_progress(court_id, d):
    """Create an in-progress log row for a court and hand it back.

    :param court_id: A PACER Court ID; it is converted with
    map_pacer_to_cl_id before being stored.
    :param d: The value stored as the row's date_queried.
    :return: The newly created PACERFreeDocumentLog row.
    """
    new_row_fields = {
        'status': PACERFreeDocumentLog.SCRAPE_IN_PROGRESS,
        'date_queried': d,
        'court_id': map_pacer_to_cl_id(court_id),
    }
    return PACERFreeDocumentLog.objects.create(**new_row_fields)

示例#3

0

显示文件

def get_next_date_range(court_id, span=7):
    """Get the next start and end query dates for a court.

    Check the DB for the newest log row for the court that is not marked as
    failed. The returned range starts the day after that row's date_queried
    and ends ``span`` days after it, except that the end is never later than
    today.

    If the court is still in progress, return (None, None).

    Note: when the last queried date is today, the start (tomorrow) is later
    than the capped end (today).

    :param court_id: A PACER Court ID
    :param span: The number of days to go forward from the last completed date
    :return: A (start, end) tuple, or (None, None) while a scrape is still in
    progress.
    :raises PACERFreeDocumentLog.DoesNotExist: Re-raised when the court has no
    non-failed log row.
    """
    court_id = map_pacer_to_cl_id(court_id)
    try:
        last_completion_log = PACERFreeDocumentLog.objects.filter(
            court_id=court_id,
        ).exclude(
            status=PACERFreeDocumentLog.SCRAPE_FAILED,
        ).latest('date_queried')
    except PACERFreeDocumentLog.DoesNotExist:
        # Logger.warn is a deprecated alias that newer Python versions no
        # longer provide; warning() with lazy %-args emits the same message.
        logger.warning("FAILED ON: %s", court_id)
        raise

    if last_completion_log.status == PACERFreeDocumentLog.SCRAPE_IN_PROGRESS:
        return None, None

    last_complete_date = last_completion_log.date_queried
    next_start_date = last_complete_date + timedelta(days=1)
    # Never ask for dates in the future.
    next_end_date = min(now().date(),
                        last_complete_date + timedelta(days=span))
    return next_start_date, next_end_date

示例#4

0

显示文件

def get_next_date_range(
    court_id: str,
    span: int = 7,
) -> Tuple[Optional[date], Optional[date]]:
    """Work out which date window should be queried next for a court.

    The newest log row for the court that is not marked as failed is looked
    up. If that row is still in progress, (None, None) is returned.

    Otherwise the window starts at the earlier of that row's date_queried and
    five days before today, and ends ``span`` days after the start, capped at
    today.

    :param court_id: A PACER Court ID
    :param span: The number of days the window extends past its start
    :return: A (start, end) tuple, or (None, None) while a scrape is still in
    progress.
    :raises PACERFreeDocumentLog.DoesNotExist: Re-raised when the court has no
    non-failed log row.
    """
    court_id = map_pacer_to_cl_id(court_id)
    not_failed = PACERFreeDocumentLog.objects.filter(
        court_id=court_id
    ).exclude(status=PACERFreeDocumentLog.SCRAPE_FAILED)
    try:
        newest_log = not_failed.latest("date_queried")
    except PACERFreeDocumentLog.DoesNotExist:
        logger.warning(f"FAILED ON: {court_id}")
        raise

    if newest_log.status == PACERFreeDocumentLog.SCRAPE_IN_PROGRESS:
        return None, None

    # A recent success still rewinds the start to five days before today.
    five_days_ago = now().date() - timedelta(days=5)
    range_start = min(five_days_ago, newest_log.date_queried)
    range_end = min(now().date(), range_start + timedelta(days=span))
    return range_start, range_end

示例#5

0

显示文件

def mark_court_done_on_date(status, court_id, d):
    """Close out the newest in-progress log row for a court.

    :param status: The status to store on the row.
    :param court_id: A PACER Court ID; it is converted with
    map_pacer_to_cl_id before the lookup.
    :param d: The value stored as the row's date_queried.
    :return: ``status`` when a row was updated, or None when the court has no
    in-progress row.
    """
    cl_court_id = map_pacer_to_cl_id(court_id)
    open_logs = PACERFreeDocumentLog.objects.filter(
        status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS,
        court_id=cl_court_id,
    )
    try:
        open_log = open_logs.latest('date_queried')
    except PACERFreeDocumentLog.DoesNotExist:
        # Nothing is in progress for this court, so there is nothing to close.
        return None

    open_log.date_queried = d
    open_log.status = status
    open_log.date_completed = now()
    open_log.save()
    return status

示例#6

0

显示文件

文件： tasks.py 项目： snorey/courtlistener

def mark_court_done_on_date(status, court_id, d):
    """Record the outcome of a court's newest in-progress scrape.

    :param status: The status to store on the log row.
    :param court_id: A PACER Court ID; it is converted with
    map_pacer_to_cl_id before the lookup.
    :param d: The value stored as the row's date_queried.
    :return: ``status`` when a row was updated, or None when no in-progress
    row exists for the court.
    """
    cl_id = map_pacer_to_cl_id(court_id)
    try:
        pending = PACERFreeDocumentLog.objects.filter(
            status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS,
            court_id=cl_id,
        ).latest('date_queried')
    except PACERFreeDocumentLog.DoesNotExist:
        return None

    # Stamp the row with the queried date, the outcome and the finish time.
    updates = (
        ('date_queried', d),
        ('status', status),
        ('date_completed', now()),
    )
    for field_name, value in updates:
        setattr(pending, field_name, value)
    pending.save()
    return status

示例#7

0

显示文件

def process_free_opinion_result(self, row_pk, cnt):
    """Process a single result from the free opinion report

    Loads the PACERFreeDocumentRow with the given pk, normalizes its case
    name, and then, inside one database transaction, looks up or saves a
    docket for it and creates or updates the matching DocketEntry and
    RECAPDocument.

    :param self: The task instance; ``self.request``, ``self.retry`` and
    ``self.max_retries`` are used (presumably a bound Celery task -- confirm
    against the decorator, which is not visible here).
    :param row_pk: Primary key of the PACERFreeDocumentRow to process.
    :param cnt: An object providing ``make_case_name_short``.
    :return: A dict with the keys 'result', 'rd_pk' and 'pacer_court_id', or
    None when processing stops early (no docket, a database error, retries
    exhausted, or the document already exists and is available).
    """
    result = PACERFreeDocumentRow.objects.get(pk=row_pk)
    result.court = Court.objects.get(pk=map_pacer_to_cl_id(result.court_id))
    result.case_name = harmonize(result.case_name)
    result.case_name_short = cnt.make_case_name_short(result.case_name)
    # Work on a shallow copy so the attributes removed below stay available
    # on ``result`` itself.
    row_copy = copy.copy(result)
    # If we don't do this, the doc's date_filed becomes the docket's
    # date_filed. Bad.
    delattr(row_copy, 'date_filed')
    # If we don't do this, we get the PACER court id and it crashes
    delattr(row_copy, 'court_id')
    # If we don't do this, the id of result tries to smash that of the docket.
    delattr(row_copy, 'id')
    try:
        with transaction.atomic():
            docket = lookup_and_save(row_copy)
            if not docket:
                # Record the failure on the row and stop.
                msg = "Unable to create docket for %s" % result
                logger.error(msg)
                result.error_msg = msg
                result.save()
                # Presumably prevents any chained callbacks from running --
                # TODO confirm against the task-queue documentation.
                self.request.callbacks = None
                return
            docket.blocked, docket.date_blocked = get_blocked_status(docket)
            docket.save()

            de, de_created = DocketEntry.objects.update_or_create(
                docket=docket,
                entry_number=result.document_number,
                defaults={
                    'date_filed': result.date_filed,
                    'description': result.description,
                })
            rd, rd_created = RECAPDocument.objects.update_or_create(
                docket_entry=de,
                document_number=result.document_number,
                attachment_number=None,
                defaults={
                    'pacer_doc_id': result.pacer_doc_id,
                    'document_type': RECAPDocument.PACER_DOCUMENT,
                    'is_free_on_pacer': True,
                })
    except IntegrityError as e:
        msg = "Raised IntegrityError: %s" % e
        logger.error(msg)
        if self.request.retries == self.max_retries:
            # Out of retries: store the error on the row and give up.
            # NOTE(review): unlike the other early exits, this branch does
            # not clear self.request.callbacks -- confirm that is intended.
            result.error_msg = msg
            result.save()
            return
        raise self.retry(exc=e)
    except DatabaseError as e:
        msg = "Unable to complete database transaction:\n%s" % e
        logger.error(msg)
        result.error_msg = msg
        result.save()
        self.request.callbacks = None
        return

    if not rd_created and rd.is_available:
        # The item already exists and is available. Fantastic, mark it as free,
        # and call it a day.
        rd.is_free_on_pacer = True
        rd.save()
        result.delete()
        self.request.callbacks = None
        return

    return {
        'result': result,
        'rd_pk': rd.pk,
        'pacer_court_id': result.court_id
    }

示例#8

0

显示文件

文件： scrape_pacer_free_opinions.py 项目： snorey/courtlistener

def mark_court_in_progress(court_id, d):
    """Create an in-progress log row for a court.

    :param court_id: A PACER Court ID; it is converted with
    map_pacer_to_cl_id before being stored.
    :param d: The value stored as the row's date_queried.
    :return: None
    """
    in_progress = PACERFreeDocumentLog.SCRAPE_IN_PROGRESS
    cl_court_id = map_pacer_to_cl_id(court_id)
    PACERFreeDocumentLog.objects.create(
        status=in_progress, date_queried=d, court_id=cl_court_id)

示例#9

0

显示文件

def update_docket_appellate_metadata(d, docket_data):
    """Copy appellate-only metadata from parsed docket data onto a docket.

    Incoming values win over what the docket already holds; blank incoming
    values leave the existing value in place. Nothing is saved here.

    :param d: The docket object to update; it is modified in place.
    :param docket_data: A dict of docket data. The keys read are
    'originating_court_information', 'appeal_from', 'panel', 'fee_status'
    and 'case_type_information'.
    :return: A tuple of (d, originating court information object). The second
    item is None when the data does not look appellate or carries no
    originating court information.
    """
    appellate_hints = (
        docket_data.get('originating_court_information'),
        docket_data.get('appeal_from'),
        docket_data.get('panel'),
    )
    if not any(appellate_hints):
        # Probably not appellate.
        return d, None

    d.panel_str = ', '.join(docket_data.get('panel', [])) or d.panel_str
    # Pairs of (docket attribute, docket_data key).
    docket_fields = (
        ('appellate_fee_status', 'fee_status'),
        ('appellate_case_type_information', 'case_type_information'),
        ('appeal_from_str', 'appeal_from'),
    )
    for attr, key in docket_fields:
        setattr(d, attr, docket_data.get(key, '') or getattr(d, attr))

    # Everything below needs the originating court information dict.
    og_info = docket_data.get('originating_court_information')
    if not og_info:
        return d, None

    pacer_court_id = og_info.get('court_id')
    if pacer_court_id:
        cl_id = map_pacer_to_cl_id(pacer_court_id)
        # Only keep the court if it exists. PACER sometimes reports odd
        # values (e.g. 'uspci' in 14-1743 in CA3), and an invalid court ID
        # would make the whole docket save fail.
        if Court.objects.filter(pk=cl_id).exists():
            d.appeal_from_id = cl_id

    d_og_info = (d.originating_court_information or
                 OriginatingCourtInformation())

    # A-Numbers sometimes show up in the docket number field; anonymize
    # the value before storing it so they are not shared.
    raw_docket_number = (og_info.get('docket_number', '') or
                         d_og_info.docket_number)
    d_og_info.docket_number, _ = anonymize(raw_docket_number)

    # Pairs of (originating court info attribute, og_info key).
    og_fields = (
        ('court_reporter', 'court_reporter'),
        ('date_disposed', 'date_disposed'),
        ('date_filed', 'date_filed'),
        ('date_judgment', 'date_judgment'),
        ('date_judgment_eod', 'date_judgment_eod'),
        ('date_filed_noa', 'date_filed_noa'),
        ('date_received_coa', 'date_received_coa'),
        ('assigned_to_str', 'assigned_to'),
        ('ordering_judge_str', 'ordering_judge'),
    )
    for attr, key in og_fields:
        setattr(d_og_info, attr, og_info.get(key) or getattr(d_og_info, attr))

    if not (d.appeal_from_id and d_og_info.date_filed):
        # Can't do judge lookups. Call it quits.
        return d, d_og_info

    # Link a judge only when the lookup yields exactly one candidate.
    for judge_field in ('assigned_to', 'ordering_judge'):
        judge_name = og_info.get(judge_field)
        if not judge_name:
            continue
        judges = get_candidate_judges(judge_name, d.appeal_from_id,
                                      d_og_info.date_filed)
        if judges is not None and len(judges) == 1:
            setattr(d_og_info, judge_field, judges[0])

    return d, d_og_info

示例#10

0

显示文件

def update_docket_appellate_metadata(d, docket_data):
    """Fill in a docket's appellate-specific fields from parsed docket data.

    Non-blank incoming values replace what is already stored; blank ones
    leave the stored value untouched. Nothing is saved here.

    :param d: The docket object to update; it is modified in place.
    :param docket_data: A dict of docket data. The keys read are
    "originating_court_information", "appeal_from", "panel", "fee_status"
    and "case_type_information".
    :return: A tuple of (d, originating court information object). The second
    item is None when the data does not look appellate or carries no
    originating court information.
    """
    looks_appellate = any(
        [
            docket_data.get(key)
            for key in (
                "originating_court_information",
                "appeal_from",
                "panel",
            )
        ]
    )
    if not looks_appellate:
        # Probably not appellate.
        return d, None

    panel_names = docket_data.get("panel", [])
    d.panel_str = ", ".join(panel_names) or d.panel_str
    fee_status = docket_data.get("fee_status", "")
    d.appellate_fee_status = fee_status or d.appellate_fee_status
    case_type_info = docket_data.get("case_type_information", "")
    d.appellate_case_type_information = (
        case_type_info or d.appellate_case_type_information
    )
    appeal_from = docket_data.get("appeal_from", "")
    d.appeal_from_str = appeal_from or d.appeal_from_str

    # The rest depends on the originating court information dict.
    og_info = docket_data.get("originating_court_information")
    if not og_info:
        return d, None

    pacer_court_id = og_info.get("court_id")
    if pacer_court_id:
        cl_id = map_pacer_to_cl_id(pacer_court_id)
        # Only keep the court if it exists. PACER sometimes reports odd
        # values (e.g. 'uspci' in 14-1743 in CA3), and an invalid court ID
        # would make the whole docket save fail.
        if Court.objects.filter(pk=cl_id).exists():
            d.appeal_from_id = cl_id

    d_og_info = (
        d.originating_court_information or OriginatingCourtInformation()
    )

    # A-Numbers sometimes show up in the docket number field; anonymize
    # the value before storing it so they are not shared.
    raw_docket_number = (
        og_info.get("docket_number", "") or d_og_info.docket_number
    )
    d_og_info.docket_number, _ = anonymize(raw_docket_number)

    d_og_info.court_reporter = (
        og_info.get("court_reporter", "") or d_og_info.court_reporter
    )
    # These date fields share their name with the og_info key.
    for date_field in (
        "date_disposed",
        "date_filed",
        "date_judgment",
        "date_judgment_eod",
        "date_filed_noa",
        "date_received_coa",
    ):
        incoming = og_info.get(date_field)
        setattr(
            d_og_info, date_field, incoming or getattr(d_og_info, date_field)
        )
    assigned_name = og_info.get("assigned_to")
    d_og_info.assigned_to_str = assigned_name or d_og_info.assigned_to_str
    ordering_name = og_info.get("ordering_judge")
    d_og_info.ordering_judge_str = (
        ordering_name or d_og_info.ordering_judge_str
    )

    if not (d.appeal_from_id and d_og_info.date_filed):
        # Can't do judge lookups. Call it quits.
        return d, d_og_info

    # Both judge fields are looked up the same way.
    for judge_field in ("assigned_to", "ordering_judge"):
        lookup_judge_by_full_name_and_set_attr(
            d_og_info,
            judge_field,
            og_info.get(judge_field),
            d.appeal_from_id,
            d_og_info.date_filed,
        )

    return d, d_og_info

示例#11

0

显示文件

文件： tasks.py 项目： snorey/courtlistener

def process_free_opinion_result(self, row_pk, cnt):
    """Process a single result from the free opinion report

    Loads the PACERFreeDocumentRow with the given pk, normalizes its case
    name, and then, inside one database transaction, looks up or saves a
    docket for it and creates or updates the matching DocketEntry and
    RECAPDocument.

    :param self: The task instance; ``self.request``, ``self.retry`` and
    ``self.max_retries`` are used (presumably a bound Celery task -- confirm
    against the decorator, which is not visible here).
    :param row_pk: Primary key of the PACERFreeDocumentRow to process.
    :param cnt: An object providing ``make_case_name_short``.
    :return: A dict with the keys 'result', 'rd_pk' and 'pacer_court_id', or
    None when processing stops early (no docket, a database error, retries
    exhausted, or the document already exists and is available).
    """
    result = PACERFreeDocumentRow.objects.get(pk=row_pk)
    result.court = Court.objects.get(pk=map_pacer_to_cl_id(result.court_id))
    result.case_name = harmonize(result.case_name)
    result.case_name_short = cnt.make_case_name_short(result.case_name)
    # Work on a shallow copy so the attributes removed below stay available
    # on ``result`` itself.
    row_copy = copy.copy(result)
    # If we don't do this, the doc's date_filed becomes the docket's
    # date_filed. Bad.
    delattr(row_copy, 'date_filed')
    # If we don't do this, we get the PACER court id and it crashes
    delattr(row_copy, 'court_id')
    # If we don't do this, the id of result tries to smash that of the docket.
    delattr(row_copy, 'id')
    try:
        with transaction.atomic():
            docket = lookup_and_save(row_copy)
            if not docket:
                # Record the failure on the row and stop.
                msg = "Unable to create docket for %s" % result
                logger.error(msg)
                result.error_msg = msg
                result.save()
                # Presumably prevents any chained callbacks from running --
                # TODO confirm against the task-queue documentation.
                self.request.callbacks = None
                return
            docket.blocked, docket.date_blocked = get_blocked_status(docket)
            docket.save()

            de, de_created = DocketEntry.objects.update_or_create(
                docket=docket,
                entry_number=result.document_number,
                defaults={
                    'date_filed': result.date_filed,
                    'description': result.description,
                }
            )
            rd, rd_created = RECAPDocument.objects.update_or_create(
                docket_entry=de,
                document_number=result.document_number,
                attachment_number=None,
                defaults={
                    'pacer_doc_id': result.pacer_doc_id,
                    'document_type': RECAPDocument.PACER_DOCUMENT,
                    'is_free_on_pacer': True,
                }
            )
    except IntegrityError as e:
        msg = "Raised IntegrityError: %s" % e
        logger.error(msg)
        if self.request.retries == self.max_retries:
            # Out of retries: store the error on the row and give up.
            # NOTE(review): unlike the other early exits, this branch does
            # not clear self.request.callbacks -- confirm that is intended.
            result.error_msg = msg
            result.save()
            return
        raise self.retry(exc=e)
    except DatabaseError as e:
        msg = "Unable to complete database transaction:\n%s" % e
        logger.error(msg)
        result.error_msg = msg
        result.save()
        self.request.callbacks = None
        return

    if not rd_created and rd.is_available:
        # The item already exists and is available. Fantastic, mark it as free,
        # and call it a day.
        rd.is_free_on_pacer = True
        rd.save()
        result.delete()
        self.request.callbacks = None
        return

    return {'result': result, 'rd_pk': rd.pk, 'pacer_court_id': result.court_id}