Exemplo n.º 1
0
 def test_logging_short_username(self):
     """A username shorter than six characters must trigger a
     PacerLoginException when we try to log in.
     """
     short_session = PacerSession(username='******', password='******')
     with self.assertRaises(PacerLoginException):
         short_session.login()
def get_pacer_dockets(options, docket_pks, tags):
    """Get the pacer dockets identified by the FJC IDB rows

    Dispatches one Celery chain per docket: fetch the docket from PACER,
    then merge it into the RECAP tables.

    :param options: Dict of command options; 'queue', 'offset' and 'limit'
        are read here.
    :param docket_pks: Iterable of Docket primary keys to process.
    :param tags: Tag names to apply to the downloaded content.
    :return: None; all work is handed off to Celery.
    """
    q = options['queue']
    throttle = CeleryThrottle(queue_name=q)
    pacer_session = None
    for i, docket_pk in enumerate(docket_pks):
        if i < options['offset']:
            continue
        # Chained comparison: stop once i reaches a positive limit
        # (a limit of 0 or less means "no limit").
        if i >= options['limit'] > 0:
            break
        throttle.maybe_wait()
        # Log in on the first item and then again every 1,000 items so the
        # PACER session doesn't go stale during long runs.
        if i % 1000 == 0 or pacer_session is None:
            pacer_session = PacerSession(username=PACER_USERNAME,
                                         password=PACER_PASSWORD)
            pacer_session.login()
            logger.info("Sent %s tasks to celery so far." % i)
        d = Docket.objects.get(pk=docket_pk)
        chain(
            get_docket_by_pacer_case_id.s(
                {'pacer_case_id': d.pacer_case_id,
                 'docket_pk': d.pk},
                d.court_id,
                cookies=pacer_session.cookies,
                tag_names=tags,
                **{'show_parties_and_counsel': True,
                   'show_terminated_parties': True,
                   'show_list_of_member_cases': False}
            ).set(queue=q),
            add_or_update_recap_docket.s().set(queue=q),
        ).apply_async()
Exemplo n.º 3
0
 def test_logging_short_password(self):
     """Logging in with a too-short password should raise a
     PacerLoginException.
     """
     bad_session = PacerSession(username='******', password='******')
     with self.assertRaises(PacerLoginException):
         bad_session.login()
Exemplo n.º 4
0
    def test_logging_into_test_site(self):
        """Smoke-test authentication against the PACER training site and
        check that a session cookie was set for the uscourts.gov domain.
        """
        try:
            session = PacerSession(username='******',
                                   password='******')
            session.login_training()
            self.assertIsNotNone(session)
            self.assertIsNotNone(session.cookies.get(
                'PacerSession', None, domain='.uscourts.gov', path='/'))
        except PacerLoginException:
            self.fail('Could not log into PACER test site!')
Exemplo n.º 5
0
    def test_logging_into_pacer(self):
        """Can we authenticate against live PACER with the configured
        credentials and receive the expected session cookie?
        """
        try:
            pacer_session = PacerSession(username=PACER_USERNAME,
                                         password=PACER_PASSWORD)
            pacer_session.login()
            self.assertIsNotNone(pacer_session)
            self.assertIsNotNone(pacer_session.cookies.get(
                'PacerSession', None, domain='.uscourts.gov', path='/'))
        except PacerLoginException:
            self.fail('Could not log into PACER')
Exemplo n.º 6
0
    def setUp(self):
        """Build one FreeOpinionReport per court from the courts fixture,
        all sharing a single PACER session (logged in when credentials
        are configured).
        """
        pacer_session = PacerSession()

        if PACER_USERNAME and PACER_PASSWORD:
            # CAND chosen at random
            pacer_session = PacerSession(username=PACER_USERNAME,
                                         password=PACER_PASSWORD)
            pacer_session.login()

        with open(os.path.join(JURISCRAPER_ROOT, 'pacer/courts.json')) as j:
            self.courts = get_courts_from_json(json.load(j))

        # Known-good query dates per court, used by the report tests.
        path = os.path.join(TESTS_ROOT, 'fixtures/valid_free_opinion_dates.json')
        with open(path) as j:
            self.valid_dates = json.load(j)

        # Map each court id to a report object sharing the one session.
        self.reports = {}
        for court in self.courts:
            court_id = get_court_id_from_url(court['court_link'])
            self.reports[court_id] = FreeOpinionReport(court_id, pacer_session)
def get_pdfs(options):
    """Get PDFs for the results of the Free Document Report queries.

    At this stage, we have rows in the PACERFreeDocumentRow table, each of
    which represents a PDF we need to download and merge into our normal
    tables: Docket, DocketEntry, and RECAPDocument.

    In this function, we iterate over the entire table of results, merge it
    into our normal tables, and then download and extract the PDF.

    :param options: Dict of command options; 'queue' and 'index' are read.
    :return: None
    """
    q = options['queue']
    index = options['index']
    cnt = CaseNameTweaker()
    # Only rows that haven't already errored out; .only('pk') keeps the
    # queryset lightweight since the tasks re-fetch what they need.
    rows = PACERFreeDocumentRow.objects.filter(error_msg="").only('pk')
    count = rows.count()
    task_name = "downloading"
    if index:
        task_name += " and indexing"
    logger.info("%s %s items from PACER." % (task_name, count))
    throttle = CeleryThrottle(queue_name=q)
    completed = 0
    for row in queryset_generator(rows):
        throttle.maybe_wait()
        # Fresh login on the first row (completed == 0) and again every
        # 30,000 rows so the session cookies don't expire mid-run.
        if completed % 30000 == 0:
            pacer_session = PacerSession(username=PACER_USERNAME,
                                         password=PACER_PASSWORD)
            pacer_session.login()
        c = chain(
            process_free_opinion_result.si(row.pk, cnt).set(queue=q),
            get_and_process_pdf.s(pacer_session.cookies, row.pk).set(queue=q),
            delete_pacer_row.s(row.pk).set(queue=q),
        )
        if index:
            # Optionally append a Solr indexing step to the chain.
            c |= add_items_to_solr.s('search.RECAPDocument').set(queue=q)
        c.apply_async()
        completed += 1
        if completed % 1000 == 0:
            logger.info("Sent %s/%s tasks to celery for %s so "
                        "far." % (completed, count, task_name))
Exemplo n.º 8
0
 def setUp(self):
     """Log into PACER and point a DocketReport at CAND."""
     session = PacerSession(username=PACER_USERNAME,
                            password=PACER_PASSWORD)
     session.login()
     self.report = DocketReport('cand', session)
     # 4:06-cv-07294 Foley v. Bates
     self.pacer_case_id = '186730'
Exemplo n.º 9
0
 def setUp(self):
     """Build a PacerSession from the configured PACER credentials."""
     self.session = PacerSession(
         username=PACER_USERNAME, password=PACER_PASSWORD)
Exemplo n.º 10
0
 def setUpClass(cls):
     """Create a shared ShowCaseDocApi for DCD when PACER credentials
     are configured; otherwise do nothing.
     """
     if not (PACER_USERNAME and PACER_PASSWORD):
         return
     cls.pacer_session = PacerSession(username=PACER_USERNAME,
                                      password=PACER_PASSWORD)
     cls.report = ShowCaseDocApi('dcd', cls.pacer_session)
Exemplo n.º 11
0
 def setUp(self):
     """Create the PACER session used by the tests in this case."""
     self.session = PacerSession(
         username=PACER_USERNAME,
         password=PACER_PASSWORD,
     )
Exemplo n.º 12
0
class PacerSessionTest(unittest.TestCase):
    """
    Test the PacerSession wrapper class
    """

    def setUp(self):
        # An anonymous session is enough: these tests mock out the
        # underlying requests.Session.post call.
        self.session = PacerSession()

    def test_data_transformation(self):
        """
        Test our data transformation routine for building out PACER-compliant
        multi-part form data
        """
        data = {'case_id': 123, 'case_type': 'something'}
        # PACER expects each value wrapped as a (filename, value) tuple
        # with a None filename.
        expected = {'case_id': (None, 123), 'case_type': (None, 'something')}
        output = self.session._prepare_multipart_form_data(data)
        self.assertEqual(output, expected)

    @mock.patch('juriscraper.pacer.http.requests.Session.post')
    def test_ignores_non_data_posts(self, mock_post):
        """
        Test that POSTs without a data parameter just pass through as normal.

        :param mock_post: mocked Session.post method
        """
        data = {'name': ('filename', 'junk')}

        self.session.post('https://free.law', files=data)

        self.assertTrue(mock_post.called,
                        'request.Session.post should be called')
        self.assertEqual(data, mock_post.call_args[1]['files'],
                         'the data should not be changed if using a files call')

    @mock.patch('juriscraper.pacer.http.requests.Session.post')
    def test_transforms_data_on_post(self, mock_post):
        """
        Test that POSTs using the data parameter get transformed into PACER's
        delightfully odd multi-part form data.

        :param mock_post: mocked Session.post method
        """
        data = {'name': 'dave', 'age': 33}
        expected = {'name': (None, 'dave'), 'age': (None, 33)}

        self.session.post('https://free.law', data=data)

        self.assertTrue(mock_post.called,
                        'request.Session.post should be called')
        # The data kwarg must be consumed and re-emitted as files.
        self.assertNotIn('data', mock_post.call_args[1],
                         'we should intercept data arguments')
        self.assertEqual(expected, mock_post.call_args[1]['files'],
                         'we should transform and populate the files argument')

    @mock.patch('juriscraper.pacer.http.requests.Session.post')
    def test_sets_default_timeout(self, mock_post):
        """POSTs without an explicit timeout should get a 300s default."""
        self.session.post('https://free.law', data={})

        self.assertTrue(mock_post.called,
                        'request.Session.post should be called')
        self.assertIn('timeout', mock_post.call_args[1],
                      'we should add a default timeout automatically')
        self.assertEqual(300, mock_post.call_args[1]['timeout'],
                         'default should be 300')
Exemplo n.º 13
0
def get_and_save_free_document_reports(options):
    """Query the Free Doc Reports on PACER and get a list of all the free
    documents. Do not download those items, as that step is done later.

    Work is dispatched asynchronously: each court gets a Celery chain, and
    this loop polls the results with a capped exponential backoff until
    every court is either finished or failed.

    :param options: Command options (unused here, kept for interface parity).
    :return: None
    """
    # Kill any *old* logs that report they're in progress. (They've failed.)
    twelve_hrs_ago = now() - timedelta(hours=12)
    PACERFreeDocumentLog.objects.filter(
        date_started__lt=twelve_hrs_ago,
        status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS,
    ).update(
        status=PACERFreeDocumentLog.SCRAPE_FAILED,
    )

    # Per-court scheduling state: 'until' = don't retry before this time,
    # 'count' = backoff multiplier, 'result' = pending AsyncResult or None.
    # The excluded pks are courts known not to offer this report.
    pacer_court_ids = {
        map_cl_to_pacer_id(v): {'until': now(), 'count': 1, 'result': None} for v in
            Court.objects.filter(
                jurisdiction__in=['FD', 'FB'],
                in_use=True,
                end_date=None,
            ).exclude(
                pk__in=['casb', 'ganb', 'gub', 'innb', 'mieb', 'miwb', 'nmib',
                        'nvb', 'ohsb', 'prb', 'tnwb', 'vib']
            ).values_list(
                'pk', flat=True
            )
    }
    pacer_session = PacerSession(username=PACER_USERNAME,
                                 password=PACER_PASSWORD)
    pacer_session.login()

    # Iterate over every court, X days at a time. As courts are completed,
    # remove them from the list of courts to process until none are left
    tomorrow = now() + timedelta(days=1)
    while len(pacer_court_ids) > 0:
        court_ids_copy = pacer_court_ids.copy()  # Make a copy of the list.
        for pacer_court_id, delay in court_ids_copy.items():
            if now() < delay['until']:
                # Do other courts until the delay is up. Do not print/log
                # anything since at the end there will only be one court left.
                continue

            next_start_date, next_end_date = get_next_date_range(pacer_court_id)
            if delay['result'] is not None:
                if delay['result'].ready():
                    result = delay['result'].get()
                    if result == PACERFreeDocumentLog.SCRAPE_SUCCESSFUL:
                        if next_start_date >= tomorrow.date():
                            logger.info("Finished '%s'. Marking it complete." %
                                        pacer_court_id)
                            pacer_court_ids.pop(pacer_court_id, None)
                            continue

                    elif result == PACERFreeDocumentLog.SCRAPE_FAILED:
                        logger.error("Encountered critical error on %s "
                                     "(network error?). Marking as failed and "
                                     "pressing on." % pacer_court_id)
                        pacer_court_ids.pop(pacer_court_id, None)
                        continue
                else:
                    next_delay = min(delay['count'] * 5, 30)  # backoff w/cap
                    logger.info("Court %s still in progress. Delaying at least "
                                "%ss." % (pacer_court_id, next_delay))
                    pacer_court_ids[pacer_court_id]['until'] = now() + timedelta(
                        seconds=next_delay)
                    pacer_court_ids[pacer_court_id]['count'] += 1
                    continue

            mark_court_in_progress(pacer_court_id, next_end_date)
            pacer_court_ids[pacer_court_id]['count'] = 1  # Reset
            # NOTE: `delay` is the same dict object stored in
            # pacer_court_ids (copy() above is shallow), so mutating it
            # here updates the live scheduling state.
            delay['result'] = chain(
                get_and_save_free_document_report.si(
                    pacer_court_id,
                    next_start_date,
                    next_end_date,
                    pacer_session
                ),
                mark_court_done_on_date.s(pacer_court_id, next_end_date),
            ).apply_async()
Exemplo n.º 14
0
def get_pacer_session():
    """Build a PacerSession from the module-level PACER credentials."""
    session = PacerSession(username=PACER_USERNAME, password=PACER_PASSWORD)
    return session
Exemplo n.º 15
0
#!/usr/bin/env python
#
#  Takes an .html file on the command line, parses it using the PACER
#  Docket Report parser, and outputs json to stdout.
#
#  Fixed: the original used Python 2-only constructs (a `print` statement
#  and a `.decode()` on the result of a text-mode read, which is already
#  `str` on Python 3). This version runs on Python 3.

import sys

import jsondate3 as json

from juriscraper.pacer import DocketReport
from juriscraper.pacer.http import PacerSession

# No login needed: we only parse local HTML, so a dummy session suffices.
pacer_session = PacerSession(username="******", password="******")
report = DocketReport("psc", pacer_session)

for path in sys.argv[1:]:
    # Text mode with an explicit encoding; no manual decode needed.
    with open(path, "r", encoding="utf-8") as f:
        report._parse_text(f.read())
    data = report.data
    print(json.dumps(data, indent=2, sort_keys=True, separators=(",", ": ")))
Exemplo n.º 16
0
 def setUpClass(cls):
     """Create a DocketReport against the PSC (training) court."""
     session = PacerSession(username='******', password='******')
     cls.report = DocketReport('psc', session)
     # 1:07-cr-00001-RJA-HKS USA v. Green
     cls.pacer_case_id = '62866'
Exemplo n.º 17
0
 def setUp(self):
     """Create an anonymous (credential-less) PacerSession."""
     self.session = PacerSession()
Exemplo n.º 18
0
 def test_logging_in_bad_credentials(self):
     """Bogus credentials should raise a PacerLoginException.

     The password is kept longer than eight characters so we exercise the
     server-side rejection, not client-side length validation.
     """
     bad_session = PacerSession(username='******', password='******')
     with self.assertRaises(PacerLoginException):
         bad_session.login()
def fetch(ctx, overwrite=False):
    """Download ILND criminal case-filed reports for a fixed set of
    18 U.S.C. 922/924 citations and write each result to a TSV file.

    :param ctx: Task-runner context (unused here; part of the interface).
    :param overwrite: When True, re-download reports whose output file
        already exists.
    """
    print('fetch')
    session = PacerSession(username=os.environ.get('PACER_USERNAME'),
                           password=os.environ.get('PACER_PASSWORD'))
    today = date.today().strftime('%m/%d/%Y')
    citations = [
        '18:922A.F',
        '18:922C.F',
        '18:922E.F',
        '18:922G.F',
        '18:924A.F',
        '18:924C.F',
    ]
    for citation in citations:
        outputfile = 'data/{0}.tsv'.format(citation)

        if overwrite or not os.path.exists(outputfile):
            # Multipart form fields in PACER's (None, value) tuple format.
            body = {
                "office": (None, ""),
                "case_type": (None, ""),
                "case_flags": (None, ""),
                "citation": (None, citation),
                "pending_citations": (None, "1"),
                "terminated_citations": (None, "1"),
                "cvbcases": (None, "No"),
                "filed_from": (None, "1/1/2007"),
                "filed_to": (None, today),
                "terminal_digit": (None, ""),
                "pending_defendants": (None, "on"),
                "terminated_defendants": (None, "on"),
                "fugitive_defendants": (None, ""),
                "nonfugitive_defendants": (None, "1"),
                "reportable_cases": (None, "1"),
                "non_reportable_cases": (None, "1"),
                "sort1": (None, "case number"),
                "sort2": (None, ""),
                "sort3": (None, ""),
                "format": (None, "data")
            }
            # BUG FIX: the original URL had no '{0}' placeholder, so the
            # .format(randint(...)) call was a no-op and every request used
            # the literal '?1-L_1_0-1'. CM/ECF URLs carry a random
            # transaction number in that segment; give format() a slot.
            intermediate_resp = session.post(
                'https://ecf.ilnd.uscourts.gov/cgi-bin/CrCaseFiled-Rpt.pl'
                '?{0}-L_1_0-1'.format(randint(200000, 40000000)),
                files=body)

            # The intermediate page is a form whose action points at the
            # generated report; follow it with a second POST.
            intermediate_doc = BeautifulSoup(intermediate_resp.content, 'lxml')
            form = intermediate_doc.find('form')
            action = form.attrs.get('action')
            action_path = action.split('/')[-1]
            url = 'https://ecf.ilnd.uscourts.gov/cgi-bin/' + action_path

            resp = session.post(url)

            print('-' * 50)
            print(citation)
            print('-' * 50)
            print(resp.content)

            # BUG FIX: resp.content is bytes; open in binary mode so the
            # write works on Python 3 (text mode raises TypeError there).
            with open(outputfile, 'wb') as f:
                f.write(resp.content)

        else:
            print('skipped {0}'.format(citation))
Exemplo n.º 20
0
 def setUp(self):
     """Create a logged-in session and a CAND DocketReport fixture."""
     pacer_session = PacerSession(
         username=PACER_USERNAME, password=PACER_PASSWORD)
     pacer_session.login()
     self.report = DocketReport('cand', pacer_session)
     self.pacer_case_id = '186730'  # 4:06-cv-07294 Foley v. Bates
Exemplo n.º 21
0
def get_and_save_free_document_reports(options):
    """Query the Free Doc Reports on PACER and get a list of all the free
    documents. Do not download those items, as that step is done later. For now
    just get the list.

    Note that this uses synchronous celery chains. A previous version was more
    complex and did not use synchronous chains. Unfortunately in Celery 4.2.0,
    or more accurately in redis-py 3.x.x, doing it that way failed nearly every
    time.

    This is a simpler version, though a slower one, but it should get the job
    done.

    :param options: Command options (unused here, kept for interface parity).
    :return: None
    """
    # Kill any *old* logs that report they're in progress. (They've failed.)
    three_hrs_ago = now() - timedelta(hours=3)
    PACERFreeDocumentLog.objects.filter(
        date_started__lt=three_hrs_ago,
        status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS,
    ).update(status=PACERFreeDocumentLog.SCRAPE_FAILED,)

    # Active federal district/bankruptcy courts, minus those known not to
    # offer the free documents report.
    cl_court_ids = (
        Court.objects.filter(
            jurisdiction__in=[
                Court.FEDERAL_DISTRICT,
                Court.FEDERAL_BANKRUPTCY,
            ],
            in_use=True,
            end_date=None,
        )
        .exclude(pk__in=["casb", "gub", "innb", "miwb", "ohsb", "prb"],)
        .values_list("pk", flat=True,)
    )
    pacer_court_ids = [map_cl_to_pacer_id(v) for v in cl_court_ids]

    pacer_session = PacerSession(
        username=PACER_USERNAME, password=PACER_PASSWORD
    )
    pacer_session.login()

    today = now()
    # One court at a time; for each court, walk forward through date
    # ranges until we reach today or hit a failure.
    for pacer_court_id in pacer_court_ids:
        while True:
            next_start_d, next_end_d = get_next_date_range(pacer_court_id)
            logger.info(
                "Attempting to get latest document references for "
                "%s between %s and %s",
                pacer_court_id,
                next_start_d,
                next_end_d,
            )
            mark_court_in_progress(pacer_court_id, next_end_d)
            try:
                status = get_and_save_free_document_report(
                    pacer_court_id,
                    next_start_d,
                    next_end_d,
                    pacer_session.cookies,
                )
            except RequestException:
                logger.error(
                    "Failed to get document references for %s "
                    "between %s and %s due to network error.",
                    pacer_court_id,
                    next_start_d,
                    next_end_d,
                )
                mark_court_done_on_date(
                    PACERFreeDocumentLog.SCRAPE_FAILED,
                    pacer_court_id,
                    next_end_d,
                )
                break
            except IndexError:
                # IndexError here is a known symptom of the PACER 6.3
                # report bug; treat it the same as a network failure.
                logger.error(
                    "Failed to get document references for %s "
                    "between %s and %s due to PACER 6.3 bug.",
                    pacer_court_id,
                    next_start_d,
                    next_end_d,
                )
                mark_court_done_on_date(
                    PACERFreeDocumentLog.SCRAPE_FAILED,
                    pacer_court_id,
                    next_end_d,
                )
                break
            else:
                result = mark_court_done_on_date(
                    status, pacer_court_id, next_end_d
                )

            if result == PACERFreeDocumentLog.SCRAPE_SUCCESSFUL:
                if next_end_d >= today.date():
                    logger.info(
                        "Got all document references for '%s'.", pacer_court_id
                    )
                    # Break from while loop, onwards to next court
                    break
                else:
                    # More dates to do; let it continue
                    continue

            elif result == PACERFreeDocumentLog.SCRAPE_FAILED:
                logger.error(
                    "Encountered critical error on %s "
                    "(network error?). Marking as failed and "
                    "pressing on." % pacer_court_id
                )
                # Break from while loop, onwards to next court
                break
Exemplo n.º 22
0
class PacerSessionTest(unittest.TestCase):
    """
    Test the PacerSession wrapper class
    """

    def setUp(self):
        # Real credentials so the auto-login test below can exercise the
        # login path; the mocked tests never hit the network.
        self.session = PacerSession(username=PACER_USERNAME,
                                    password=PACER_PASSWORD)

    def test_data_transformation(self):
        """
        Test our data transformation routine for building out PACER-compliant
        multi-part form data
        """
        data = {'case_id': 123, 'case_type': 'something'}
        # PACER expects each value wrapped as (None, value) tuples.
        expected = {'case_id': (None, 123), 'case_type': (None, 'something')}
        output = self.session._prepare_multipart_form_data(data)
        self.assertEqual(output, expected)

    @mock.patch('juriscraper.pacer.http.requests.Session.post')
    def test_ignores_non_data_posts(self, mock_post):
        """
        Test that POSTs without a data parameter just pass through as normal.

        :param mock_post: mocked Session.post method
        """
        data = {'name': ('filename', 'junk')}

        self.session.post('https://free.law', files=data, auto_login=False)

        self.assertTrue(mock_post.called,
                        'request.Session.post should be called')
        self.assertEqual(data, mock_post.call_args[1]['files'],
                         'the data should not be changed if using a files call')

    @mock.patch('juriscraper.pacer.http.requests.Session.post')
    def test_transforms_data_on_post(self, mock_post):
        """
        Test that POSTs using the data parameter get transformed into PACER's
        delightfully odd multi-part form data.

        :param mock_post: mocked Session.post method
        """
        data = {'name': 'dave', 'age': 33}
        expected = {'name': (None, 'dave'), 'age': (None, 33)}

        self.session.post('https://free.law', data=data, auto_login=False)

        self.assertTrue(mock_post.called,
                        'request.Session.post should be called')
        # The data kwarg must be consumed and re-emitted as files.
        self.assertNotIn('data', mock_post.call_args[1],
                         'we should intercept data arguments')
        self.assertEqual(expected, mock_post.call_args[1]['files'],
                         'we should transform and populate the files argument')

    @mock.patch('juriscraper.pacer.http.requests.Session.post')
    def test_sets_default_timeout(self, mock_post):
        """POSTs without an explicit timeout should get a 300s default."""
        self.session.post('https://free.law', data={}, auto_login=False)

        self.assertTrue(mock_post.called,
                        'request.Session.post should be called')
        self.assertIn('timeout', mock_post.call_args[1],
                      'we should add a default timeout automatically')
        self.assertEqual(300, mock_post.call_args[1]['timeout'],
                         'default should be 300')

    @mock.patch('juriscraper.pacer.http.PacerSession.login')
    @SKIP_IF_NO_PACER_LOGIN
    def test_auto_login(self, mock_login):
        """Do we automatically log in if needed?"""
        court_id = 'ksd'
        pacer_doc_id = '07902639735'
        url = make_doc1_url(court_id, pacer_doc_id, True)
        pacer_case_id = '81531'
        # This triggers and auto-login because we aren't logged in yet.
        self.session.username = PACER_USERNAME
        self.session.password = PACER_PASSWORD
        _ = self.session.get(url, params={
            'case_id': pacer_case_id,
            'got_receipt': '1',
        }, allow_redirects=True)
        self.assertTrue(mock_login.called,
                        'PacerSession.login() should be called.')
Exemplo n.º 23
0
 def test_logging_in_bad_credentials(self):
     """Bad credentials must raise a PacerLoginException on login.

     The password stays longer than eight characters so client-side
     length checks don't short-circuit the test.
     """
     session = PacerSession(username='******', password='******')
     with self.assertRaises(PacerLoginException):
         session.login()
def get_and_save_free_document_reports(options):
    """Query the Free Doc Reports on PACER and get a list of all the free
    documents. Do not download those items, as that step is done later.

    Each court gets an async Celery chain; this loop polls the results
    with a capped backoff until every court has finished or failed.

    :param options: Command options (unused here, kept for interface parity).
    :return: None
    """
    # Kill any *old* logs that report they're in progress. (They've failed.)
    twelve_hrs_ago = now() - timedelta(hours=12)
    PACERFreeDocumentLog.objects.filter(
        date_started__lt=twelve_hrs_ago,
        status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS,
    ).update(
        status=PACERFreeDocumentLog.SCRAPE_FAILED,
    )

    # Scheduling state per court: 'until' = earliest next attempt,
    # 'count' = backoff multiplier, 'result' = pending AsyncResult or None.
    pacer_court_ids = {
        map_cl_to_pacer_id(v): {'until': now(), 'count': 1, 'result': None} for v in
            Court.objects.filter(
                jurisdiction__in=['FD', 'FB'],
                in_use=True,
                end_date=None,
            ).exclude(
                pk__in=['casb', 'ganb', 'gub', 'innb', 'mieb', 'miwb', 'nmib',
                        'nvb', 'ohsb', 'prb', 'tnwb', 'vib']
            ).values_list(
                'pk', flat=True
            )
    }
    pacer_session = PacerSession(username=PACER_USERNAME,
                                 password=PACER_PASSWORD)
    pacer_session.login()

    # Iterate over every court, X days at a time. As courts are completed,
    # remove them from the list of courts to process until none are left
    tomorrow = now() + timedelta(days=1)
    while len(pacer_court_ids) > 0:
        court_ids_copy = pacer_court_ids.copy()  # Make a copy of the list.
        for pacer_court_id, delay in court_ids_copy.items():
            if now() < delay['until']:
                # Do other courts until the delay is up. Do not print/log
                # anything since at the end there will only be one court left.
                continue

            next_start_date, next_end_date = get_next_date_range(pacer_court_id)
            if delay['result'] is not None:
                if delay['result'].ready():
                    result = delay['result'].get()
                    if result == PACERFreeDocumentLog.SCRAPE_SUCCESSFUL:
                        if next_start_date >= tomorrow.date():
                            logger.info("Finished '%s'. Marking it complete." %
                                        pacer_court_id)
                            pacer_court_ids.pop(pacer_court_id, None)
                            continue

                    elif result == PACERFreeDocumentLog.SCRAPE_FAILED:
                        logger.error("Encountered critical error on %s "
                                     "(network error?). Marking as failed and "
                                     "pressing on." % pacer_court_id)
                        pacer_court_ids.pop(pacer_court_id, None)
                        continue
                else:
                    next_delay = min(delay['count'] * 5, 30)  # backoff w/cap
                    logger.info("Court %s still in progress. Delaying at least "
                                "%ss." % (pacer_court_id, next_delay))
                    pacer_court_ids[pacer_court_id]['until'] = now() + timedelta(
                        seconds=next_delay)
                    pacer_court_ids[pacer_court_id]['count'] += 1
                    continue

            mark_court_in_progress(pacer_court_id, next_end_date)
            pacer_court_ids[pacer_court_id]['count'] = 1  # Reset
            # NOTE: `delay` is the same dict object held in pacer_court_ids
            # (copy() above is shallow), so this updates the live state.
            delay['result'] = chain(
                get_and_save_free_document_report.si(
                    pacer_court_id,
                    next_start_date,
                    next_end_date,
                    pacer_session
                ),
                mark_court_done_on_date.s(pacer_court_id, next_end_date),
            ).apply_async()
Exemplo n.º 25
0
class PacerSessionTest(unittest.TestCase):
    """
    Test the PacerSession wrapper class
    """
    def setUp(self):
        # Real credentials so test_auto_login below can exercise the login
        # path; the mocked tests never touch the network.
        self.session = PacerSession(username=PACER_USERNAME,
                                    password=PACER_PASSWORD)

    def test_data_transformation(self):
        """
        Test our data transformation routine for building out PACER-compliant
        multi-part form data
        """
        data = {'case_id': 123, 'case_type': 'something'}
        # PACER expects each value wrapped as (None, value) tuples.
        expected = {'case_id': (None, 123), 'case_type': (None, 'something')}
        output = self.session._prepare_multipart_form_data(data)
        self.assertEqual(output, expected)

    @mock.patch('juriscraper.pacer.http.requests.Session.post')
    def test_ignores_non_data_posts(self, mock_post):
        """
        Test that POSTs without a data parameter just pass through as normal.

        :param mock_post: mocked Session.post method
        """
        data = {'name': ('filename', 'junk')}

        self.session.post('https://free.law', files=data, auto_login=False)

        self.assertTrue(mock_post.called,
                        'request.Session.post should be called')
        self.assertEqual(
            data, mock_post.call_args[1]['files'],
            'the data should not be changed if using a files call')

    @mock.patch('juriscraper.pacer.http.requests.Session.post')
    def test_transforms_data_on_post(self, mock_post):
        """
        Test that POSTs using the data parameter get transformed into PACER's
        delightfully odd multi-part form data.

        :param mock_post: mocked Session.post method
        """
        data = {'name': 'dave', 'age': 33}
        expected = {'name': (None, 'dave'), 'age': (None, 33)}

        self.session.post('https://free.law', data=data, auto_login=False)

        self.assertTrue(mock_post.called,
                        'request.Session.post should be called')
        # The data kwarg must be consumed and re-emitted as files.
        self.assertNotIn('data', mock_post.call_args[1],
                         'we should intercept data arguments')
        self.assertEqual(
            expected, mock_post.call_args[1]['files'],
            'we should transform and populate the files argument')

    @mock.patch('juriscraper.pacer.http.requests.Session.post')
    def test_sets_default_timeout(self, mock_post):
        """POSTs without an explicit timeout should get a 300s default."""
        self.session.post('https://free.law', data={}, auto_login=False)

        self.assertTrue(mock_post.called,
                        'request.Session.post should be called')
        self.assertIn('timeout', mock_post.call_args[1],
                      'we should add a default timeout automatically')
        self.assertEqual(300, mock_post.call_args[1]['timeout'],
                         'default should be 300')

    @mock.patch('juriscraper.pacer.http.PacerSession.login')
    def test_auto_login(self, mock_login):
        """Do we automatically log in if needed?"""
        court_id = 'ksd'
        pacer_doc_id = '07902639735'
        url = make_doc1_url(court_id, pacer_doc_id, True)
        pacer_case_id = '81531'
        # This triggers and auto-login because we aren't logged in yet.
        self.session.username = PACER_USERNAME
        self.session.password = PACER_PASSWORD
        _ = self.session.get(url,
                             params={
                                 'case_id': pacer_case_id,
                                 'got_receipt': '1',
                             },
                             allow_redirects=True)
        self.assertTrue(mock_login.called,
                        'PacerSession.login() should be called.')