示例#1
0
 def setUp(self):
     """Feed JohnDoe.RECORD to a new RecordParser and store the expected dates and link prefix."""
     self.base_uri = "https://publicaccess.courts.oregon.gov/PublicAccessLogin/CaseDetail.aspx?CaseID="
     self.case1_date, self.case2_date, self.case3_date = (
         datetime.date(1963, 3, 23),
         datetime.date(1963, 4, 11),
         datetime.date(2012, 4, 1),
     )
     self.parser = RecordParser()
     self.parser.feed(JohnDoe.RECORD)
示例#2
0
    def test_it_assigns_birth_year(self):
        """Checks birth_year on the three cases parsed from RECORD_WITH_MISSING_BIRTH_YEAR.

        The first two cases are expected to carry 1943 and the third None.
        """
        record_parser = RecordParser()
        record_parser.feed(JohnDoe.RECORD_WITH_MISSING_BIRTH_YEAR)

        # Cases are indexed one at a time so a short result list still fails
        # at the same assertion as before.
        for position in (0, 1):
            assert record_parser.cases[position].birth_year == 1943
        assert record_parser.cases[2].birth_year is None
示例#3
0
class TestRecordParser(unittest.TestCase):
    """Checks the cases RecordParser builds from the JohnDoe.RECORD fixture."""

    def setUp(self):
        """Feed the fixture to a fresh parser and store the expected dates and link prefix."""
        self.base_uri = "https://publicaccess.courts.oregon.gov/PublicAccessLogin/CaseDetail.aspx?CaseID="
        self.case1_date, self.case2_date, self.case3_date = (
            datetime.date(1963, 3, 23),
            datetime.date(1963, 4, 11),
            datetime.date(2012, 4, 1),
        )
        self.parser = RecordParser()
        self.parser.feed(JohnDoe.RECORD)

    def test_non_empty_record(self):
        """Tests that the fixture parses into three cases."""
        case_count = len(self.parser.cases)
        assert case_count == 3

    def test_it_assigns_case_info(self):
        """Every parsed case carries the same name and birth year."""
        for parsed_case in self.parser.cases:
            assert parsed_case.name == "Doe, John D"
            assert parsed_case.birth_year == 1943

    def test_it_assigns_case_number(self):
        """Case numbers follow the fixture order."""
        expected_numbers = ("X0001", "X0002", "X0003")
        for position, expected in enumerate(expected_numbers):
            assert self.parser.cases[position].case_number == expected

    def test_it_assigns_link_to_case(self):
        """Detail links are the base URI followed by the case number."""
        for position, suffix in enumerate(("X0001", "X0002", "X0003")):
            assert self.parser.cases[position].case_detail_link == self.base_uri + suffix

    def test_it_assigns_citation(self):
        """Citation numbers follow the fixture order; the third is empty."""
        expected_citations = ("C0001", "C0002", "")
        for position, expected in enumerate(expected_citations):
            assert self.parser.cases[position].citation_number == expected

    def test_it_assigns_date_location(self):
        """Each case has its expected date and the Multnomah location."""
        expected_dates = (self.case1_date, self.case2_date, self.case3_date)
        for position, expected_date in enumerate(expected_dates):
            assert self.parser.cases[position].date == expected_date
            assert self.parser.cases[position].location == "Multnomah"

    def test_it_assigns_violation_status_info(self):
        """Each case has its expected violation type and current status."""
        expected_pairs = (
            ("Offense Misdemeanor", "Closed"),
            ("Offense Felony", "Closed"),
            ("Offense Misdemeanor", "Open"),
        )
        for position, (violation_type, status) in enumerate(expected_pairs):
            assert self.parser.cases[position].violation_type == violation_type
            assert self.parser.cases[position].current_status == status

    def test_it_assigns_charges(self):
        """The record parser alone leaves every case with an empty charge list."""
        for position in range(3):
            assert len(self.parser.cases[position].charges) == 0
示例#4
0
 def _search_record(session: Session, node_response, search_url, first_name,
                    last_name, middle_name, birth_date):
     """Submit the name search and return a RecordParser fed with the response text.

     The form payload is built by Crawler.__extract_payload from the node
     response and the name / birth-date fields; the POST uses a 30 second
     timeout.
     """
     search_payload = Crawler.__extract_payload(
         node_response, last_name, first_name, middle_name, birth_date)
     search_response = session.post(
         search_url, data=search_payload, timeout=30)

     parser = RecordParser()
     parser.feed(search_response.text)
     return parser
示例#5
0
 def __init__(self):
     """Create the HTTP session, a placeholder response, and the record parser."""
     self.session = requests.Session()
     # Instantiate Response: storing the class itself would make
     # self.response.text a property object instead of a string.
     self.response = requests.Response()
     self.result = RecordParser()
示例#6
0
class Crawler:
    """Logs in to a court public-access site, runs a name search, and
    builds a Record from the search results and their case detail pages."""

    def __init__(self):
        """Create the HTTP session, a placeholder response, and the record parser."""
        self.session = requests.Session()
        # Instantiate Response: storing the class itself would make
        # self.response.text a property object instead of a string.
        self.response = requests.Response()
        self.result = RecordParser()

    def login(self, username, password, close_session=False):
        """POST the login payload and report whether the login was accepted.

        The response is kept on self.response because search() reads it.
        When close_session is true the session is closed after the POST.
        Returns True when the response URL differs from the login URL.
        """
        url = URL.login_url()
        payload = Payload.login_payload(username, password)

        self.response = self.session.post(url, data=payload)

        if close_session:
            self.session.close()

        return Crawler.__login_validation(self.response, url)

    def search(self, first_name, last_name, middle_name='', birth_date=''):
        """Run a name search and return a Record of the matching cases.

        For every case found, the detail page is fetched and parsed: the
        balance due is set on the case and one charge is appended per entry
        in the parsed charge data. The session is closed when the search
        finishes, whether or not it succeeded.
        """
        url = 'https://publicaccess.courts.oregon.gov/PublicAccessLogin/Search.aspx?ID=100'
        try:
            node_response = self.__parse_nodes(url)
            payload = Crawler.__extract_payload(node_response, last_name,
                                                first_name, middle_name,
                                                birth_date)

            # perform search
            response = self.session.post(url, data=payload)
            self.result.feed(response.text)

            # Parse search results (case detail pages)
            for case in self.result.cases:
                case_parser = self.__parse_case(case)
                case.set_balance_due(case_parser.balance_due)
                for charge_id, charge in case_parser.hashed_charge_data.items():
                    charge['case'] = case
                    new_charge = Crawler.__build_charge(charge_id, charge,
                                                        case_parser)
                    case.charges.append(new_charge)
        finally:
            # Close even when a request or parser raises, so the session's
            # connections are not leaked on the error path.
            self.session.close()
        return Record(self.result.cases)

    def __parse_nodes(self, url):
        """Read the node id from the stored login response and POST it to url."""
        node_parser = NodeParser()
        node_parser.feed(self.response.text)
        payload = {'NodeID': node_parser.node_id, 'NodeDesc': 'All+Locations'}
        return self.session.post(url, data=payload)

    def __parse_case(self, case):
        """Fetch the case detail page and return the CaseParser that was fed with it."""
        case_parser = CaseParser()
        response = self.session.get(case.case_detail_link)
        case_parser.feed(response.text)
        return case_parser

    @staticmethod
    def __extract_payload(node_response, last_name, first_name, middle_name,
                          birth_date):
        """Build the search payload from the node response and the search fields."""
        param_parser = ParamParser()
        param_parser.feed(node_response.text)
        return Payload.payload(param_parser, last_name, first_name,
                               middle_name, birth_date)

    @staticmethod
    def __login_validation(response, login_url):
        """Return True when the response URL is not the login URL."""
        return response.url != login_url

    @staticmethod
    def __build_charge(charge_id, charge, case_parser):
        """Create a Charge, attaching a Disposition when one is recorded for charge_id."""
        disposition_data = case_parser.hashed_dispo_data.get(charge_id)
        if disposition_data:
            charge['disposition'] = Disposition(
                disposition_data.get('date'),
                disposition_data.get('ruling'))
        return Charge.create(**charge)
示例#7
0
    def test_empty_record(self):
        """Tests that feeding a blank record yields no cases."""
        blank_parser = RecordParser()
        blank_parser.feed(JohnDoe.BLANK_RECORD)

        case_count = len(blank_parser.cases)
        assert case_count == 0
示例#8
0
class Crawler:
    """Logs in to a court public-access site, runs a name search, and
    builds a Record from the search results and their case detail pages."""

    def __init__(self):
        """Create the HTTP session, an empty placeholder response, and the record parser."""
        self.session = requests.Session()
        self.response = requests.Response()
        self.result = RecordParser()

    def login(self, username, password, close_session=False):
        """POST the login payload and report whether the login was accepted.

        The response is kept on self.response because search() reads it.
        When close_session is true the session is closed after the POST.
        Returns True when the response URL differs from the login URL.
        """
        url = URL.login_url()
        payload = Payload.login_payload(username, password)

        self.response = self.session.post(url, data=payload)

        if close_session:
            self.session.close()

        return Crawler.__login_validation(self.response, url)

    def search(self, first_name, last_name, middle_name="", birth_date=""):
        """Run a name search and return a Record of the matching cases.

        Case detail pages are fetched and parsed on a thread pool. An
        exception raised while building any case propagates to the caller.
        The session is closed when the search finishes, whether or not it
        succeeded.
        """
        url = "https://publicaccess.courts.oregon.gov/PublicAccessLogin/Search.aspx?ID=100"
        try:
            node_response = self.__parse_nodes(url)
            payload = Crawler.__extract_payload(node_response, last_name,
                                                first_name, middle_name,
                                                birth_date)

            # perform search
            response = self.session.post(url, data=payload)
            self.result.feed(response.text)

            # Parse search results (case detail pages)
            with ThreadPoolExecutor(max_workers=50) as executor:
                # executor.map only re-raises a worker's exception when its
                # result is retrieved; drain the iterator so a failed case is
                # reported instead of being dropped silently.
                list(executor.map(self.__build_case, self.result.cases))
        finally:
            # Close even when a request or parser raises, so the session's
            # connections are not leaked on the error path.
            self.session.close()
        return Record(self.result.cases)

    def __build_case(self, case):
        """Fill in one case from its detail page: probation flag, balance due, charges."""
        case_parser_data = self.__parse_case(case)
        case.set_probation_revoked(case_parser_data.probation_revoked)
        case.set_balance_due(case_parser_data.balance_due)
        for charge_id, charge in case_parser_data.hashed_charge_data.items():
            charge["case"] = case
            new_charge = Crawler.__build_charge(charge_id, charge,
                                                case_parser_data)
            case.charges.append(new_charge)

    def __parse_nodes(self, url):
        """Read the node id from the stored login response and POST it to url."""
        node_parser = NodeParser()
        node_parser.feed(self.response.text)
        payload = {"NodeID": node_parser.node_id, "NodeDesc": "All+Locations"}
        return self.session.post(url, data=payload)

    def __parse_case(self, case):
        """Fetch the case detail page and return the result of CaseParser.feed on its text."""
        response = self.session.get(case.case_detail_link)
        return CaseParser.feed(response.text)

    @staticmethod
    def __extract_payload(node_response, last_name, first_name, middle_name,
                          birth_date):
        """Build the search payload from the node response and the search fields."""
        param_parser = ParamParser()
        param_parser.feed(node_response.text)
        return Payload.payload(param_parser, last_name, first_name,
                               middle_name, birth_date)

    @staticmethod
    def __login_validation(response, login_url):
        """Return True when the response URL is not the login URL."""
        return response.url != login_url

    @staticmethod
    def __build_charge(charge_id, charge, case_parser_data):
        """Create a charge, attaching a Disposition when one is recorded for charge_id.

        The disposition date is parsed as month/day/year; strptime raises if
        the value is missing or in another format.
        """
        disposition_data = case_parser_data.hashed_dispo_data.get(charge_id)
        if disposition_data:
            # TODO: Log error if format is not correct
            disposition_date = datetime.strptime(
                disposition_data.get("date"), "%m/%d/%Y").date()
            ruling = disposition_data.get("ruling")
            charge["disposition"] = Disposition(disposition_date, ruling)
        return ChargeCreator.create(**charge)
class TestRecordParser(unittest.TestCase):
    """Checks the cases and charges RecordParser builds from the JohnDoe.RECORD fixture."""

    def setUp(self):
        """Feed the fixture to a fresh parser and store the expected dates and link prefix."""
        self.base_uri = "https://publicaccess.courts.oregon.gov/PublicAccessLogin/CaseDetail.aspx?CaseID="
        self.case1_date, self.case2_date, self.case3_date = (
            datetime.date(1963, 3, 23),
            datetime.date(1963, 4, 11),
            datetime.date(2012, 4, 1),
        )
        self.parser = RecordParser()
        self.parser.feed(JohnDoe.RECORD)

    def test_non_empty_record(self):
        """Tests that the fixture parses into three cases."""
        case_count = len(self.parser.cases)
        assert case_count == 3

    def test_it_assigns_case_info(self):
        """Every parsed case carries the same name and birth year."""
        for parsed_case in self.parser.cases:
            assert parsed_case.name == "Doe, John D"
            assert parsed_case.birth_year == 1943

    def test_it_assigns_case_number(self):
        """Case numbers follow the fixture order."""
        expected_numbers = ("X0001", "X0002", "X0003")
        for position, expected in enumerate(expected_numbers):
            assert self.parser.cases[position].case_number == expected

    def test_it_assigns_link_to_case(self):
        """Detail links are the base URI followed by the case number."""
        for position, suffix in enumerate(("X0001", "X0002", "X0003")):
            assert self.parser.cases[position].case_detail_link == self.base_uri + suffix

    def test_it_assigns_citation(self):
        """Citation numbers follow the fixture order; the third is empty."""
        expected_citations = ("C0001", "C0002", "")
        for position, expected in enumerate(expected_citations):
            assert self.parser.cases[position].citation_number == expected

    def test_it_assigns_date_location(self):
        """Each case has its expected date and the Multnomah location."""
        expected_dates = (self.case1_date, self.case2_date, self.case3_date)
        for position, expected_date in enumerate(expected_dates):
            assert self.parser.cases[position].date == expected_date
            assert self.parser.cases[position].location == "Multnomah"

    def test_it_assigns_violation_status_info(self):
        """Each case has its expected violation type and current status."""
        expected_pairs = (
            ("Offense Misdemeanor", "Closed"),
            ("Offense Felony", "Closed"),
            ("Offense Misdemeanor", "Open"),
        )
        for position, (violation_type, status) in enumerate(expected_pairs):
            assert self.parser.cases[position].violation_type == violation_type
            assert self.parser.cases[position].current_status == status

    def test_it_assigns_charges(self):
        """Checks the number of charges per case and the charge values in order."""
        # Expected charges for the first and second case, in fixture order.
        expected_by_case = (
            ('Attempt to Commit a Class C/Unclassified Felony',
             'Drug Free Zone Variance',
             'Criminal Trespass in the Second Degree'),
            ('Poss Controlled Sub 2 (Reduced - to A Misdemeanor)',
             'Drug Free Zone Variance'),
        )
        for case_index, expected_charges in enumerate(expected_by_case):
            assert len(self.parser.cases[case_index].charges) == len(expected_charges)
            for charge_index, expected in enumerate(expected_charges):
                assert self.parser.cases[case_index].charges[charge_index] == expected

        # third case
        assert len(self.parser.cases[2].charges) == 1

        # NOTE(review): this compares cases[1], not cases[2], so the third
        # case's charge is never checked. Looks like a copy-paste slip;
        # confirm the intended index and expected value against the fixture.
        assert self.parser.cases[1].charges[0] == 'Poss Controlled Sub 2 (Reduced - to A Misdemeanor)'
示例#10
0
class Crawler:
    """Logs in to a court public-access site, runs a name search, and
    builds the possible cases (plus any questions) from the case detail pages."""

    def __init__(self):
        """Create the HTTP session, an empty placeholder response, and the record parser."""
        self.session = requests.Session()
        self.response = requests.Response()
        self.result = RecordParser()

    def login(self, username, password, close_session=False) -> bool:
        """POST the login payload and report whether the login was accepted.

        The response is kept on self.response because search() reads it.
        When close_session is true the session is closed after the POST.
        Returns True when the response text contains "Case Records".
        """
        url = URL.login_url()
        payload = Payload.login_payload(username, password)

        self.response = self.session.post(url, data=payload)

        if close_session:
            self.session.close()

        return Crawler.__login_validation(self.response)

    def search(self,
               first_name,
               last_name,
               middle_name="",
               birth_date="") -> Tuple[List[AmbiguousCase], List[Question]]:
        """Run a name search and build every matching case from its detail page.

        Returns the per-case results in search order together with all
        questions collected while building charges.

        Raises:
            ValueError: when the search matches 300 or more cases, or when a
                case detail page cannot be fetched.

        The session is closed when the search finishes, whether or not it
        succeeded.
        """
        url = "https://publicaccess.courts.oregon.gov/PublicAccessLogin/Search.aspx?ID=100"
        case_limit = 300
        try:
            node_response = self.__parse_nodes(url)
            payload = Crawler.__extract_payload(node_response, last_name,
                                                first_name, middle_name,
                                                birth_date)

            # perform search
            response = self.session.post(url, data=payload)
            self.result.feed(response.text)

            case_count = len(self.result.cases)
            if case_count >= case_limit:
                raise ValueError(
                    f"Found {case_count} matching cases, exceeding the limit of {case_limit}. Please add a date of birth to your search."
                )

            # Parse search results (case detail pages)
            ambiguous_cases: List[AmbiguousCase] = []
            questions_accumulator: List[Question] = []
            with ThreadPoolExecutor(max_workers=50) as executor:
                for ambiguous_case, questions in executor.map(
                        self.__build_case, self.result.cases):
                    ambiguous_cases.append(ambiguous_case)
                    questions_accumulator += questions
        finally:
            # Close on every exit path (including the errors raised above) so
            # the session's connections are not leaked.
            self.session.close()
        return ambiguous_cases, questions_accumulator

    def __build_case(self, case) -> Tuple[AmbiguousCase, List[Question]]:
        """Build all possible versions of one case from its detail page.

        Each charge may be ambiguous; the result holds one copy of the case
        for every combination of the charges' possibilities, together with
        the questions produced while building the charges.
        """
        case_parser_data = self.__parse_case(case)
        balance_due_in_cents = CaseCreator.compute_balance_due_in_cents(
            case_parser_data.balance_due)
        updated_case = replace(
            case,
            balance_due_in_cents=balance_due_in_cents,
            probation_revoked=case_parser_data.probation_revoked)
        ambiguous_charges: List[AmbiguousCharge] = []
        questions: List[Question] = []
        for charge_id, charge_dict in case_parser_data.hashed_charge_data.items():
            charge_dict["case_number"] = updated_case.case_number
            charge_dict["violation_type"] = updated_case.violation_type
            ambiguous_charge, question = Crawler.__build_charge(
                charge_id, charge_dict, case_parser_data)
            ambiguous_charges.append(ambiguous_charge)
            if question:
                questions.append(question)
        # One candidate case per combination of charge possibilities.
        ambiguous_case = [
            replace(updated_case, charges=tuple(charges))
            for charges in product(*ambiguous_charges)
        ]
        return ambiguous_case, questions

    def __parse_nodes(self, url):
        """Read the node id from the stored login response and POST it to url."""
        node_parser = NodeParser()
        node_parser.feed(self.response.text)
        payload = {"NodeID": node_parser.node_id, "NodeDesc": "All+Locations"}
        return self.session.post(url, data=payload)

    def __parse_case(self, case):
        """Fetch the case detail page and return the result of CaseParser.feed on its text.

        Raises:
            ValueError: when the response status is not 200 or the body is empty.
        """
        response = self.session.get(case.case_detail_link)
        if response.status_code != 200 or not response.text:
            raise ValueError(
                "Failed to fetch case detail page. Please rerun the search.")
        return CaseParser.feed(response.text)

    @staticmethod
    def __extract_payload(node_response, last_name, first_name, middle_name,
                          birth_date):
        """Build the search payload from the node response and the search fields."""
        param_parser = ParamParser()
        param_parser.feed(node_response.text)
        return Payload.payload(param_parser, last_name, first_name,
                               middle_name, birth_date)

    @staticmethod
    def __login_validation(response):
        """Return True when the response text contains "Case Records"."""
        return "Case Records" in response.text

    @staticmethod
    def __build_charge(
            charge_id, charge,
            case_parser_data) -> Tuple[AmbiguousCharge, Optional[Question]]:
        """Create a charge, attaching a disposition when one is recorded for charge_id.

        The disposition date is parsed as month/day/year; strptime raises if
        the value is missing or in another format. The disposition is flagged
        as amended when its event text contains "amended" (case-insensitive).
        """
        disposition_data = case_parser_data.hashed_dispo_data.get(charge_id)
        if disposition_data:
            # TODO: Log error if format is not correct
            disposition_date = datetime.strptime(
                disposition_data.get("date"), "%m/%d/%Y").date()
            ruling = disposition_data.get("ruling")
            charge["disposition"] = DispositionCreator.create(
                disposition_date, ruling,
                "amended" in disposition_data["event"].lower())
        return ChargeCreator.create(charge_id, **charge)