def test_it_assigns_birth_year(self):
    """Check the birth year parsed for each case of the missing-birth-year fixture."""
    record_parser = RecordParser()
    record_parser.feed(JohnDoe.RECORD_WITH_MISSING_BIRTH_YEAR)
    parsed_cases = record_parser.cases

    # The first two cases are expected to carry a birth year ...
    for position in (0, 1):
        assert parsed_cases[position].birth_year == 1943
    # ... while the third one is expected to have none.
    assert parsed_cases[2].birth_year is None
class TestRecordParser(unittest.TestCase):
    """Checks the cases RecordParser builds from the JohnDoe.RECORD fixture."""

    def setUp(self):
        """Feed the fixture to a fresh parser and store values shared by the tests."""
        self.base_uri = "https://publicaccess.courts.oregon.gov/PublicAccessLogin/CaseDetail.aspx?CaseID="
        self.case1_date = datetime.date(1963, 3, 23)
        self.case2_date = datetime.date(1963, 4, 11)
        self.case3_date = datetime.date(2012, 4, 1)

        self.parser = RecordParser()
        self.parser.feed(JohnDoe.RECORD)

    def test_non_empty_record(self):
        """Tests it can parse a record."""
        case_count = len(self.parser.cases)
        assert case_count == 3

    def test_it_assigns_case_info(self):
        """Every parsed case carries the same name and birth year."""
        for parsed_case in self.parser.cases:
            assert parsed_case.name == "Doe, John D"
            assert parsed_case.birth_year == 1943

    def test_it_assigns_case_number(self):
        """Case numbers are assigned in order."""
        for index, expected in enumerate(("X0001", "X0002", "X0003")):
            assert self.parser.cases[index].case_number == expected

    def test_it_assigns_link_to_case(self):
        """Each detail link is the base URI followed by the case id."""
        for index, case_id in enumerate(("X0001", "X0002", "X0003")):
            assert self.parser.cases[index].case_detail_link == self.base_uri + case_id

    def test_it_assigns_citation(self):
        """Citation numbers are assigned; the third case has an empty one."""
        for index, expected in enumerate(("C0001", "C0002", "")):
            assert self.parser.cases[index].citation_number == expected

    def test_it_assigns_date_location(self):
        """Each case gets its own date and the shared location."""
        expected_dates = (self.case1_date, self.case2_date, self.case3_date)
        for index, expected_date in enumerate(expected_dates):
            assert self.parser.cases[index].date == expected_date
            assert self.parser.cases[index].location == "Multnomah"

    def test_it_assigns_violation_status_info(self):
        """Violation type and current status are assigned per case."""
        expected_pairs = (
            ("Offense Misdemeanor", "Closed"),
            ("Offense Felony", "Closed"),
            ("Offense Misdemeanor", "Open"),
        )
        for index, (violation_type, status) in enumerate(expected_pairs):
            assert self.parser.cases[index].violation_type == violation_type
            assert self.parser.cases[index].current_status == status

    def test_it_assigns_charges(self):
        """No charges are attached to any of the three parsed cases."""
        for index in range(3):
            assert len(self.parser.cases[index].charges) == 0
def _search_record(session: Session, node_response, search_url, first_name, last_name, middle_name, birth_date):
    """Submit a search and return a parser loaded with the response.

    Builds the search payload from ``node_response`` and the name /
    birth-date fields, POSTs it to ``search_url`` on ``session`` with a
    30 second timeout, and feeds the response text to a new ``RecordParser``.

    Returns:
        The ``RecordParser`` instance after it has been fed the response text.
    """
    # ``__extract_payload`` is a name-mangled private static method of
    # ``Crawler``. Written as ``Crawler.__extract_payload`` the lookup only
    # works inside the ``Crawler`` class body; at module level it raises
    # AttributeError. The explicit mangled spelling resolves in both places.
    build_payload = Crawler._Crawler__extract_payload
    payload = build_payload(node_response, last_name, first_name, middle_name, birth_date)

    response = session.post(search_url, data=payload, timeout=30)

    record_parser = RecordParser()
    record_parser.feed(response.text)
    return record_parser
class Crawler:
    """Session-based client: logs in, runs a name search, and builds cases with charges."""

    def __init__(self):
        self.session = requests.Session()
        # An instance rather than the bare class, so ``self.response.text`` is
        # a readable attribute even before ``login`` replaces it.
        self.response = requests.Response()
        self.result = RecordParser()

    def login(self, username, password, close_session=False):
        """Post the login form and report whether the login looks successful.

        Args:
            username: Passed to ``Payload.login_payload``.
            password: Passed to ``Payload.login_payload``.
            close_session: When true, the session is closed after the post.

        Returns:
            True when the response URL differs from the login URL.
        """
        url = URL.login_url()
        payload = Payload.login_payload(username, password)
        self.response = self.session.post(url, data=payload)
        if close_session:
            self.session.close()
        return Crawler.__login_validation(self.response, url)

    def search(self, first_name, last_name, middle_name='', birth_date=''):
        """Run a search and return a ``Record`` built from the parsed cases.

        Each case found by the search has its detail page fetched and parsed;
        the balance due and the charges found there are attached to the case.
        The session is closed when the search finishes, including when an
        error interrupts it.
        """
        url = 'https://publicaccess.courts.oregon.gov/PublicAccessLogin/Search.aspx?ID=100'
        try:
            node_response = self.__parse_nodes(url)
            payload = Crawler.__extract_payload(node_response, last_name, first_name, middle_name, birth_date)

            # perform search
            response = self.session.post(url, data=payload)
            self.result.feed(response.text)

            # Parse search results (case detail pages)
            for case in self.result.cases:
                case_parser = self.__parse_case(case)
                case.set_balance_due(case_parser.balance_due)
                for charge_id, charge in case_parser.hashed_charge_data.items():
                    charge['case'] = case
                    case.charges.append(Crawler.__build_charge(charge_id, charge, case_parser))

            return Record(self.result.cases)
        finally:
            # Release the connection pool on the error path as well.
            self.session.close()

    def __parse_nodes(self, url):
        """Post the node id parsed from the stored response to ``url``."""
        node_parser = NodeParser()
        node_parser.feed(self.response.text)
        payload = {'NodeID': node_parser.node_id, 'NodeDesc': 'All+Locations'}
        return self.session.post(url, data=payload)

    def __parse_case(self, case):
        """Fetch the case detail page and return the ``CaseParser`` fed with it."""
        case_parser = CaseParser()
        response = self.session.get(case.case_detail_link)
        case_parser.feed(response.text)
        return case_parser

    @staticmethod
    def __extract_payload(node_response, last_name, first_name, middle_name, birth_date):
        """Build the search payload from the parameters parsed out of ``node_response``."""
        param_parser = ParamParser()
        param_parser.feed(node_response.text)
        return Payload.payload(param_parser, last_name, first_name, middle_name, birth_date)

    @staticmethod
    def __login_validation(response, login_url):
        """Return True when the response URL is not the login URL."""
        return response.url != login_url

    @staticmethod
    def __build_charge(charge_id, charge, case_parser):
        """Create a charge, attaching a disposition when one is recorded for ``charge_id``."""
        disposition_data = case_parser.hashed_dispo_data.get(charge_id)
        if disposition_data:
            charge['disposition'] = Disposition(
                disposition_data.get('date'),
                disposition_data.get('ruling'))
        return Charge.create(**charge)
def test_empty_record(self):
    """Tests that a blank record produces no cases."""
    blank_parser = RecordParser()
    blank_parser.feed(JohnDoe.BLANK_RECORD)

    case_count = len(blank_parser.cases)
    assert case_count == 0
class Crawler:
    """Session-based client: logs in, runs a name search, and builds cases with charges."""

    def __init__(self):
        self.session = requests.Session()
        self.response = requests.Response()
        self.result = RecordParser()

    def login(self, username, password, close_session=False):
        """Post the login form and report whether the login looks successful.

        Args:
            username: Passed to ``Payload.login_payload``.
            password: Passed to ``Payload.login_payload``.
            close_session: When true, the session is closed after the post.

        Returns:
            True when the response URL differs from the login URL.
        """
        url = URL.login_url()
        payload = Payload.login_payload(username, password)
        self.response = self.session.post(url, data=payload)
        if close_session:
            self.session.close()
        return Crawler.__login_validation(self.response, url)

    def search(self, first_name, last_name, middle_name="", birth_date=""):
        """Run a search and return a ``Record`` built from the parsed cases.

        Case detail pages are fetched and parsed on a thread pool. An
        exception raised while building any case is re-raised here instead of
        being dropped, and the session is closed whether or not the search
        succeeds.
        """
        url = "https://publicaccess.courts.oregon.gov/PublicAccessLogin/Search.aspx?ID=100"
        try:
            node_response = self.__parse_nodes(url)
            payload = Crawler.__extract_payload(node_response, last_name, first_name, middle_name, birth_date)

            # perform search
            response = self.session.post(url, data=payload)
            self.result.feed(response.text)

            # Parse search results (case detail pages)
            with ThreadPoolExecutor(max_workers=50) as executor:
                # map() only re-raises a worker's exception when its result is
                # retrieved, so the iterator has to be drained for failures to
                # surface.
                list(executor.map(self.__build_case, self.result.cases))

            return Record(self.result.cases)
        finally:
            # Release the connection pool on the error path as well.
            self.session.close()

    def __build_case(self, case):
        """Fill ``case`` in place from its parsed detail page."""
        case_parser_data = self.__parse_case(case)
        case.set_probation_revoked(case_parser_data.probation_revoked)
        case.set_balance_due(case_parser_data.balance_due)
        for charge_id, charge in case_parser_data.hashed_charge_data.items():
            charge["case"] = case
            case.charges.append(Crawler.__build_charge(charge_id, charge, case_parser_data))

    def __parse_nodes(self, url):
        """Post the node id parsed from the stored response to ``url``."""
        node_parser = NodeParser()
        node_parser.feed(self.response.text)
        payload = {"NodeID": node_parser.node_id, "NodeDesc": "All+Locations"}
        return self.session.post(url, data=payload)

    def __parse_case(self, case):
        """Fetch the case detail page and return what ``CaseParser.feed`` yields for it."""
        response = self.session.get(case.case_detail_link)
        return CaseParser.feed(response.text)

    @staticmethod
    def __extract_payload(node_response, last_name, first_name, middle_name, birth_date):
        """Build the search payload from the parameters parsed out of ``node_response``."""
        param_parser = ParamParser()
        param_parser.feed(node_response.text)
        return Payload.payload(param_parser, last_name, first_name, middle_name, birth_date)

    @staticmethod
    def __login_validation(response, login_url):
        """Return True when the response URL is not the login URL."""
        return response.url != login_url

    @staticmethod
    def __build_charge(charge_id, charge, case_parser_data):
        """Create a charge, attaching a disposition when one is recorded for ``charge_id``."""
        disposition_data = case_parser_data.hashed_dispo_data.get(charge_id)
        if disposition_data:
            # TODO: Log error if format is not correct
            date = datetime.strptime(disposition_data.get("date"), "%m/%d/%Y").date()
            ruling = disposition_data.get("ruling")
            charge["disposition"] = Disposition(date, ruling)
        return ChargeCreator.create(**charge)
class TestRecordParser(unittest.TestCase):
    """Checks the cases RecordParser builds from the JohnDoe.RECORD fixture."""

    def setUp(self):
        """Feed the fixture to a fresh parser and store values shared by the tests."""
        self.base_uri = "https://publicaccess.courts.oregon.gov/PublicAccessLogin/CaseDetail.aspx?CaseID="
        self.case1_date = datetime.date(1963, 3, 23)
        self.case2_date = datetime.date(1963, 4, 11)
        self.case3_date = datetime.date(2012, 4, 1)

        self.parser = RecordParser()
        self.parser.feed(JohnDoe.RECORD)

    def test_non_empty_record(self):
        """Tests it can parse a record."""
        case_count = len(self.parser.cases)
        assert case_count == 3

    def test_it_assigns_case_info(self):
        """Every parsed case carries the same name and birth year."""
        for parsed_case in self.parser.cases:
            assert parsed_case.name == "Doe, John D"
            assert parsed_case.birth_year == 1943

    def test_it_assigns_case_number(self):
        """Case numbers are assigned in order."""
        for index, expected in enumerate(("X0001", "X0002", "X0003")):
            assert self.parser.cases[index].case_number == expected

    def test_it_assigns_link_to_case(self):
        """Each detail link is the base URI followed by the case id."""
        for index, case_id in enumerate(("X0001", "X0002", "X0003")):
            assert self.parser.cases[index].case_detail_link == self.base_uri + case_id

    def test_it_assigns_citation(self):
        """Citation numbers are assigned; the third case has an empty one."""
        for index, expected in enumerate(("C0001", "C0002", "")):
            assert self.parser.cases[index].citation_number == expected

    def test_it_assigns_date_location(self):
        """Each case gets its own date and the shared location."""
        expected_dates = (self.case1_date, self.case2_date, self.case3_date)
        for index, expected_date in enumerate(expected_dates):
            assert self.parser.cases[index].date == expected_date
            assert self.parser.cases[index].location == "Multnomah"

    def test_it_assigns_violation_status_info(self):
        """Violation type and current status are assigned per case."""
        expected_pairs = (
            ("Offense Misdemeanor", "Closed"),
            ("Offense Felony", "Closed"),
            ("Offense Misdemeanor", "Open"),
        )
        for index, (violation_type, status) in enumerate(expected_pairs):
            assert self.parser.cases[index].violation_type == violation_type
            assert self.parser.cases[index].current_status == status

    def test_it_assigns_charges(self):
        """Charge lists are compared against the expected charge names per case."""
        # first case
        first_charges = (
            'Attempt to Commit a Class C/Unclassified Felony',
            'Drug Free Zone Variance',
            'Criminal Trespass in the Second Degree',
        )
        assert len(self.parser.cases[0].charges) == 3
        for position, expected in enumerate(first_charges):
            assert self.parser.cases[0].charges[position] == expected

        # second case
        second_charges = (
            'Poss Controlled Sub 2 (Reduced - to A Misdemeanor)',
            'Drug Free Zone Variance',
        )
        assert len(self.parser.cases[1].charges) == 2
        for position, expected in enumerate(second_charges):
            assert self.parser.cases[1].charges[position] == expected

        # third case
        assert len(self.parser.cases[2].charges) == 1
        # NOTE(review): this re-checks cases[1], not cases[2] -- looks like a
        # copy-paste slip. Confirm the third case's expected charge before
        # changing the index.
        assert self.parser.cases[1].charges[0] == 'Poss Controlled Sub 2 (Reduced - to A Misdemeanor)'
class Crawler:
    """Session-based client: logs in, runs a name search, and builds ambiguous cases."""

    def __init__(self):
        self.session = requests.Session()
        self.response = requests.Response()
        self.result = RecordParser()

    def login(self, username, password, close_session=False) -> bool:
        """Post the login form and report whether the login looks successful.

        Args:
            username: Passed to ``Payload.login_payload``.
            password: Passed to ``Payload.login_payload``.
            close_session: When true, the session is closed after the post.

        Returns:
            True when the response text contains ``"Case Records"``.
        """
        url = URL.login_url()
        payload = Payload.login_payload(username, password)
        self.response = self.session.post(url, data=payload)
        if close_session:
            self.session.close()
        return Crawler.__login_validation(self.response)

    def search(self, first_name, last_name, middle_name="", birth_date="") -> Tuple[List[AmbiguousCase], List[Question]]:
        """Run a search and build the ambiguous cases plus their questions.

        Case detail pages are fetched and parsed on a thread pool. The session
        is closed whether the search succeeds or raises.

        Returns:
            A pair ``(ambiguous_cases, questions)``: one entry per case found,
            and the questions collected from all cases.

        Raises:
            ValueError: When the search matches 300 or more cases, or when a
                case detail page cannot be fetched.
        """
        url = "https://publicaccess.courts.oregon.gov/PublicAccessLogin/Search.aspx?ID=100"
        case_limit = 300
        try:
            node_response = self.__parse_nodes(url)
            payload = Crawler.__extract_payload(node_response, last_name, first_name, middle_name, birth_date)

            # perform search
            response = self.session.post(url, data=payload)
            self.result.feed(response.text)

            if len(self.result.cases) >= case_limit:
                raise ValueError(
                    f"Found {len(self.result.cases)} matching cases, exceeding the limit of {case_limit}. "
                    "Please add a date of birth to your search."
                )

            # Parse search results (case detail pages)
            ambiguous_cases: List[AmbiguousCase] = []
            questions_accumulator: List[Question] = []
            with ThreadPoolExecutor(max_workers=50) as executor:
                for ambiguous_case, questions in executor.map(self.__build_case, self.result.cases):
                    ambiguous_cases.append(ambiguous_case)
                    questions_accumulator += questions
            return ambiguous_cases, questions_accumulator
        finally:
            # Previously skipped whenever one of the ValueErrors was raised.
            self.session.close()

    def __build_case(self, case) -> Tuple[AmbiguousCase, List[Question]]:
        """Build every possible version of ``case`` from its detail page.

        Returns:
            A pair of (list of possible cases, list of questions); one possible
            case per combination yielded by ``product(*ambiguous_charges)``.
        """
        case_parser_data = self.__parse_case(case)
        balance_due_in_cents = CaseCreator.compute_balance_due_in_cents(case_parser_data.balance_due)
        updated_case = replace(
            case,
            balance_due_in_cents=balance_due_in_cents,
            probation_revoked=case_parser_data.probation_revoked,
        )

        ambiguous_charges: List[AmbiguousCharge] = []
        questions: List[Question] = []
        for charge_id, charge_dict in case_parser_data.hashed_charge_data.items():
            charge_dict["case_number"] = updated_case.case_number
            charge_dict["violation_type"] = updated_case.violation_type
            ambiguous_charge, question = Crawler.__build_charge(charge_id, charge_dict, case_parser_data)
            ambiguous_charges.append(ambiguous_charge)
            if question:
                questions.append(question)

        ambiguous_case = [
            replace(updated_case, charges=tuple(charges))
            for charges in product(*ambiguous_charges)
        ]
        return ambiguous_case, questions

    def __parse_nodes(self, url):
        """Post the node id parsed from the stored response to ``url``."""
        node_parser = NodeParser()
        node_parser.feed(self.response.text)
        payload = {"NodeID": node_parser.node_id, "NodeDesc": "All+Locations"}
        return self.session.post(url, data=payload)

    def __parse_case(self, case):
        """Fetch the case detail page and parse it.

        Raises:
            ValueError: When the response status is not 200 or the body is empty.
        """
        response = self.session.get(case.case_detail_link)
        if response.status_code != 200 or not response.text:
            raise ValueError("Failed to fetch case detail page. Please rerun the search.")
        return CaseParser.feed(response.text)

    @staticmethod
    def __extract_payload(node_response, last_name, first_name, middle_name, birth_date):
        """Build the search payload from the parameters parsed out of ``node_response``."""
        param_parser = ParamParser()
        param_parser.feed(node_response.text)
        return Payload.payload(param_parser, last_name, first_name, middle_name, birth_date)

    @staticmethod
    def __login_validation(response):
        """Return True when the response text contains ``"Case Records"``."""
        return "Case Records" in response.text

    @staticmethod
    def __build_charge(charge_id, charge, case_parser_data) -> Tuple[AmbiguousCharge, Optional[Question]]:
        """Create the charge for ``charge_id``, attaching a disposition when one is recorded."""
        disposition_data = case_parser_data.hashed_dispo_data.get(charge_id)
        if disposition_data:
            # TODO: Log error if format is not correct
            date = datetime.strptime(disposition_data.get("date"), "%m/%d/%Y").date()
            ruling = disposition_data.get("ruling")
            # The third argument is whether the disposition event text mentions "amended".
            charge["disposition"] = DispositionCreator.create(
                date, ruling, "amended" in disposition_data["event"].lower())
        return ChargeCreator.create(charge_id, **charge)