示例#1
0
    def _merge_row_into_ingest_info(self, ingest_info, row_ii):
        """Folds the entities parsed from a single row into |ingest_info|.

        The row's person is looked up in |ingest_info| by person id, and the
        row is merged at the shallowest level that is not already present:
        the whole person, else the booking, else the charge.

        Raises:
            DirectIngestError: if the person already exists and the row does
                not hold exactly one booking, or if the booking also already
                exists and the row does not hold exactly one charge.
        """
        incoming_person = scraper_utils.one('person', row_ii)
        known_person = ingest_info.get_person_by_id(incoming_person.person_id)
        if not known_person:
            # Person not seen before: take the row's person wholesale.
            ingest_info.people.append(incoming_person)
            return

        incoming_bookings = incoming_person.bookings
        if len(incoming_bookings) != 1:
            raise DirectIngestError(
                error_type=DirectIngestErrorType.PARSE_ERROR,
                msg="Exactly one booking must be on each row.")
        incoming_booking = incoming_bookings[0]

        known_booking = known_person.get_booking_by_id(
            incoming_booking.booking_id)
        if not known_booking:
            # Known person, new booking: attach the booking as-is.
            known_person.bookings.append(incoming_booking)
            return

        incoming_charges = incoming_booking.charges
        if len(incoming_charges) != 1:
            raise DirectIngestError(
                error_type=DirectIngestErrorType.PARSE_ERROR,
                msg="Exactly one charge must be on each row.")
        # Known person and booking: only the charge is new information.
        known_booking.charges.append(incoming_charges[0])
示例#2
0
 def test_oneBond_passes(self):
     # A single person -> booking -> charge -> bond chain: one("bond", ...)
     # must hand back that exact bond object.
     ingest_info = IngestInfo()
     charge = ingest_info.create_person().create_booking().create_charge()
     bond = charge.create_bond()
     self.assertIs(bond, scraper_utils.one("bond", ingest_info))
示例#3
0
 def test_oneBooking_passes(self):
     # The booking also carries an arrest; one("booking", ...) must still
     # return that same booking object.
     ingest_info = IngestInfo()
     person = ingest_info.create_person()
     booking = person.create_booking()
     booking.create_arrest()
     self.assertIs(booking, scraper_utils.one("booking", ingest_info))
示例#4
0
 def test_noSentence_raises(self):
     # The tree holds a bond but no sentence, so asking for the one sentence
     # must raise ValueError.
     ingest_info = IngestInfo()
     charge = ingest_info.create_person().create_booking().create_charge()
     charge.create_bond()
     self.assertRaises(
         ValueError, scraper_utils.one, "sentence", ingest_info)
示例#5
0
 def test_twoPeople_raises(self):
     # Two people are present (only the first has a booking); asking for
     # the one booking must raise ValueError.
     ingest_info = IngestInfo()
     first_person = ingest_info.create_person()
     first_person.create_booking()
     ingest_info.create_person()
     self.assertRaises(
         ValueError, scraper_utils.one, "booking", ingest_info)
示例#6
0
 def test_onePerson_passes(self):
     # With exactly one person present, one("person", ...) must return that
     # same person object.
     ingest_info = IngestInfo()
     person = ingest_info.create_person()
     found = scraper_utils.one("person", ingest_info)
     self.assertIs(person, found)
示例#7
0
    def parse(self, json_people: Iterable[Dict]) -> IngestInfo:
        """Uses the JsonDataExtractor to convert JSON data at the person level
        to IngestInfo objects.

        Each element of |json_people| is extracted into its own IngestInfo,
        post-processed (residence, race/ethnicity, admission reason, holds,
        bonds, charge fields), and the resulting people are then grouped by
        person id so that one Person carries all of that person's bookings.

        Args:
            json_people: Dicts to parse, one per booking. The keys 'hold',
                'bond' and 'booking' are read directly from each dict here.

        Returns:
            An IngestInfo holding one Person per distinct person id, with the
            bookings from every entry sharing that id merged together.
        """
        extractor = JsonDataExtractor(self.yaml_file)
        bond_extractor = JsonDataExtractor(self.bond_yaml_file)

        # Group people by person id. Since we're iterating over bookings, not
        # people, we have to manually merge people's bookings.
        people: Dict[str, List[Person]] = defaultdict(list)

        for person_dict in json_people:
            ii = extractor.extract_and_populate_data(person_dict)
            person = scraper_utils.one('person', ii)
            person.place_of_residence = self.get_address(person_dict)
            # TODO(1802): parse ethnicity in enum overrides
            if person.race == 'HISPANIC' or person.ethnicity == 'Y':
                person.race, person.ethnicity = None, 'HISPANIC'
            else:
                person.ethnicity = None

            booking = scraper_utils.one('booking', ii)
            booking.admission_reason = self.get_admission_reason(person_dict)

            for hold in person_dict['hold']:
                jurisdiction_name = hold['holding_for_agency']
                if jurisdiction_name == 'Request to Hold':
                    # For this agency value the jurisdiction name is taken
                    # from the hold's 'charges' field instead.
                    jurisdiction_name = hold['charges']
                booking.create_hold(hold_id=hold['pkey'],
                                    jurisdiction_name=jurisdiction_name)

            # Bonds are shared across all charges within a single case
            for bond_dict in person_dict['bond']:
                bond = scraper_utils.one(
                    'bond',
                    bond_extractor.extract_and_populate_data(bond_dict))
                case_pk = bond_dict['case_pk']
                # Materialize the matches: a generator expression is always
                # truthy, which would make the fallback below unreachable.
                matching_charges = [c for c in ii.get_all_charges()
                                    if c.case_number == case_pk]
                if matching_charges:
                    for charge in matching_charges:
                        charge.bond = bond
                else:
                    # Some bonds have no charges associated with their case
                    booking.create_charge(bond=bond)

            court_type = person_dict['booking']['commiting_authority']
            for charge in ii.get_all_charges():
                charge.court_type = court_type
                charge.status = self.get_charge_status(person_dict,
                                                       charge.charge_id)

                if charge.degree:
                    logging.info(
                        "Charge degree found, but we don't expect it "
                        "to be filled in: \n%s", ii)
                if charge.charge_class:
                    logging.info(
                        "Charge class found, but we don't expect it "
                        "to be filled in: \n%s", ii)
                if charge.number_of_counts:
                    # Keep only the leading count from text such as
                    # "3 mitts" / "3 other mitts"; anything else is cleared.
                    match = re.search(r"([0-9]+) (?:other )?mitts",
                                      charge.number_of_counts, re.IGNORECASE)
                    charge.number_of_counts = match.group(1) if match else None

            for bond in ii.get_all_bonds(lambda b: b.bond_agent):
                # bond.speccond (stored temporarily in bond.bond_agent) might
                # have two cash values separated by a slash, indicating a
                # partial bond.
                if re.search(r'[0-9]+ */ *[0-9]+', bond.bond_agent):
                    bond.bond_type = BondType.PARTIAL_CASH.value
                # bond_agent was only a scratch slot; never emit it.
                bond.bond_agent = None

            people[person.person_id].append(person)

        def merge_bookings(dupes):
            """Collapses same-id people into one, pooling their bookings."""
            base = dupes.pop()
            for p in dupes:
                base.bookings.extend(p.bookings)
            return base

        merged_people = [merge_bookings(dupes) for dupes in people.values()]
        return IngestInfo(people=merged_people)