def parse_region_code_with_override(
        proto, region_field_name: str, metadata: IngestMetadata):
    """Returns the normalized region code that applies to the |proto|.

    A region present on the given |metadata| takes precedence and is returned
    in normalized form. Otherwise, the field named |region_field_name| is
    normalized and returned if it is set on the |proto|. Returns None when
    neither source provides a region."""
    override_region = metadata.region if metadata else None
    if override_region:
        return normalize(override_region)

    if not proto.HasField(region_field_name):
        return None
    return normalize(getattr(proto, region_field_name))
def add(self,
        label_or_predicate: Union[str, Callable[[str], bool]],
        mapped_enum: EntityEnum,
        from_field: EntityEnumMeta = None) -> 'EnumOverrides.Builder':
    """Adds a mapping from |label_or_predicate| to |mapped_enum|.

    |label_or_predicate| can be either a string value, in which case the
    field value must match the string exactly (after normalization), or a
    predicate specifying which strings constitute a match.

    Optionally, the |from_field| parameter allows values to be mapped across
    fields. For example: `add('PENDING', BondStatus.PENDING, BondType)`
    remaps the bond_type field to a bond_status when the bond_type is set to
    'PENDING'. When |from_field| is omitted, the class of |mapped_enum| is
    used. Mappings *between* entity types are not allowed.

    Note: take care not to add multiple predicates which match the same
    string, as EnumOverrides.parse will throw an exception if too many
    matches are found.
    """
    source_field = mapped_enum.__class__ if from_field is None else from_field

    if not isinstance(label_or_predicate, str):
        # Anything that is not a string is treated as a predicate.
        self._predicate_maps[source_field].add(
            _EnumMatcher(label_or_predicate, mapped_enum))
        return self

    normalized_label = normalize(label_or_predicate, remove_punctuation=True)
    self._maps[source_field][normalized_label] = mapped_enum
    return self
def _parse_state_external_id(id_str):
    """Reverses the preprocessing applied to state_person_external_ids when
    converting from ingest_info py -> ingest_info proto.

    The |id_str| is first run through parse_external_id, the result is passed
    to get_external_id as the synthetic id, and the extracted external id is
    returned in normalized form.
    """
    return normalize(
        get_external_id(synthetic_id=parse_external_id(id_str)))
def add(
    self,
    label: str,
    mapped_enum: EntityEnum,
    from_field: Optional[EntityEnumType] = None,
    force_overwrite: bool = False,
) -> "EnumOverrides.Builder":
    """Adds a mapping from |label| to |mapped_enum|.

    As |label| must be a string, the provided field value must match the
    string exactly (after normalization) to constitute a match.

    Optionally, the |from_field| parameter allows values to be mapped across
    fields. For example: `add('PENDING', BondStatus.PENDING, BondType)`
    remaps the bond_type field to a bond_status when the bond_type is set to
    'PENDING'. When |from_field| is omitted, the class of |mapped_enum| is
    used. Mappings *between* entity types are not allowed.

    If the |force_overwrite| parameter is set, then it is permitted to change
    the entity enum an existing label maps to. Without it, attempting to
    re-set a label to a different value will raise an exception.

    Returns this builder so that calls can be chained.

    Raises ValueError if |label| is already mapped to a different enum value
    for |from_field| and |force_overwrite| is not set.
    """
    if from_field is None:
        from_field = mapped_enum.__class__
    label = normalize(label, remove_punctuation=True)

    # Defaulting the lookup to |mapped_enum| means a label with no existing
    # mapping never counts as a conflict.
    if (not force_overwrite and
            (old_mapping := self._str_mappings_dict[from_field].get(
                label, mapped_enum)) != mapped_enum):
        # A mapping already exists for this label and it differs from the
        # mapped value that was passed in.
        raise ValueError(
            "Cannot override a mapping that has already been set. "
            f"{label=} was mapped to {old_mapping=} but call was made to map to {mapped_enum=}"
        )

    # Record the mapping and hand back the builder, as the return annotation
    # promises.
    self._str_mappings_dict[from_field][label] = mapped_enum
    return self
def _parse_to_enum(
        cls: Type[ClsT],
        label: str,
        enum_overrides: "EnumOverrides") -> Optional["EntityEnum"]:
    """Attempts to parse |label| using the provided |enum_overrides| first
    and the default map of |cls| second.

    Ignores punctuation by treating punctuation as a separator, e.g. `(N/A)`
    will map to the same value as `N A`.

    Returns None when the overrides mark the label as ignored. Raises
    EnumParsingError when the overrides fail while parsing or when the label
    is not present in the default map."""
    normalized_label = normalize(label, remove_punctuation=True)
    if enum_overrides.should_ignore(normalized_label, cls):
        return None

    try:
        override_result = enum_overrides.parse(normalized_label, cls)
    except EnumParsingError:
        # Already the right error type; let it propagate untouched.
        raise
    except Exception as e:
        # If a mapper throws another type of error, convert it to an enum
        # parsing error
        raise EnumParsingError(cls, normalized_label) from e

    if override_result is not None:
        return override_result

    default_map = cls._get_default_map()
    try:
        return default_map[normalized_label]
    except KeyError as e:
        raise EnumParsingError(cls, normalized_label) from e
def ignore(self, label: str,
           from_field: EntityEnumType) -> "EnumOverrides.Builder":
    """Registers the normalized form of |label| as an ignored value for the
    |from_field| enum class and returns this builder."""
    normalized_label = normalize(label, remove_punctuation=True)
    ignored_labels = self._ignores[from_field]
    ignored_labels.add(normalized_label)
    return self
def test_supervising_officer_mapper_po_roles(self) -> None:
    """Checks that every listed MO PO job title, once normalized, is mapped
    to StateAgentType.SUPERVISION_OFFICER."""
    parole_officer_roles = [
        "P&P OF I",
        "PROBATION/PAROLE OFCR II",
        "PROBATION & PAROLE OFCR I",
        "P&P UNIT SPV",
        "PROBATION/PAROLE UNIT SPV",
        "PROBATION/PAROLE OFCR I",
        "DIST ADMIN II (P & P)",
        "PROBATION & PAROLE UNIT S",
        "DIST ADMIN I (P & P)",
        "P&P OF II",
        "P&P ASST I",
        "PROBATION/PAROLE ASST I",
        "PROBATION/PAROLE OFCR III",
        "PROBATION/PAROLE ASST II",
        "PROBATION & PAROLE ASST I",
        "P&P ASST II",
        "P&P ADMIN",
        "PROBATION & PAROLE OFCR 1",
        "PROBATION/PAROLE OFCER II",
        "PROBATION?PAROLE OFCR I",
        "P&P OFF I",
        "P&P UNIT SUPV",
        "PROBATION 7 PAROLE OFCR I",
        "PROBATION & PAROLE OFCR I",
    ]

    for raw_role in parole_officer_roles:
        role = normalize(raw_role, remove_punctuation=True)
        self.assertEqual(StateAgentType.SUPERVISION_OFFICER,
                         supervising_officer_mapper(role))
def test_parse_supervision_admission_reason_transfer_within_state(
        self) -> None:
    """Checks that the normalized TRANSFER_WITHIN_STATE enum value is mapped
    back to TRANSFER_WITHIN_STATE by the admission reason mapper."""
    expected_reason = \
        StateSupervisionPeriodAdmissionReason.TRANSFER_WITHIN_STATE
    raw_text = normalize(expected_reason.value, remove_punctuation=True)

    self.assertEqual(
        expected_reason,
        supervision_period_admission_reason_mapper(raw_text))
def ignore(self,
           label_or_predicate: Union[str, Callable[[str], bool]],
           from_field: EntityEnumMeta) -> 'EnumOverrides.Builder':
    """Marks strings matching |label_or_predicate| as ignored values for the
    |from_field| enum class.

    A string is normalized and stored as an exact label to ignore; any other
    value is stored as a predicate. Returns this builder."""
    if not isinstance(label_or_predicate, str):
        self._predicate_ignores[from_field].add(label_or_predicate)
        return self

    ignored_label = normalize(label_or_predicate, remove_punctuation=True)
    self._ignores[from_field].add(ignored_label)
    return self
def copy_fields_to_builder(
        new: entities.StateCharge.Builder,
        proto: StateCharge,
        metadata: IngestMetadata) -> None:
    """Mutates the provided |new| builder by converting an ingest_info proto
    StateCharge.

    Enum fields are resolved using the enum overrides on |metadata|. When the
    proto has no description but does have an NCIC code, the description is
    looked up from the NCIC code. When no classification type could be
    resolved, it is searched for in the description.

    Note: This will not copy children into the Builder!
    """
    enum_mappings = EnumMappings(
        proto,
        {
            'status': ChargeStatus,
            'classification_type': StateChargeClassificationType,
        },
        metadata.enum_overrides,
    )

    # Enum values
    new.status = enum_mappings.get(
        ChargeStatus, default=ChargeStatus.PRESENT_WITHOUT_INFO)
    new.status_raw_text = fn(normalize, 'status', proto)
    new.classification_type = enum_mappings.get(
        StateChargeClassificationType)
    new.classification_type_raw_text = fn(
        normalize, 'classification_type', proto)

    # 1-to-1 mappings
    new.external_id = fn(parse_external_id, 'state_charge_id', proto)
    new.offense_date = fn(parse_date, 'offense_date', proto)
    new.date_charged = fn(parse_date, 'date_charged', proto)
    new.state_code = parse_region_code_with_override(
        proto, 'state_code', metadata)
    new.county_code = fn(normalize, 'county_code', proto)
    new.statute = fn(normalize, 'statute', proto)

    new.ncic_code = fn(normalize, 'ncic_code', proto)
    new.description = fn(normalize, 'description', proto)
    # Fall back to the NCIC description only when none was provided.
    if new.description is None:
        if new.ncic_code is not None:
            looked_up_description = ncic.get_description(new.ncic_code)
            if looked_up_description:
                new.description = normalize(looked_up_description)

    new.attempted = fn(parse_bool, 'attempted', proto)
    # Fall back to searching the description for a classification type.
    if new.classification_type is None:
        new.classification_type = (
            StateChargeClassificationType.find_in_string(new.description))
    new.classification_subtype = fn(
        normalize, 'classification_subtype', proto)
    new.counts = fn(parse_int, 'counts', proto)
    new.charge_notes = fn(normalize, 'charge_notes', proto)
    new.is_controlling = fn(parse_bool, 'is_controlling', proto)
    new.charging_entity = fn(normalize, 'charging_entity', proto)
def _us_mo_get_pfi_info_for_period_if_commitment_from_supervision(
    incarceration_period_list_index: int,
    sorted_incarceration_periods: List[StateIncarcerationPeriod],
) -> PurposeForIncarcerationInfo:
    """Infers the correct purpose_for_incarceration values for sanction
    admissions to periods that don't have the correct values added at
    ingest-time.

    Counts the treatment and shock incarceration codes found in the
    admission_reason_raw_text to determine what kind of sanction admission
    occurred. Periods that are not sanction admissions, that already have a
    sanction purpose_for_incarceration, or that have no raw text keep their
    existing purpose_for_incarceration.

    Raises a ValueError if a sanction admission needing inference contains
    neither treatment nor shock status codes."""
    ip = sorted_incarceration_periods[incarceration_period_list_index]

    needs_inference = (
        ip.admission_reason
        == StateIncarcerationPeriodAdmissionReason.SANCTION_ADMISSION
        and ip.specialized_purpose_for_incarceration
        not in SANCTION_ADMISSION_PURPOSE_FOR_INCARCERATION_VALUES
        and ip.admission_reason_raw_text is not None)

    pfi_override = None
    if needs_inference:
        # Find the correct pfi for this sanction admission
        status_codes = normalize(
            ip.admission_reason_raw_text,
            remove_punctuation=True,
        ).split(" ")

        num_treatment_status_codes = sum(
            1 for code in status_codes
            if code in TREATMENT_SANCTION_STATUS_CODES)
        num_shock_status_codes = sum(
            1 for code in status_codes
            if code in SHOCK_SANCTION_STATUS_CODES)

        if num_treatment_status_codes == 0 and num_shock_status_codes == 0:
            raise ValueError(
                "admission_reason_raw_text: "
                f"[{ip.admission_reason_raw_text}] is being "
                "mapped to a SANCTION_ADMISSION without containing "
                "any sanction admission status codes.")

        # We don't ever expect to see a mix of treatment and shock codes,
        # but we handle this rare case by prioritizing TREATMENT_IN_PRISON
        if num_shock_status_codes > num_treatment_status_codes:
            pfi_override = (
                StateSpecializedPurposeForIncarceration.SHOCK_INCARCERATION)
        else:
            pfi_override = (
                StateSpecializedPurposeForIncarceration.TREATMENT_IN_PRISON)

    return PurposeForIncarcerationInfo(
        purpose_for_incarceration=(
            pfi_override or ip.specialized_purpose_for_incarceration),
        # There are no defined pfi subtypes for US_MO
        purpose_for_incarceration_subtype=None,
    )
def add(self,
        label: str,
        mapped_enum: EntityEnum,
        from_field: EntityEnumMeta = None) -> 'EnumOverrides.Builder':
    """Adds a mapping from |label| to |mapped_enum|.

    As |label| must be a string, the provided field value must match the
    string exactly (after normalization) to constitute a match.

    Optionally, the |from_field| parameter allows values to be mapped across
    fields. For example: `add('PENDING', BondStatus.PENDING, BondType)`
    remaps the bond_type field to a bond_status when the bond_type is set to
    'PENDING'. When |from_field| is omitted, the class of |mapped_enum| is
    used. Mappings *between* entity types are not allowed.

    Returns this builder so that calls can be chained.
    """
    source_field = mapped_enum.__class__ if from_field is None else from_field
    normalized_label = normalize(label, remove_punctuation=True)
    self._str_mappings_dict[source_field][normalized_label] = mapped_enum
    return self
def test_all_enum_values_covered_in_default_overrides(self) -> None:
    """Checks that, for every state enum class, the normalized value of each
    member is a key in the class's default map and maps back to that
    member."""
    all_enum_classes = self._get_all_state_enum_classes()
    self.assertTrue(len(all_enum_classes) > 0)

    for entity_enum_cls in all_enum_classes:
        default_map = getattr(entity_enum_cls, "_get_default_map")()
        for member in entity_enum_cls:
            normalized_value = normalize(
                member.value, remove_punctuation=True)
            failure_message = (
                f"[{normalized_value}] not found in "
                f"{entity_enum_cls} default mappings.")
            self.assertIn(normalized_value, default_map, failure_message)
            self.assertEqual(default_map[normalized_value], member)
def test_all_enum_values_covered_in_default_overrides(self):
    """Checks that, for every state enum class, the normalized value of each
    member is a key in the class's default map and maps back to that
    member."""
    all_enum_classes = self._get_all_state_enum_classes()
    self.assertTrue(len(all_enum_classes) > 0)

    for entity_enum_cls in all_enum_classes:
        # pylint: disable=protected-access
        default_map = entity_enum_cls._get_default_map()
        for member in entity_enum_cls:
            normalized_value = normalize(
                member.value, remove_punctuation=True)
            failure_message = (
                f'[{normalized_value}] not found in '
                f'{entity_enum_cls} default mappings.')
            self.assertIn(normalized_value, default_map, failure_message)
            self.assertEqual(default_map[normalized_value], member)
def _parse_to_enum(cls, label: str, enum_overrides: 'EnumOverrides') \
        -> Optional['EntityEnum']:
    """Attempts to parse |label| using the provided |enum_overrides| first
    and the default map of |cls| second.

    Ignores punctuation by treating punctuation as a separator, e.g. `(N/A)`
    will map to the same value as `N A`.

    Returns None when the overrides mark the label as ignored. Raises
    EnumParsingError when the label is not found in the default map."""
    label = normalize(label, remove_punctuation=True)
    if enum_overrides.should_ignore(label, cls):
        return None

    overridden_value = enum_overrides.parse(label, cls)
    if overridden_value is not None:
        return overridden_value

    complete_map = cls._get_default_map()
    try:
        return complete_map[label]
    except KeyError as e:
        # Chain explicitly so the KeyError is recorded as the direct cause
        # of the parsing error.
        raise EnumParsingError(cls, label) from e
def parse(self, label: str,
          enum_class: EntityEnumMeta) -> Optional[EntityEnum]:
    """Looks up the override for |label| within the |enum_class| mappings.

    Returns None if the normalized label should be ignored or nothing
    matches. An exact string mapping is preferred; otherwise the values of
    all predicate matchers that accept the label are collected. Raises a
    ValueError if the matchers yield more than one distinct value."""
    normalized_label = normalize(label, remove_punctuation=True)
    if self.should_ignore(normalized_label, enum_class):
        return None

    exact_match = self._maps[enum_class].get(normalized_label)
    if exact_match:
        return exact_match

    matches = set()
    for matcher in self._predicate_maps[enum_class]:
        if matcher.predicate(normalized_label):
            matches.add(matcher.value)

    if len(matches) > 1:
        raise ValueError("Overrides map matched too many values from label"
                         " {}: [{}]".format(normalized_label, matches))
    return matches.pop() if matches else None
def parse(self, label: str,
          enum_class: EntityEnumType) -> Optional[EntityEnum]:
    """Looks up the override for |label| within the |enum_class| mappings.

    Returns None if the normalized label should be ignored or nothing
    matches. An exact string mapping is preferred; otherwise every mapper
    registered for |enum_class| is applied to the label and the non-None
    results are collected.

    Raises a ValueError if the mappers yield more than one distinct value.
    """
    label = normalize(label, remove_punctuation=True)
    if self.should_ignore(label, enum_class):
        return None

    direct_lookup = self._str_mappings_dict[enum_class].get(label)
    if direct_lookup:
        return direct_lookup

    # Invoke each mapper exactly once; the results are filtered afterwards
    # rather than calling the mapper a second time to obtain the value.
    mapper_results = (
        mapper(label) for mapper in self._mappers_dict[enum_class])
    matches = {result for result in mapper_results if result is not None}

    if len(matches) > 1:
        raise ValueError(
            "Overrides map matched too many values from label {}: [{}]".
            format(label, matches))
    if matches:
        return matches.pop()
    return None
def convert_field_value(field: attr.Attribute,
                        field_value: Union[str, EnumParser]) -> Any:
    """Converts |field_value| into the value to store for the attr |field|.

    Returns None for a None value or a blank string. Values for forward-ref
    and list fields are returned unchanged. A converter registered for the
    field name in converter_overrides takes precedence over the default
    handling. EnumParser values are parsed for enum fields, and strings are
    normalized or parsed as a date, int or bool depending on the field.

    Raises a ValueError if a registered converter does not accept the type of
    |field_value|, if an EnumParser is supplied for a non-enum field, or if
    no conversion applies.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source; confirm that the converter_overrides check
    is at function level and that the bool check belongs inside the final
    `isinstance(field_value, str)` branch.
    """
    if field_value is None:
        return None

    # Forward references and lists are passed through untouched.
    if is_forward_ref(field) or is_list(field):
        return field_value

    # Empty or whitespace-only strings are treated as missing values.
    if isinstance(field_value, str):
        if not field_value or not field_value.strip():
            return None

    # converter_overrides is not defined in this block; presumably it comes
    # from an enclosing scope.
    if field.name in converter_overrides:
        converter = converter_overrides[field.name]
        if not isinstance(field_value, converter.field_type):
            raise ValueError(
                f"Found converter for field [{field.name}] in the converter_overrides, but expected "
                f"field type [{converter.field_type}] does not match actual field type "
                f"[{type(field_value)}]")
        return converter.convert(field_value)

    if isinstance(field_value, EnumParser):
        if is_enum(field):
            return field_value.parse()
        raise ValueError(
            f"Found field value [{field_value}] for field that is not an enum [{field}]."
        )

    # Default string handling, chosen by the declared type of the field.
    if isinstance(field_value, str):
        if is_str(field):
            return normalize(field_value)
        if is_date(field):
            return parse_date(field_value)
        if is_int(field):
            return parse_int(field_value)
        if field.type in {bool, Union[bool, None]}:
            return parse_bool(field_value)

    raise ValueError(f"Unsupported field {field.name}")
def create_comma_separated_list(proto, field_name: str):
    """Normalizes every value of the list field named |field_name| on the
    given |proto| and returns them joined into one string with ', '."""
    normalized_values = map(normalize, getattr(proto, field_name))
    return ', '.join(normalized_values)
def should_ignore(self, label: str, enum_class: EntityEnumType) -> bool:
    """Returns whether the normalized |label| is ignored for |enum_class|,
    either because it is a registered ignore label or because any registered
    ignore predicate accepts it."""
    normalized_label = normalize(label, remove_punctuation=True)
    ignore_predicates = self._ignore_predicates_dict[enum_class]

    if normalized_label in self._ignores[enum_class]:
        return True
    return any(
        predicate(normalized_label) for predicate in ignore_predicates)
def parse_external_id(id_str):
    """Returns the normalized form of the provided |id_str|, or None when
    the |id_str| is a generated id."""
    return (None if common_utils.is_generated_id(id_str)
            else normalize(id_str))