Python normalize示例，recidiviz.common.str_field_utils.normalize Python示例

示例#1

0

显示文件

文件： converter_utils.py 项目： teymour-aldridge/pulse-data

def parse_region_code_with_override(proto, region_field_name: str,
                                    metadata: IngestMetadata):
    """Resolves the region code for |proto|, honoring a metadata override.

    A truthy |metadata.region| always wins and is returned in normalized
    form. Failing that, the value stored on |proto| under
    |region_field_name| is normalized and returned when that field is set.
    Returns None when neither source provides a region."""
    region_override = metadata.region if metadata else None
    if region_override:
        return normalize(region_override)

    if not proto.HasField(region_field_name):
        return None
    return normalize(getattr(proto, region_field_name))

示例#2

0

显示文件

 def add(self,
         label_or_predicate: Union[str, Callable[[str], bool]],
         mapped_enum: EntityEnum,
         from_field: EntityEnumMeta = None) -> 'EnumOverrides.Builder':
     """Registers an override that maps matching values to |mapped_enum|.

     |label_or_predicate| is either a string or a predicate. A string is
     normalized (punctuation removed) and stored as a direct mapping, so
     the field value must match it exactly. A predicate is stored
     together with |mapped_enum| as an _EnumMatcher and specifies which
     strings constitute a match.

     |from_field| is the enum class whose values are being remapped; it
     defaults to the class of |mapped_enum|. Passing a different class
     maps values across fields. For example:
     `add('PENDING', BondStatus.PENDING, BondType)` remaps the bond_type
     field to a bond_status when the bond_type is set to 'PENDING'.
     Mappings *between* entity types are not allowed.

     Note: take care not to add multiple predicates that match the same
     string, as EnumOverrides.parse will throw an exception if too many
     matches are found.

     Returns this builder.
     """
     source_enum_cls = (
         mapped_enum.__class__ if from_field is None else from_field)

     if not isinstance(label_or_predicate, str):
         # Anything that is not a string is treated as a predicate.
         self._predicate_maps[source_enum_cls].add(
             _EnumMatcher(label_or_predicate, mapped_enum))
         return self

     normalized_label = normalize(label_or_predicate,
                                  remove_punctuation=True)
     self._maps[source_enum_cls][normalized_label] = mapped_enum
     return self

示例#3

0

显示文件

文件： state_person_external_id.py 项目： teymour-aldridge/pulse-data

def _parse_state_external_id(id_str):
    """Recovers the normalized external id from a preprocessed |id_str|.

    Undoes the preprocessing of state_person_external_ids done when
    converting from ingest_info py -> ingest_info proto: |id_str| is run
    through parse_external_id, the external id is pulled out of the
    resulting synthetic id with get_external_id, and that external id is
    returned normalized.
    """
    return normalize(
        get_external_id(synthetic_id=parse_external_id(id_str)))

示例#4

0

显示文件

        def add(
            self,
            label: str,
            mapped_enum: EntityEnum,
            from_field: Optional[EntityEnumType] = None,
            force_overwrite: bool = False,
        ) -> "EnumOverrides.Builder":
            """Adds a mapping from |label| to |mapped_enum|. As |label| must be a string, the provided field value must
            match the string exactly to constitute a match. The label is normalized (punctuation removed) before it
            is compared against existing mappings.

            Optionally, the |from_field| parameter allows values to be mapped across fields. For example:
            `add('PENDING', BondStatus.PENDING, BondType)` remaps the bond_type field to a bond_status when the
            bond_type is set to 'PENDING'. Mappings *between* entity types are not allowed. When |from_field| is
            None, the class of |mapped_enum| is used.

            If the |force_overwrite| parameter is set, then it is permitted to change the entity enum an
            existing label maps to. Without it, attempting to re-set a label to a different value will raise
            an exception.

            Raises:
                ValueError: if |force_overwrite| is False and |label| is already mapped, for |from_field|, to a
                    value different from |mapped_enum|.
            """
            if from_field is None:
                from_field = mapped_enum.__class__
            label = normalize(label, remove_punctuation=True)
            # .get() falls back to |mapped_enum| itself, so a label with no
            # existing mapping never compares as different and never raises.
            if (not force_overwrite and
                (old_mapping := self._str_mappings_dict[from_field].get(
                    label, mapped_enum)) != mapped_enum):
                # A mapping already exists for this label and it differs from the
                # mapped value that was passed in.
                raise ValueError(
                    "Cannot override a mapping that has already been set. "
                    f"{label=} was mapped to {old_mapping=} but call was made to map to {mapped_enum=}"
                )
            # NOTE(review): this excerpt ends here. No statement storing the
            # mapping or returning the builder is visible, despite the declared
            # return type - confirm against the complete source.

示例#5

0

显示文件

文件： entity_enum.py 项目： jazzPouls/pulse-data

    def _parse_to_enum(
            cls: Type[ClsT], label: str,
            enum_overrides: "EnumOverrides") -> Optional["EntityEnum"]:
        """Attempts to parse |label| using the provided |enum_overrides| and,
        failing that, the default map of |cls|. Ignores punctuation by
        treating punctuation as a separator, e.g. `(N/A)` will map to the
        same value as `N A`.

        Returns:
            None when |enum_overrides| says the normalized label should be
            ignored for |cls|; otherwise the override value when
            |enum_overrides.parse| yields one, else the entry for the
            normalized label in the default map of |cls|.

        Raises:
            EnumParsingError: if |enum_overrides.parse| raises (any other
                exception type is converted and chained as the cause), or
                if the normalized label is missing from the default map.
        """
        label = normalize(label, remove_punctuation=True)
        if enum_overrides.should_ignore(label, cls):
            return None

        try:
            overridden_value = enum_overrides.parse(label, cls)
        except EnumParsingError:
            # Already the right error type: re-raise untouched so the
            # original traceback is preserved.
            raise
        except Exception as e:
            # If a mapper throws another type of error, convert it to an enum parsing error
            raise EnumParsingError(cls, label) from e

        if overridden_value is not None:
            return overridden_value

        complete_map = cls._get_default_map()
        try:
            return complete_map[label]
        except KeyError as e:
            raise EnumParsingError(cls, label) from e

示例#6

0

显示文件

文件： enum_overrides.py 项目： jazzPouls/pulse-data

        def ignore(self, label: str,
                   from_field: EntityEnumType) -> "EnumOverrides.Builder":
            """Records |label| as a value to ignore for the |from_field| enum class.

            The label is normalized with punctuation removed before it is
            recorded. Returns this builder.
            """
            normalized_label = normalize(label, remove_punctuation=True)
            ignored_for_field = self._ignores[from_field]
            ignored_for_field.add(normalized_label)
            return self

示例#7

0

显示文件

文件： us_mo_enum_helpers_test.py 项目： Recidiviz/pulse-data

 def test_supervising_officer_mapper_po_roles(self) -> None:
     """Checks that every listed MO PO job title, once normalized, is
     mapped to StateAgentType.SUPERVISION_OFFICER."""
     raw_job_titles = [
         "P&P OF I",
         "PROBATION/PAROLE OFCR II",
         "PROBATION & PAROLE OFCR I",
         "P&P UNIT SPV",
         "PROBATION/PAROLE UNIT SPV",
         "PROBATION/PAROLE OFCR I",
         "DIST ADMIN II (P & P)",
         "PROBATION & PAROLE UNIT S",
         "DIST ADMIN I (P & P)",
         "P&P OF II",
         "P&P ASST I",
         "PROBATION/PAROLE ASST I",
         "PROBATION/PAROLE OFCR III",
         "PROBATION/PAROLE ASST II",
         "PROBATION & PAROLE ASST I",
         "P&P ASST II",
         "P&P ADMIN",
         "PROBATION & PAROLE OFCR 1",
         "PROBATION/PAROLE OFCER II",
         "PROBATION?PAROLE OFCR I",
         "P&P OFF I",
         "P&P UNIT SUPV",
         "PROBATION 7 PAROLE OFCR I",
         "PROBATION & PAROLE OFCR I",
     ]

     # Normalize every title up front, before any assertion runs.
     normalized_job_titles = []
     for raw_title in raw_job_titles:
         normalized_job_titles.append(
             normalize(raw_title, remove_punctuation=True))

     for normalized_title in normalized_job_titles:
         mapped_agent_type = supervising_officer_mapper(normalized_title)
         self.assertEqual(StateAgentType.SUPERVISION_OFFICER,
                          mapped_agent_type)

示例#8

0

显示文件

文件： us_mo_enum_helpers_test.py 项目： Recidiviz/pulse-data

 def test_parse_supervision_admission_reason_transfer_within_state(
         self) -> None:
     """Checks that the normalized TRANSFER_WITHIN_STATE value is mapped
     back to TRANSFER_WITHIN_STATE by the admission reason mapper."""
     normalized_raw_text = normalize(
         StateSupervisionPeriodAdmissionReason.TRANSFER_WITHIN_STATE.value,
         remove_punctuation=True)
     parsed_reason = supervision_period_admission_reason_mapper(
         normalized_raw_text)
     self.assertEqual(
         StateSupervisionPeriodAdmissionReason.TRANSFER_WITHIN_STATE,
         parsed_reason)

示例#9

0

显示文件

        def ignore(self,
                   label_or_predicate: Union[str, Callable[[str], bool]],
                   from_field: EntityEnumMeta) -> 'EnumOverrides.Builder':
            """Registers values to ignore for the |from_field| enum class.

            A string |label_or_predicate| is normalized (punctuation
            removed) and added to the ignored labels; anything else is
            treated as a predicate and added to the ignore predicates.
            Returns this builder."""
            if not isinstance(label_or_predicate, str):
                self._predicate_ignores[from_field].add(label_or_predicate)
                return self

            normalized_label = normalize(label_or_predicate,
                                         remove_punctuation=True)
            self._ignores[from_field].add(normalized_label)
            return self

示例#10

0

显示文件

def copy_fields_to_builder(
        new: entities.StateCharge.Builder,
        proto: StateCharge,
        metadata: IngestMetadata) -> None:
    """Mutates the provided |new| builder by converting an ingest_info proto
    StateCharge.

    Enum fields are resolved through EnumMappings using
    |metadata.enum_overrides|; the remaining fields are read from |proto|
    via `fn` with the parser matching each field. Two fallbacks apply:
    a missing description is looked up from the NCIC code when one is
    set, and a missing classification type is then searched for in the
    description.

    Note: This will not copy children into the Builder!
    """
    enum_mappings = EnumMappings(
        proto,
        {
            'status': ChargeStatus,
            'classification_type': StateChargeClassificationType,
        },
        metadata.enum_overrides)

    # Enum values, each with its raw text alongside
    new.status = enum_mappings.get(
        ChargeStatus, default=ChargeStatus.PRESENT_WITHOUT_INFO)
    new.status_raw_text = fn(normalize, 'status', proto)
    new.classification_type = enum_mappings.get(
        StateChargeClassificationType)
    new.classification_type_raw_text = fn(
        normalize, 'classification_type', proto)

    # 1-to-1 mappings
    new.external_id = fn(parse_external_id, 'state_charge_id', proto)
    new.offense_date = fn(parse_date, 'offense_date', proto)
    new.date_charged = fn(parse_date, 'date_charged', proto)
    new.state_code = parse_region_code_with_override(
        proto, 'state_code', metadata)
    new.county_code = fn(normalize, 'county_code', proto)
    new.statute = fn(normalize, 'statute', proto)

    new.ncic_code = fn(normalize, 'ncic_code', proto)
    new.description = fn(normalize, 'description', proto)
    # Fall back to the NCIC description only when the proto gave none.
    if new.description is None and new.ncic_code is not None:
        looked_up_description = ncic.get_description(new.ncic_code)
        if looked_up_description:
            new.description = normalize(looked_up_description)

    new.attempted = fn(parse_bool, 'attempted', proto)
    # Runs after the description fallback so that it sees the final
    # description value.
    if new.classification_type is None:
        new.classification_type = (
            StateChargeClassificationType.find_in_string(new.description))
    new.classification_subtype = fn(
        normalize, 'classification_subtype', proto)
    new.counts = fn(parse_int, 'counts', proto)
    new.charge_notes = fn(normalize, 'charge_notes', proto)
    new.is_controlling = fn(parse_bool, 'is_controlling', proto)
    new.charging_entity = fn(normalize, 'charging_entity', proto)

示例#11

0

显示文件

def _us_mo_get_pfi_info_for_period_if_commitment_from_supervision(
    incarceration_period_list_index: int,
    sorted_incarceration_periods: List[StateIncarcerationPeriod],
) -> PurposeForIncarcerationInfo:
    """Infers the purpose_for_incarceration for a sanction admission whose
    period did not get the correct value at ingest-time.

    The period at |incarceration_period_list_index| is inspected. When it
    is a SANCTION_ADMISSION, its specialized_purpose_for_incarceration is
    not one of the sanction admission values, and it has raw admission
    text, the treatment and shock incarceration codes found in
    admission_reason_raw_text decide the override. In every other case
    the period's existing specialized_purpose_for_incarceration is used.

    Raises ValueError when such a period's raw text contains neither a
    treatment nor a shock status code."""
    ip = sorted_incarceration_periods[incarceration_period_list_index]

    needs_inferred_pfi = (
        ip.admission_reason
        == StateIncarcerationPeriodAdmissionReason.SANCTION_ADMISSION
        and ip.specialized_purpose_for_incarceration
        not in SANCTION_ADMISSION_PURPOSE_FOR_INCARCERATION_VALUES
        and ip.admission_reason_raw_text is not None)

    inferred_pfi = None
    if needs_inferred_pfi:
        # Find the correct pfi for this sanction admission
        raw_text_codes = normalize(
            ip.admission_reason_raw_text,
            remove_punctuation=True,
        ).split(" ")

        treatment_code_count = sum(
            1 for status_code in raw_text_codes
            if status_code in TREATMENT_SANCTION_STATUS_CODES)
        shock_code_count = sum(
            1 for status_code in raw_text_codes
            if status_code in SHOCK_SANCTION_STATUS_CODES)

        if not treatment_code_count and not shock_code_count:
            raise ValueError(
                "admission_reason_raw_text: "
                f"[{ip.admission_reason_raw_text}] is being "
                "mapped to a SANCTION_ADMISSION without containing "
                "any sanction admission status codes.")

        # We don't ever expect to see a mix of treatment and shock codes.
        # If it happens, SHOCK_INCARCERATION is chosen only when shock
        # codes strictly outnumber treatment codes; otherwise (ties
        # included) TREATMENT_IN_PRISON is used.
        if shock_code_count > treatment_code_count:
            inferred_pfi = (
                StateSpecializedPurposeForIncarceration.SHOCK_INCARCERATION)
        else:
            inferred_pfi = (
                StateSpecializedPurposeForIncarceration.TREATMENT_IN_PRISON)

    return PurposeForIncarcerationInfo(
        purpose_for_incarceration=(
            inferred_pfi or ip.specialized_purpose_for_incarceration),
        # There are no defined pfi subtypes for US_MO
        purpose_for_incarceration_subtype=None,
    )

示例#12

0

显示文件

        def add(self,
                label: str,
                mapped_enum: EntityEnum,
                from_field: EntityEnumMeta = None) -> 'EnumOverrides.Builder':
            """Registers a mapping from |label| to |mapped_enum| and returns this builder.

            |label| is normalized with punctuation removed before it is stored. As it must be a string, the
            provided field value must match the string exactly to constitute a match.

            |from_field| defaults to the class of |mapped_enum|; passing another enum class allows values to be
            mapped across fields. For example:
            `add('PENDING', BondStatus.PENDING, BondType)` remaps the bond_type field to a bond_status when the
            bond_type is set to 'PENDING'. Mappings *between* entity types are not allowed.
            """
            source_enum_cls = mapped_enum.__class__ if from_field is None else from_field
            normalized_label = normalize(label, remove_punctuation=True)
            self._str_mappings_dict[source_enum_cls][normalized_label] = mapped_enum
            return self

示例#13

0

显示文件

    def test_all_enum_values_covered_in_default_overrides(self) -> None:
        """Checks that each state enum class's default map covers all of its values.

        For every member, the normalized (punctuation-removed) value must be
        a key of the class's default map and must map back to that member.
        """
        state_enum_classes = self._get_all_state_enum_classes()
        self.assertTrue(len(state_enum_classes) > 0)

        for enum_cls in state_enum_classes:
            # Looked up by name rather than accessed directly as a protected member.
            default_map = getattr(enum_cls, "_get_default_map")()

            for member in enum_cls:
                normalized_value = normalize(member.value, remove_punctuation=True)
                missing_message = (
                    f"[{normalized_value}] not found in "
                    f"{enum_cls} default mappings."
                )
                self.assertIn(normalized_value, default_map, missing_message)

                self.assertEqual(default_map[normalized_value], member)

示例#14

0

显示文件

文件： state_enums_test.py 项目： teymour-aldridge/pulse-data

    def test_all_enum_values_covered_in_default_overrides(self):
        """Checks that each state enum class's default map covers all of
        its values.

        For every member, the normalized (punctuation-removed) value must
        be a key of the class's default map and must map back to that
        member.
        """
        state_enum_classes = self._get_all_state_enum_classes()
        self.assertTrue(len(state_enum_classes) > 0)

        for enum_cls in state_enum_classes:
            # pylint: disable=protected-access
            default_map = enum_cls._get_default_map()

            for member in enum_cls:
                normalized_value = normalize(
                    member.value, remove_punctuation=True)
                missing_message = (
                    f'[{normalized_value}] not found in '
                    f'{enum_cls} default mappings.')
                self.assertIn(
                    normalized_value, default_map, missing_message)

                self.assertEqual(default_map[normalized_value], member)

示例#15

0

显示文件

    def _parse_to_enum(cls, label: str, enum_overrides: 'EnumOverrides') \
            -> Optional['EntityEnum']:
        """Attempts to parse |label| using the provided |enum_overrides| and,
        failing that, the default map of |cls|. Ignores punctuation by
        treating punctuation as a separator, e.g. `(N/A)` will map to the
        same value as `N A`.

        Returns:
            None when |enum_overrides| says the normalized label should be
            ignored for |cls|; otherwise the override value when
            |enum_overrides.parse| yields one, else the entry for the
            normalized label in the default map of |cls|.

        Raises:
            EnumParsingError: if the normalized label is missing from the
                default map. The underlying KeyError is attached as the
                cause.
        """
        label = normalize(label, remove_punctuation=True)
        if enum_overrides.should_ignore(label, cls):
            return None

        overridden_value = enum_overrides.parse(label, cls)
        if overridden_value is not None:
            return overridden_value

        complete_map = cls._get_default_map()
        try:
            return complete_map[label]
        except KeyError as e:
            # Chain explicitly so the KeyError is reported as the direct
            # cause rather than as an unrelated error during handling.
            raise EnumParsingError(cls, label) from e

示例#16

0

显示文件

    def parse(self,
              label: str,
              enum_class: EntityEnumMeta) -> Optional[EntityEnum]:
        """Looks up the override for |label| within |enum_class|.

        The label is normalized with punctuation removed. Returns None
        when the label should be ignored or when nothing matches. A truthy
        direct mapping takes precedence; otherwise the values of all
        predicate matchers accepting the label are collected and the
        single distinct value, if any, is returned.

        Raises ValueError when the predicate matchers produce more than
        one distinct value for the label."""
        normalized_label = normalize(label, remove_punctuation=True)
        if self.should_ignore(normalized_label, enum_class):
            return None

        exact_match = self._maps[enum_class].get(normalized_label)
        if exact_match:
            return exact_match

        predicate_matches = set()
        for matcher in self._predicate_maps[enum_class]:
            if matcher.predicate(normalized_label):
                predicate_matches.add(matcher.value)

        if len(predicate_matches) > 1:
            error_template = ("Overrides map matched too many values from label"
                              " {}: [{}]")
            raise ValueError(
                error_template.format(normalized_label, predicate_matches))

        return predicate_matches.pop() if predicate_matches else None

示例#17

0

显示文件

文件： enum_overrides.py 项目： jazzPouls/pulse-data

    def parse(self, label: str,
              enum_class: EntityEnumType) -> Optional[EntityEnum]:
        """Looks up the override for |label| within |enum_class|.

        The label is normalized with punctuation removed before any lookup.

        Returns:
            None when the label should be ignored for |enum_class| or when
            nothing matches. Otherwise the direct string mapping when one
            is found, else the single distinct non-None value produced by
            the mappers registered for |enum_class|.

        Raises:
            ValueError: if the mappers produce more than one distinct
                non-None value for the label.
        """
        label = normalize(label, remove_punctuation=True)
        if self.should_ignore(label, enum_class):
            return None

        direct_lookup = self._str_mappings_dict[enum_class].get(label)
        if direct_lookup:
            return direct_lookup

        # Invoke each mapper exactly once per label; the result is both
        # the filter condition and the collected value.
        mapper_results = (
            mapper(label) for mapper in self._mappers_dict[enum_class])
        matches = {
            result for result in mapper_results if result is not None
        }
        if len(matches) > 1:
            raise ValueError(
                "Overrides map matched too many values from label {}: [{}]".
                format(label, matches))
        if matches:
            return matches.pop()
        return None

示例#18

0

显示文件

    def convert_field_value(field: attr.Attribute,
                            field_value: Union[str, EnumParser]) -> Any:
        """Converts |field_value| into the value to store for |field|.

        Checks are applied in this order:
          * None stays None.
          * Forward-ref and list fields get the value back unchanged.
          * Empty or whitespace-only strings become None.
          * A converter registered for the field name in
            converter_overrides is used, after checking the value's type.
          * EnumParser values are parsed, but only for enum fields.
          * Remaining strings are converted according to the field's type
            (str, date, int or bool).

        Raises ValueError when the value's type does not match the
        override converter, when an EnumParser is supplied for a non-enum
        field, or when no conversion applies.
        """
        if field_value is None:
            return None

        if is_forward_ref(field) or is_list(field):
            return field_value

        # Equivalent to "empty, or nothing left after stripping".
        if isinstance(field_value, str) and not (field_value
                                                 and field_value.strip()):
            return None

        if field.name in converter_overrides:
            converter = converter_overrides[field.name]
            if isinstance(field_value, converter.field_type):
                return converter.convert(field_value)
            raise ValueError(
                f"Found converter for field [{field.name}] in the converter_overrides, but expected "
                f"field type [{converter.field_type}] does not match actual field type "
                f"[{type(field_value)}]")

        if isinstance(field_value, EnumParser):
            if not is_enum(field):
                raise ValueError(
                    f"Found field value [{field_value}] for field that is not an enum [{field}]."
                )
            return field_value.parse()

        if not isinstance(field_value, str):
            raise ValueError(f"Unsupported field {field.name}")

        if is_str(field):
            return normalize(field_value)
        if is_date(field):
            return parse_date(field_value)
        if is_int(field):
            return parse_int(field_value)
        if field.type in {bool, Union[bool, None]}:
            return parse_bool(field_value)

        raise ValueError(f"Unsupported field {field.name}")

示例#19

0

显示文件

文件： converter_utils.py 项目： teymour-aldridge/pulse-data

def create_comma_separated_list(proto, field_name: str):
    """Joins the values of the |field_name| list field on |proto| into one
    string, normalizing each value and separating them with ', '."""
    separator = ', '
    normalized_values = []
    for raw_value in getattr(proto, field_name):
        normalized_values.append(normalize(raw_value))
    return separator.join(normalized_values)

示例#20

0

显示文件

文件： enum_overrides.py 项目： jazzPouls/pulse-data

 def should_ignore(self, label: str, enum_class: EntityEnumType) -> bool:
     """Reports whether |label| is to be ignored for |enum_class|.

     The label is normalized with punctuation removed. It is ignored when
     it is one of the ignored labels for |enum_class| or when any ignore
     predicate registered for |enum_class| accepts it."""
     normalized_label = normalize(label, remove_punctuation=True)
     # Fetch the predicates before the label check so both collections
     # are always looked up; the predicates themselves only run when the
     # label check fails.
     ignore_predicates = self._ignore_predicates_dict[enum_class]
     if normalized_label in self._ignores[enum_class]:
         return True
     return any(
         predicate(normalized_label) for predicate in ignore_predicates)

示例#21

0

显示文件

文件： converter_utils.py 项目： teymour-aldridge/pulse-data

def parse_external_id(id_str):
    """Returns the normalized form of |id_str|, or None when
    common_utils.is_generated_id reports that |id_str| is generated."""
    return (None if common_utils.is_generated_id(id_str)
            else normalize(id_str))