def get_or_create_placeholder_child(
        parent_entity: DatabaseEntity,
        child_field_name: str,
        child_class: Type[DatabaseEntity],
        **child_kwargs):
    """Returns a placeholder child found on |parent_entity|'s
    |child_field_name| field, creating one if none is present.

    The current children are read with get_field_as_list(). If any of them
    satisfies is_placeholder(), the first such child is returned. Otherwise a
    new |child_class| instance is built from |child_kwargs|, appended to the
    children, written back with set_field_from_list() and returned.

    Raises:
        EntityMatchingError: if the object built from |child_kwargs| is not
            itself a placeholder.
    """
    existing_children = parent_entity.get_field_as_list(child_field_name)

    existing_placeholders = list(filter(is_placeholder, existing_children))
    if existing_placeholders:
        return existing_placeholders[0]

    logging.info(
        'No placeholder children on entity with id [%s] of type [%s] exist on '
        'field [%s]. Have to create one.',
        parent_entity.get_external_id(),
        parent_entity.get_entity_name(),
        child_field_name)

    created_child = child_class(**child_kwargs)
    if is_placeholder(created_child):
        existing_children.append(created_child)
        parent_entity.set_field_from_list(child_field_name, existing_children)
        return created_child

    # The caller supplied kwargs that produce a non-placeholder object.
    raise EntityMatchingError(
        f'Child created with kwargs is not a placeholder [{child_kwargs}]',
        parent_entity.get_entity_name())
def resolve_child_match_result():
    """Records the outcome of the current child match and detaches merged
    children from their DB placeholder parents.

    Reads |child_field_name|, |child_match_result|, |updated_child_trees| and
    |ancestor_chain_updated| from the enclosing scope. An unmatched child's
    ingested tree is appended to |updated_child_trees| unchanged. Each merged
    child tree is appended to |updated_child_trees| and its entity is removed
    from the parent produced by generate_parent_tree().

    Raises:
        EntityMatchingError: if the enclosing-scope inputs are unset, or if
            two merged children have placeholder parents with different
            ancestor chains.
    """
    if not (child_field_name and child_match_result):
        raise EntityMatchingError(
            f"Expected child_field_name and child_match_result to be set, "
            f"but instead got {child_field_name} and {child_match_result} "
            f"respectively.",
            ingested_unmatched_entity_tree.entity.get_entity_name())

    merged_child_trees = child_match_result.merged_entity_trees
    if not merged_child_trees:
        # Nothing matched: keep the ingested child as-is.
        updated_child_trees.append(child_match_result.ingested_entity_tree)
        return

    for merged_tree in merged_child_trees:
        updated_child_trees.append(merged_tree)

        # Take the merged child off of the DB placeholder it currently
        # hangs from.
        db_placeholder_tree = merged_tree.generate_parent_tree()
        remove_child_from_entity(
            entity=db_placeholder_tree.entity,
            child_field_name=child_field_name,
            child_to_remove=merged_tree.entity)

        # Only one ancestor chain is supported across all placeholders with
        # matched children. The first one seen is recorded; any later one
        # must be equal to it.
        if not ancestor_chain_updated:
            ancestor_chain_updated.extend(db_placeholder_tree.ancestor_chain)
            continue

        if ancestor_chain_updated != db_placeholder_tree.ancestor_chain:
            raise EntityMatchingError(
                f"Expected all placeholder DB entities matched to "
                f"an ingested unmatched entity to have the same "
                f"ancestor chain, but they did not. Found "
                f"conflicting ancestor chains: "
                f"{ancestor_chain_updated} and "
                f"{db_placeholder_tree.ancestor_chain}",
                ingested_entity.get_entity_name())
def get_external_ids_of_cls(persons: List[StatePerson],
                            cls: Type) -> Set[str]:
    """Collects the external ids of every entity of type |cls| found by
    _get_all_entities_of_cls() in the provided |persons|.

    A StatePerson contributes the external_id of each of its external_ids
    objects; any other entity contributes its own external_id.

    Raises:
        EntityMatchingError: if a person has no external_ids, or another
            entity has no external_id.
    """
    collected: Set[str] = set()

    for found in _get_all_entities_of_cls(persons, cls):
        if not isinstance(found, StatePerson):
            id_entity = cast(ExternalIdEntity, found)
            if not id_entity.external_id:
                raise EntityMatchingError(
                    f'Expected all external_ids to be present in cls '
                    f'[{cls.__name__}]', cls.__name__)
            collected.add(id_entity.external_id)
            continue

        # People carry their ids on child external-id objects.
        if not found.external_ids:
            raise EntityMatchingError(
                'No found external_ids on provided person', cls.__name__)
        collected.update(ex.external_id for ex in found.external_ids)

    return collected
def _remove_suffix_from_violation_entity(
        violation_entities: List[
            Union[schema.StateSupervisionViolation,
                  schema.StateSupervisionViolationResponse]]):
    """Strips the last two '-'-delimited segments from the external_id of
    each of the provided |violation_entities|, in place.

    Entities whose external_id is unset or empty are skipped.

    Raises:
        EntityMatchingError: if an external_id does not split into exactly
            three pieces when split from the right on at most two '-'
            characters (i.e. it contains fewer than two '-').
    """
    for entity in violation_entities:
        if not entity.external_id:
            continue

        splits = entity.external_id.rsplit('-', 2)
        if len(splits) != 3:
            # Separate the entity name from the id so the message is legible.
            raise EntityMatchingError(
                f'Unexpected id format for {entity.get_entity_name()} '
                f'[{entity.external_id}]',
                entity.get_entity_name())

        # Everything before the final two segments is the id we keep.
        entity.external_id = splits[0]
def __init__(self, entity: Entity, ancestor_chain: List[Entity]):
    """Stores |entity| together with a shallow copy of |ancestor_chain|.

    Raises:
        EntityMatchingError: if |entity| is falsy.
    """
    if entity:
        # The final child in this EntityTree.
        self.entity = entity

        # Ancestors of the entity above, ordered from furthest to closest.
        # Copied so later changes to the caller's list do not affect this
        # tree.
        self.ancestor_chain = ancestor_chain[:]
        return

    raise EntityMatchingError(
        "When creating EntityTree object, entity field must be set",
        'entity_tree')
def _get_sequence_no(period: schema.StateIncarcerationPeriod) -> int:
    """Extracts the ND specific Movement Sequence Number from the external id
    of the provided |period|.

    The sequence number is the integer value of the text after the last '-'
    in the external id.

    Raises:
        EntityMatchingError: if the external id is missing or its final
            segment cannot be parsed as an integer. The underlying exception
            is attached as the cause.
    """
    try:
        external_id = cast(str, period.external_id)
        sequence_no = int(external_id.split('-')[-1])
    except Exception as e:
        # Chain explicitly so the original failure stays visible in the
        # traceback as the direct cause.
        raise EntityMatchingError(
            f"Could not parse sequence number from external_id "
            f"{period.external_id}", period.get_entity_name()) from e
    return sequence_no
def resolve_child_match_result():
    """Adds the result of the current child match to |updated_child_trees|.

    Reads |child_match_result| from the enclosing scope. When the child has
    merged trees they are all added; otherwise the child's ingested tree is
    added unchanged.

    Raises:
        EntityMatchingError: if |child_match_result| is not set.
    """
    if not child_match_result:
        raise EntityMatchingError(
            f"Expected child_match_result to be set, but instead got "
            f"{child_match_result}",
            ingested_entity_tree.entity.get_entity_name())

    merged_trees = child_match_result.merged_entity_trees
    if merged_trees:
        updated_child_trees.extend(merged_trees)
    else:
        updated_child_trees.append(child_match_result.ingested_entity_tree)
def _is_match(*, ingested_entity: Entity, db_entity: Entity) -> bool:
    """Reports whether |ingested_entity| and |db_entity| are a match.

    Matching rules, in the order they are applied:
      - If either side is falsy, the result is plain equality of the two.
      - StatePerson: any pair of their external ids matches.
      - StatePersonExternalId: state_code, external_id and id_type are equal.
      - StatePersonAlias / StatePersonRace / StatePersonEthnicity: state_code
        and, respectively, full_name / race / ethnicity are equal.
      - Anything else: external_ids are equal; when both are None the two
        entities match only if both are placeholders.

    Raises:
        EntityMatchingError: if the two entities are of different classes.
    """
    if not ingested_entity or not db_entity:
        return ingested_entity == db_entity

    ingested_cls = ingested_entity.__class__
    db_cls = db_entity.__class__
    if ingested_cls != db_cls:
        raise EntityMatchingError(
            f"is_match received entities of two different classes: "
            f"ingested entity {ingested_cls.__name__} and "
            f"db_entity {db_cls.__name__}",
            ingested_entity.get_entity_name())

    if isinstance(ingested_entity, StatePerson):
        db_person = cast(StatePerson, db_entity)
        return any(
            _is_match(ingested_entity=ingested_id, db_entity=db_id)
            for ingested_id in ingested_entity.external_ids
            for db_id in db_person.external_ids)

    if isinstance(ingested_entity, StatePersonExternalId):
        db_person_id = cast(StatePersonExternalId, db_entity)
        return (ingested_entity.state_code == db_person_id.state_code
                and ingested_entity.external_id == db_person_id.external_id
                and ingested_entity.id_type == db_person_id.id_type)

    # As person has already been matched, assume that any of these 'person
    # attribute' entities are matches if their state_codes align along with
    # the one attribute listed for each class.
    person_attribute_fields = (
        (StatePersonAlias, 'full_name'),
        (StatePersonRace, 'race'),
        (StatePersonEthnicity, 'ethnicity'),
    )
    for attribute_cls, compared_field in person_attribute_fields:
        if isinstance(ingested_entity, attribute_cls):
            return (ingested_entity.state_code == db_entity.state_code
                    and getattr(ingested_entity, compared_field)
                    == getattr(db_entity, compared_field))

    db_with_id = cast(ExternalIdEntity, db_entity)
    ingested_with_id = cast(ExternalIdEntity, ingested_entity)

    # Placeholders entities are considered equal
    if ingested_with_id.external_id is None \
            and db_with_id.external_id is None:
        return is_placeholder(ingested_with_id) \
            and is_placeholder(db_with_id)
    return ingested_with_id.external_id == db_with_id.external_id
def get_set_entity_field_names(
        entity: Entity, entity_field_type: EntityFieldType) -> Set[str]:
    """Returns the names of the set fields on |entity| that fall into the
    requested |entity_field_type| category.

    A field whose value is None or an empty list is not considered set. A
    field holding an Entity or a non-empty list is an edge, classified as a
    back or forward edge by the direction checker; every other set field is a
    flat field. The state direction checker is used when the entity name
    starts with 'state_', the county one otherwise.

    Raises:
        EntityMatchingError: if |entity_field_type| is not FLAT_FIELD,
            FORWARD_EDGE or BACK_EDGE.
    """
    is_state_entity = entity.get_entity_name().startswith('state_')
    make_checker = (
        SchemaEdgeDirectionChecker.state_direction_checker
        if is_state_entity
        else SchemaEdgeDirectionChecker.county_direction_checker)
    direction_checker = make_checker()

    back_edges = set()
    forward_edges = set()
    flat_fields = set()

    for field_name in attr.fields_dict(entity.__class__):
        field_value = getattr(entity, field_name)
        if field_value is None:
            continue

        # TODO(1908): Update traversal logic if relationship fields can be
        # different types aside from Entity and List
        holds_entity = issubclass(type(field_value), Entity)
        if not holds_entity and not isinstance(field_value, list):
            flat_fields.add(field_name)
            continue

        # Disregard empty lists
        if not holds_entity and not field_value:
            continue

        if direction_checker.is_back_edge(entity, field_name):
            back_edges.add(field_name)
        else:
            forward_edges.add(field_name)

    names_by_type = (
        (EntityFieldType.FLAT_FIELD, flat_fields),
        (EntityFieldType.FORWARD_EDGE, forward_edges),
        (EntityFieldType.BACK_EDGE, back_edges),
    )
    for candidate_type, field_names in names_by_type:
        if entity_field_type is candidate_type:
            return field_names

    raise EntityMatchingError(
        f"Unrecognized EntityFieldType {entity_field_type}",
        'entity_field_type')
def _get_all_entity_field_names(entity: Entity,
                                entity_field_type: EntityFieldType,
                                direction_checker):
    """Returns the names of the set fields on the Entity |entity| that fall
    into the requested |entity_field_type| category.

    A field whose value is None or an empty list is not considered set. A
    field holding an Entity or a non-empty list is an edge, classified as a
    back or forward edge by |direction_checker|; every other set field is a
    flat field. FOREIGN_KEYS always yields an empty set and ALL yields the
    union of flat fields and both edge kinds.

    Raises:
        EntityMatchingError: if |entity_field_type| is not recognized.
    """
    back_edges = set()
    forward_edges = set()
    flat_fields = set()

    for field_name in attr.fields_dict(entity.__class__):
        field_value = getattr(entity, field_name)
        if field_value is None:
            continue

        # TODO(1908): Update traversal logic if relationship fields can be
        # different types aside from Entity and List
        holds_entity = issubclass(type(field_value), Entity)
        if not holds_entity and not isinstance(field_value, list):
            flat_fields.add(field_name)
            continue

        # Disregard empty lists
        if not holds_entity and not field_value:
            continue

        edge_names = (
            back_edges
            if direction_checker.is_back_edge(entity, field_name)
            else forward_edges)
        edge_names.add(field_name)

    names_by_type = (
        (EntityFieldType.FLAT_FIELD, flat_fields),
        # Entity objects never have foreign keys
        (EntityFieldType.FOREIGN_KEYS, set()),
        (EntityFieldType.FORWARD_EDGE, forward_edges),
        (EntityFieldType.BACK_EDGE, back_edges),
        (EntityFieldType.ALL, flat_fields | forward_edges | back_edges),
    )
    for candidate_type, field_names in names_by_type:
        if entity_field_type is candidate_type:
            return field_names

    raise EntityMatchingError(
        f"Unrecognized EntityFieldType {entity_field_type}",
        'entity_field_type')
def revoked_to_prison(
        svr: schema.StateSupervisionViolationResponse) -> bool:
    """Determines if the provided |svr| resulted in a revocation to prison.

    Returns True when the revocation_type is REINCARCERATION,
    SHOCK_INCARCERATION or TREATMENT_IN_PRISON, and False when it is unset or
    RETURN_TO_SUPERVISION.

    Raises:
        EntityMatchingError: if the revocation_type is set to any other
            value.
    """
    if not svr.revocation_type:
        return False

    # The schema object's revocation_type is compared against the enum
    # `.value`s, not the enum members themselves.
    reincarceration_types = [
        StateSupervisionViolationResponseRevocationType.REINCARCERATION.value,
        StateSupervisionViolationResponseRevocationType
        .SHOCK_INCARCERATION.value,
        StateSupervisionViolationResponseRevocationType
        .TREATMENT_IN_PRISON.value]
    non_reincarceration_types = [
        StateSupervisionViolationResponseRevocationType
        .RETURN_TO_SUPERVISION.value]

    if svr.revocation_type in reincarceration_types:
        return True
    if svr.revocation_type in non_reincarceration_types:
        return False

    raise EntityMatchingError(
        f"Unexpected StateSupervisionViolationResponseRevocationType "
        f"{svr.revocation_type}.", svr.get_entity_name())
def __init__(self,
             ingested_entity_tree: EntityTree,
             merged_entity_trees: List[EntityTree],
             error_count: int):
    """Stores the three provided values on the new object.

    Raises:
        EntityMatchingError: if |ingested_entity_tree| is falsy.
    """
    if ingested_entity_tree:
        # The initial EntityTree to be matched to DB EntityTrees.
        self.ingested_entity_tree = ingested_entity_tree

        # If matching was successful, these are results of merging the
        # ingested_entity_tree with any of its DB matches.
        self.merged_entity_trees = merged_entity_trees

        # The number of errors encountered while matching this entity.
        self.error_count = error_count
        return

    raise EntityMatchingError(
        "When creating IndividualMatchResult object, "
        "ingested_entity_tree field must be set",
        'individual_match_result')
def is_incarceration_period_match(
    ingested_entity: Union[EntityTree, StateBase],
    db_entity: Union[EntityTree, StateBase],
) -> bool:
    """Determines if the provided |ingested_entity| matches the |db_entity|
    based on ND specific StateIncarcerationPeriod matching.

    Either both arguments are EntityTrees (in which case the wrapped entities
    are compared) or both are incarceration periods. Rules:
      - Periods whose state_code is not "US_ND" never match.
      - Two incomplete periods are compared with
        is_incomplete_incarceration_period_match().
      - Two complete periods match when their external_ids are equal.
      - Otherwise the complete period's external_id is split on
        _INCARCERATION_PERIOD_ID_DELIMITER and the periods match when the
        incomplete period's external_id is one of the two pieces.

    Raises:
        EntityMatchingError: if the complete period's external_id does not
            split into exactly two pieces.
    """
    if isinstance(ingested_entity, EntityTree):
        # The cast narrows |db_entity| to a tree *before* unwrapping it; the
        # unwrapped value is an entity, not a tree.
        db_entity = cast(EntityTree, db_entity).entity
        ingested_entity = ingested_entity.entity

    ingested_entity = cast(schema.StateIncarcerationPeriod, ingested_entity)
    db_entity = cast(schema.StateIncarcerationPeriod, db_entity)

    # Enforce that all objects being compared are for US_ND
    if ingested_entity.state_code != "US_ND" \
            or db_entity.state_code != "US_ND":
        return False

    ingested_complete = is_incarceration_period_complete(ingested_entity)
    db_complete = is_incarceration_period_complete(db_entity)

    if not ingested_complete and not db_complete:
        return is_incomplete_incarceration_period_match(
            ingested_entity, db_entity)
    if ingested_complete and db_complete:
        return ingested_entity.external_id == db_entity.external_id

    # Only one of the two is complete
    complete, incomplete = (
        (ingested_entity, db_entity)
        if ingested_complete
        else (db_entity, ingested_entity)
    )

    complete_external_ids = complete.external_id.split(
        _INCARCERATION_PERIOD_ID_DELIMITER
    )
    incomplete_external_id = incomplete.external_id

    if len(complete_external_ids) != 2:
        raise EntityMatchingError(
            f"Could not split external id [{complete.external_id}] of complete incarceration period [{complete}] as "
            f"expected",
            ingested_entity.get_entity_name(),
        )

    return incomplete_external_id in complete_external_ids
def _match_entity_tree(
        self,
        *,
        ingested_entity_tree: EntityTree,
        db_entity_trees: List[EntityTree],
        matched_entities_by_db_ids: Dict[int, List[DatabaseEntity]],
        root_entity_cls: Type) -> IndividualMatchResult:
    """Delegates to the parent implementation unless the ingested entity is
    one this object is configured to fail on.

    Raises:
        EntityMatchingError: if the ingested entity is an instance of
            self.erroring_class and its external id is in
            self.erroring_external_ids.
    """
    ingested = ingested_entity_tree.entity

    if isinstance(ingested, self.erroring_class):
        if ingested.get_external_id() in self.erroring_external_ids:
            raise EntityMatchingError("error!", ingested.get_entity_name())

    return super()._match_entity_tree(
        ingested_entity_tree=ingested_entity_tree,
        db_entity_trees=db_entity_trees,
        matched_entities_by_db_ids=matched_entities_by_db_ids,
        root_entity_cls=root_entity_cls,
    )
def get_external_ids_of_cls(persons: List[schema.StatePerson],
                            cls: Type[DatabaseEntity]) -> Set[str]:
    """Collects the external ids of every entity of type |cls| found by
    get_all_entities_of_cls() in the provided |persons|.

    |persons| is first validated with check_all_objs_have_type(). The ids of
    each entity come from get_external_ids_from_entity().

    Raises:
        EntityMatchingError: if any found entity yields no external ids.
    """
    check_all_objs_have_type(persons, schema.StatePerson)

    collected: Set[str] = set()
    for entity in get_all_entities_of_cls(persons, cls):
        entity_ids = get_external_ids_from_entity(entity)
        if entity_ids:
            collected.update(entity_ids)
            continue

        raise EntityMatchingError(
            f'Expected all external_ids to be present in cls '
            f'[{cls.__name__}]', cls.__name__)

    return collected
def set_field_from_list(entity: Entity, field_name: str, value: List):
    """Writes |value| onto |entity|'s |field_name| field, adapting it to the
    shape of the field's current value.

    If the field currently holds a list, |value| is stored as-is. Otherwise
    the field is treated as singular: an empty |value| stores None and a
    one-element |value| stores that element.

    Raises:
        EntityMatchingError: if the field is singular and |value| has more
            than one element.
    """
    current_value = get_field(entity, field_name)

    if isinstance(current_value, list):
        set_field(entity, field_name, value)
        return

    if not value:
        singular_value = None
    elif len(value) == 1:
        singular_value = value[0]
    else:
        raise EntityMatchingError(
            f"Attempting to set singular field: {field_name} on entity: "
            f"{entity.get_entity_name()}, but got multiple values: "
            f"{value}.", entity.get_entity_name())

    set_field(entity, field_name, singular_value)
def _set_preceding_admission_reason(
    idx: int,
    sorted_ips: List[schema.StateIncarcerationPeriod],
    overrides: EnumOverrides,
) -> None:
    """Updates the admission reason of the period at |idx| in |sorted_ips|
    from the run of holds that immediately precedes it.

    Starting at the period just before |idx| and walking backwards, periods
    are consumed while each one is a hold (per _is_hold) and is consecutive
    (per _are_consecutive) with the period after it. The
    admission_reason_raw_text of the earliest period reached is remembered.
    If one was found and the period at |idx| currently has the TRANSFER
    admission reason, that raw text is parsed with |overrides| and its value
    replaces the admission reason.

    Raises:
        EntityMatchingError: if the period at |idx| is itself a hold.
    """
    beginning_ip = sorted_ips[idx]
    if _is_hold(beginning_ip):
        raise EntityMatchingError(
            f"Expected beginning_ip to NOT be a hold, instead found [{beginning_ip}] with "
            f"incarceration type [{beginning_ip.incarceration_type}]",
            "incarceration_period",
        )

    earliest_hold_admission_raw_text = None
    later_ip = None
    cursor = idx
    while cursor >= 0:
        current_ip = sorted_ips[cursor]
        cursor -= 1

        # First pass only seeds |later_ip| with the starting period.
        if not later_ip:
            later_ip = current_ip
            continue

        # Stop at the first period that breaks the chain of holds.
        if not (_is_hold(current_ip)
                and _are_consecutive(current_ip, later_ip)):
            break

        earliest_hold_admission_raw_text = current_ip.admission_reason_raw_text
        later_ip = current_ip

    # Update the original incarceration period's admission reason if
    # necessary.
    if not earliest_hold_admission_raw_text:
        return
    if (beginning_ip.admission_reason
            == StateIncarcerationPeriodAdmissionReason.TRANSFER.value):
        parsed_reason = StateIncarcerationPeriodAdmissionReason.parse(
            earliest_hold_admission_raw_text, overrides
        )
        beginning_ip.admission_reason = parsed_reason.value
def get_external_ids_of_cls(persons: List[schema.StatePerson],
                            cls: Type[DatabaseEntity]) -> Set[str]:
    """Gathers into one set the external ids of all |cls| entities that
    get_all_entities_of_cls() finds in |persons|.

    |persons| is checked with check_all_objs_have_type() before anything
    else. Each entity's ids are obtained from get_external_ids_from_entity().

    Raises:
        EntityMatchingError: if an entity has no external ids. The message
            includes the offending entity.
    """
    check_all_objs_have_type(persons, schema.StatePerson)

    all_ids: Set[str] = set()
    for entity in get_all_entities_of_cls(persons, cls):
        ids_for_entity = get_external_ids_from_entity(entity)
        if not ids_for_entity:
            raise EntityMatchingError(
                f"Expected external_ids to be non-empty for entity [{entity}] with class [{cls.__name__}]",
                entity.get_class_id_name(),
            )
        all_ids |= set(ids_for_entity)
    return all_ids
def add_child_to_entity(
        *, entity: Entity, child_field_name: str, child_to_add: Entity):
    """Puts |child_to_add| on |entity|'s |child_field_name| field.

    When the field holds a list, the child is appended to that list unless it
    is already in it. Otherwise the field is singular and is set to the
    child.

    Raises:
        EntityMatchingError: if the singular field already holds a truthy
            value that is not equal to |child_to_add|.
    """
    existing = get_field(entity, child_field_name)

    if isinstance(existing, list):
        # The list is modified in place, so no write-back is needed.
        if child_to_add not in existing:
            existing.append(child_to_add)
        return

    if existing and existing != child_to_add:
        raise EntityMatchingError(
            f"Attempting to add child {child_to_add} to entity {entity}, "
            f"but {child_field_name} already had different value "
            f"{existing}", entity.get_entity_name())

    set_field(entity, child_field_name, child_to_add)
def _nd_read_people(session: Session,
                    region: str,
                    ingested_people: List[StatePerson]) -> List[StatePerson]:
    """ND specific lookup of the people needed for entity matching, based on
    the provided |region| and |ingested_people|.

    The root entity class of |ingested_people| is determined, the external
    ids of that class are collected, and people are read by those ids with
    populate_back_edges=False.

    Raises:
        EntityMatchingError: if the root entity class is neither StatePerson
            nor StateSentenceGroup.
    """
    root_cls = get_root_entity_cls(ingested_people)

    if root_cls in (StatePerson, StateSentenceGroup):
        external_ids = get_external_ids_of_cls(ingested_people, root_cls)
        return dao.read_people_by_cls_external_ids(
            session,
            region,
            root_cls,
            external_ids,
            populate_back_edges=False)

    raise EntityMatchingError(
        f'For region [{region}] found unexpected root_entity_cls: '
        f'[{root_cls.__name__}]', 'root_entity_cls')
def get_root_entity_cls(ingested_persons: List[StatePerson]) -> Type:
    """Attempts to find the highest entity class within the
    |ingested_persons| for which objects are not placeholders. Returns the
    class if found, otherwise raises.

    Only the first person in |ingested_persons| is inspected (via
    _get_root_entity_helper).

    Note: This should only be used with persons ingested from a region
    directly (and not with persons post entity matching), as this function
    uses DFS to find the root entity cls. This therefore assumes that a) the
    passed in StatePersons are trees and not DAGs (one parent per entity) and
    b) that the structure of the passed in graph is symmetrical.

    Raises:
        EntityMatchingError: if |ingested_persons| is empty or no root class
            is found.
    """
    root_cls = (
        _get_root_entity_helper(ingested_persons[0])
        if ingested_persons
        else None)

    if root_cls is not None:
        return root_cls

    raise EntityMatchingError(
        "Could not find root class for ingested persons", 'state_person')
def add_child_to_entity(*,
                        entity: DatabaseEntity,
                        child_field_name: str,
                        child_to_add: DatabaseEntity):
    """Puts |child_to_add| on |entity|'s |child_field_name| field.

    When the field holds a list, the child is appended to that list unless it
    is already in it. Otherwise the field is singular and is set to the
    child.

    Raises:
        EntityMatchingError: if the singular field already holds a truthy
            value that is not equal to |child_to_add|.
    """
    existing = entity.get_field(child_field_name)

    if isinstance(existing, list):
        # The list is modified in place, so no write-back is needed.
        if child_to_add not in existing:
            existing.append(child_to_add)
        return

    if existing and existing != child_to_add:
        raise EntityMatchingError(
            f"Attempting to add child [{child_to_add}] to entity [{entity}], but field [{child_field_name}] "
            f"already had different value [{existing}]",
            entity.get_entity_name())

    entity.set_field(child_field_name, child_to_add)
def resolve_child_match_result():
    """Routes the current child of the ingested placeholder to where it
    belongs after matching.

    Reads |child_field_name|, |child_match_result|, |placeholder_children|,
    |updated_entity_trees|, |matched_entities_by_db_ids| and
    |ingested_placeholder_tree| from the enclosing scope.

    An unmatched child, or one whose merged parent is the ingested
    placeholder itself, has its ingested entity recorded in
    |placeholder_children|. Any other merged child is added to its merged
    parent's |child_field_name| field, and that parent tree is recorded once
    in |updated_entity_trees| (and in the matched-entities cache).

    Raises:
        EntityMatchingError: if |child_field_name| or |child_match_result| is
            not set.
    """
    if not (child_field_name and child_match_result):
        raise EntityMatchingError(
            f"Expected child_field_name and child_match_result to be set, "
            f"but instead got {child_field_name} and {child_match_result} "
            f"respectively.",
            ingested_placeholder_tree.entity.get_entity_name())

    merged_trees = child_match_result.merged_entity_trees

    # If the child wasn't matched, leave it on the placeholder object.
    if not merged_trees:
        placeholder_children.append(
            child_match_result.ingested_entity_tree.entity)
        return

    # Ensure the merged children are on the correct entity
    for merged_tree in merged_trees:
        db_parent_tree = merged_tree.generate_parent_tree()
        db_parent = db_parent_tree.entity

        # If one of the merged parents is the ingested placeholder entity,
        # simply keep track of the child in placeholder_children.
        if db_parent == ingested_placeholder_tree.entity:
            placeholder_children.append(
                child_match_result.ingested_entity_tree.entity)
            continue

        add_child_to_entity(
            entity=db_parent,
            child_field_name=child_field_name,
            child_to_add=merged_tree.entity)

        # Keep track of all db parents of the merged children, each only
        # once.
        already_tracked = [tree.entity for tree in updated_entity_trees]
        if db_parent in already_tracked:
            continue

        _add_match_to_matched_entities_cache(
            db_entity_match=db_parent,
            ingested_entity=ingested_placeholder_tree.entity,
            matched_entities_by_db_ids=matched_entities_by_db_ids)
        updated_entity_trees.append(db_parent_tree)
def _is_hold(ip: schema.StateIncarcerationPeriod) -> bool:
    """Determines if the provided |ip| represents a temporary hold and not a
    stay in a DOCR overseen facility.

    Periods admitted before July 1, 2017 are never holds. After that, a
    period is a hold when its incarceration_type is COUNTY_JAIL or
    EXTERNAL_UNKNOWN, and not a hold when it is STATE_PRISON.

    Raises:
        EntityMatchingError: if the incarceration_type is any other value.
    """
    # Everything before July 1, 2017 was overseen by DOCR.
    if ip.admission_date < datetime.date(year=2017, month=7, day=1):
        return False

    # The schema object's incarceration_type is compared against the enum
    # `.value`s, not the enum members themselves.
    hold_types = [
        StateIncarcerationType.COUNTY_JAIL.value,
        StateIncarcerationType.EXTERNAL_UNKNOWN.value]
    non_hold_types = [StateIncarcerationType.STATE_PRISON.value]

    if ip.incarceration_type in hold_types:
        return True
    if ip.incarceration_type in non_hold_types:
        return False

    raise EntityMatchingError(
        f"Unexpected StateIncarcerationType "
        f"{ip.incarceration_type}.", ip.get_entity_name())
def _get_all_database_entity_field_names(entity: DatabaseEntity,
                                         entity_field_type: EntityFieldType,
                                         direction_checker):
    """Returns the names of the fields on the DatabaseEntity |entity| that
    fall into the requested |entity_field_type| category.

    Relationship properties are split into back and forward edges by
    |direction_checker|. Foreign keys come from get_foreign_key_names(), and
    flat fields are the column properties that are not foreign keys. ALL
    yields the union of the four groups.

    Raises:
        EntityMatchingError: if |entity_field_type| is not recognized.
    """
    back_edges = set()
    forward_edges = set()
    for relationship_name in entity.get_relationship_property_names():
        edge_names = (
            back_edges
            if direction_checker.is_back_edge(entity, relationship_name)
            else forward_edges)
        edge_names.add(relationship_name)

    foreign_keys = set(entity.get_foreign_key_names())

    # Columns that back a foreign key are reported as foreign keys only.
    flat_fields = {
        column_name
        for column_name in entity.get_column_property_names()
        if column_name not in foreign_keys
    }

    names_by_type = (
        (EntityFieldType.FLAT_FIELD, flat_fields),
        (EntityFieldType.FOREIGN_KEYS, foreign_keys),
        (EntityFieldType.FORWARD_EDGE, forward_edges),
        (EntityFieldType.BACK_EDGE, back_edges),
        (EntityFieldType.ALL,
         flat_fields | foreign_keys | forward_edges | back_edges),
    )
    for candidate_type, field_names in names_by_type:
        if entity_field_type is candidate_type:
            return field_names

    raise EntityMatchingError(
        f"Unrecognized EntityFieldType {entity_field_type}",
        'entity_field_type')
def read_persons_by_root_entity_cls(
        session: Session,
        region: str,
        ingested_people: List[schema.StatePerson],
        allowed_root_entity_classes: Optional[List[Type[DatabaseEntity]]],
) -> List[schema.StatePerson]:
    """Looks up all people necessary for entity matching based on the
    provided |region| and |ingested_people|.

    People are read by the external ids of the root entity class of
    |ingested_people|, placeholder persons are read as well, and the combined
    result is returned with duplicates (by person_id) removed, keeping the
    first occurrence of each.

    Raises:
        EntityMatchingError: if |allowed_root_entity_classes| is provided and
            the root entity class is not one of them.
    """
    root_cls = get_root_entity_cls(ingested_people)

    if allowed_root_entity_classes:
        if root_cls not in allowed_root_entity_classes:
            raise EntityMatchingError(
                f'For region [{region}] found unexpected root_entity_cls: '
                f'[{root_cls.__name__}]', 'root_entity_cls')

    root_external_ids = get_external_ids_of_cls(ingested_people, root_cls)
    logging.info("[Entity Matching] Reading [%s] external ids of class [%s]",
                 len(root_external_ids), root_cls.__name__)

    persons_by_root_entity = dao.read_people_by_cls_external_ids(
        session, region, root_cls, root_external_ids)
    placeholder_persons = dao.read_placeholder_persons(session)

    # When the root class is not StatePerson, it is possible for both
    # persons_by_root_entity and placeholder_persons to contain the same
    # placeholder person(s). For this reason, we dedup people across both
    # lists before returning.
    first_person_by_id: Dict[Optional[int], schema.StatePerson] = {}
    for person in persons_by_root_entity + placeholder_persons:
        first_person_by_id.setdefault(person.person_id, person)
    return list(first_person_by_id.values())
def _merge_incomplete_periods(
        a: StateIncarcerationPeriod,
        b: StateIncarcerationPeriod) -> StateIncarcerationPeriod:
    """Combines an admission-only period and a release-only period into a
    single new period.

    The period supplying the admission_date is copied, the flat fields of the
    period supplying the release_date are merged onto the copy via
    _default_merge_flat_fields(), and the copy's external_id is set to the
    admission id and release id joined by
    _INCARCERATION_PERIOD_ID_DELIMITER (a missing id counts as '').

    Raises:
        EntityMatchingError: if the pair does not provide one admission_date
            and one release_date on different periods.
    """
    if a.admission_date and b.release_date:
        admission_period, release_period = a, b
    elif a.release_date and b.admission_date:
        admission_period, release_period = b, a
    else:
        raise EntityMatchingError(
            f"Expected one admission period and one release period when "
            f"merging, instead found periods: {a}, {b}", a.get_entity_name())

    merged_period = attr.evolve(admission_period)

    # Build the combined id up front; it is applied after the flat-field
    # merge so the merge cannot overwrite it.
    combined_external_id = _INCARCERATION_PERIOD_ID_DELIMITER.join((
        admission_period.external_id or '',
        release_period.external_id or '',
    ))

    _default_merge_flat_fields(
        new_entity=release_period, old_entity=merged_period)

    merged_period.external_id = combined_external_id
    return merged_period
def admitted_for_revocation(ip: StateIncarcerationPeriod) -> bool:
    """Determines if the provided |ip| began because of a revocation.

    Returns True for the PAROLE_REVOCATION and PROBATION_REVOCATION admission
    reasons, and False when the admission reason is unset or is one of the
    known non-revocation reasons.

    Raises:
        EntityMatchingError: if the admission reason is set to a value that
            is in neither group.
    """
    admission_reason = ip.admission_reason
    if not admission_reason:
        return False

    revocation_reasons = (
        StateIncarcerationPeriodAdmissionReason.PAROLE_REVOCATION,
        StateIncarcerationPeriodAdmissionReason.PROBATION_REVOCATION,
    )
    if admission_reason in revocation_reasons:
        return True

    non_revocation_reasons = (
        StateIncarcerationPeriodAdmissionReason.ADMITTED_IN_ERROR,
        StateIncarcerationPeriodAdmissionReason.EXTERNAL_UNKNOWN,
        StateIncarcerationPeriodAdmissionReason.NEW_ADMISSION,
        StateIncarcerationPeriodAdmissionReason.RETURN_FROM_ERRONEOUS_RELEASE,
        StateIncarcerationPeriodAdmissionReason.RETURN_FROM_ESCAPE,
        StateIncarcerationPeriodAdmissionReason.TRANSFER,
    )
    if admission_reason in non_revocation_reasons:
        return False

    raise EntityMatchingError(
        f"Unexpected StateIncarcerationPeriodAdmissionReason "
        f"{ip.admission_reason}.", ip.get_entity_name())
def set_field(entity: Entity, field_name: str, value: Any):
    """Sets |entity|'s existing attribute |field_name| to |value|.

    Raises:
        EntityMatchingError: if |entity| has no attribute named
            |field_name|.
    """
    if hasattr(entity, field_name):
        return setattr(entity, field_name, value)

    raise EntityMatchingError(
        f"Expected entity {entity} to have field {field_name}, but it did "
        f"not.", entity.get_entity_name())
def _is_match(*,
              ingested_entity: DatabaseEntity,
              db_entity: DatabaseEntity) -> bool:
    """Reports whether |ingested_entity| and |db_entity| are a match.

    Matching rules, in the order they are applied:
      - If either side is falsy, the result is plain equality of the two.
      - StatePerson: any pair of their external ids matches.
      - Any other entities with different state_code fields do not match.
      - StatePersonExternalId: external_id and id_type are equal.
      - StatePersonAlias / StatePersonRace / StatePersonEthnicity: full_name /
        race / ethnicity, respectively, are equal.
      - The violation response decision, violated condition, violation type
        and case type entry classes: decided by _base_entity_match().
      - Anything else: external ids are equal; when both are None the two
        entities match only if both are placeholders.

    Raises:
        EntityMatchingError: if the two entities are of different classes, or
            either is not a DatabaseEntity.
    """
    if not ingested_entity or not db_entity:
        return ingested_entity == db_entity

    ingested_cls = ingested_entity.__class__
    db_cls = db_entity.__class__
    if ingested_cls != db_cls:
        raise EntityMatchingError(
            f"is_match received entities of two different classes: "
            f"ingested entity {ingested_cls.__name__} and "
            f"db_entity {db_cls.__name__}",
            ingested_entity.get_entity_name())

    if not isinstance(ingested_entity, DatabaseEntity):
        raise EntityMatchingError(
            f"Unexpected type for ingested entity[{type(ingested_entity)}]",
            'unknown')
    if not isinstance(db_entity, DatabaseEntity):
        raise EntityMatchingError(
            f"Unexpected type for db entity[{type(db_entity)}]", 'unknown')

    if isinstance(ingested_entity, schema.StatePerson):
        db_person = cast(schema.StatePerson, db_entity)
        return any(
            _is_match(ingested_entity=ingested_id, db_entity=db_id)
            for ingested_id in ingested_entity.external_ids
            for db_id in db_person.external_ids)

    # Aside from people, all entities are state specific.
    ingested_state_code = ingested_entity.get_field('state_code')
    if ingested_state_code != db_entity.get_field('state_code'):
        return False

    # TODO(2671): Update all person attributes below to use complete entity
    # equality instead of just comparing individual fields.
    if isinstance(ingested_entity, schema.StatePersonExternalId):
        db_person_id = cast(schema.StatePersonExternalId, db_entity)
        return (ingested_entity.external_id == db_person_id.external_id
                and ingested_entity.id_type == db_person_id.id_type)

    # As person has already been matched, assume that any of these 'person
    # attribute' entities are matches if specific attributes match.
    person_attribute_fields = (
        (schema.StatePersonAlias, 'full_name'),
        (schema.StatePersonRace, 'race'),
        (schema.StatePersonEthnicity, 'ethnicity'),
    )
    for attribute_cls, compared_field in person_attribute_fields:
        if isinstance(ingested_entity, attribute_cls):
            return getattr(ingested_entity, compared_field) \
                == getattr(db_entity, compared_field)

    base_matched_classes = (
        schema.StateSupervisionViolationResponseDecisionEntry,
        schema.StateSupervisionViolatedConditionEntry,
        schema.StateSupervisionViolationTypeEntry,
        schema.StateSupervisionCaseTypeEntry,
    )
    if isinstance(ingested_entity, base_matched_classes):
        return _base_entity_match(ingested_entity, db_entity)

    ingested_external_id = ingested_entity.get_external_id()
    db_external_id = db_entity.get_external_id()

    # Placeholders entities are considered equal
    if ingested_external_id is None and db_external_id is None:
        return is_placeholder(ingested_entity) and is_placeholder(db_entity)
    return ingested_external_id == db_external_id