def get_or_create_placeholder_child(parent_entity: DatabaseEntity,
                                    child_field_name: str,
                                    child_class: Type[DatabaseEntity],
                                    **child_kwargs):
    """Returns a placeholder child found on |parent_entity|'s |child_field_name| field, creating one if none exists.

    Every child currently on the field is checked with is_placeholder(); the first placeholder found is returned
    unchanged. When there is none, a new |child_class| instance is built from |child_kwargs|, appended to the
    field's children, written back onto the parent and returned.

    Raises:
        EntityMatchingError: if the entity built from |child_kwargs| is not itself a placeholder.
    """
    existing_children = parent_entity.get_field_as_list(child_field_name)
    existing_placeholders = list(filter(is_placeholder, existing_children))
    if existing_placeholders:
        return existing_placeholders[0]

    logging.info(
        'No placeholder children on entity with id [%s] of type [%s] exist on field [%s]. Have to create one.',
        parent_entity.get_external_id(), parent_entity.get_entity_name(),
        child_field_name)

    created_child = child_class(**child_kwargs)
    if is_placeholder(created_child):
        # Attach the freshly created placeholder to the parent before handing it back.
        existing_children.append(created_child)
        parent_entity.set_field_from_list(child_field_name, existing_children)
        return created_child

    raise EntityMatchingError(
        f'Child created with kwargs is not a placeholder [{child_kwargs}]',
        parent_entity.get_entity_name())
示例#2
0
    def resolve_child_match_result():
        """Moves matched children off of their DB placeholder parent so they
        can live on the ingested, unmatched entity.

        An unmatched child is recorded as-is. Each merged child is recorded,
        removed from its placeholder parent, and the placeholder's ancestor
        chain is checked against (or used to seed) the shared ancestor chain.
        """
        if not (child_field_name and child_match_result):
            raise EntityMatchingError(
                f"Expected child_field_name and child_match_result to be set, "
                f"but instead got {child_field_name} and {child_match_result} "
                f"respectively.",
                ingested_unmatched_entity_tree.entity.get_entity_name())

        merged_child_trees = child_match_result.merged_entity_trees
        if not merged_child_trees:
            # Child is unmatched: keep track of the unchanged child.
            updated_child_trees.append(child_match_result.ingested_entity_tree)
            return

        for merged_child_tree in merged_child_trees:
            # Track the merged child and detach it from the DB placeholder.
            updated_child_trees.append(merged_child_tree)
            placeholder_tree = merged_child_tree.generate_parent_tree()
            remove_child_from_entity(
                entity=placeholder_tree.entity,
                child_field_name=child_field_name,
                child_to_remove=merged_child_tree.entity)

            if not ancestor_chain_updated:
                # First placeholder seen: its ancestors become the reference.
                ancestor_chain_updated.extend(placeholder_tree.ancestor_chain)
                continue

            # For now only the case where every placeholder with matched
            # children shares one parent chain is handled; anything else is
            # an error.
            if ancestor_chain_updated != placeholder_tree.ancestor_chain:
                raise EntityMatchingError(
                    f"Expected all placeholder DB entities matched to "
                    f"an ingested unmatched entity to have the same "
                    f"ancestor chain, but they did not. Found "
                    f"conflicting ancestor chains: "
                    f"{ancestor_chain_updated} and "
                    f"{placeholder_tree.ancestor_chain}",
                    ingested_entity.get_entity_name())
示例#3
0
def get_external_ids_of_cls(persons: List[StatePerson], cls: Type) -> Set[str]:
    """Collects the external ids of every entity of type |cls| found in the
    provided |persons| trees.

    A StatePerson contributes all ids from its external_ids list; any other
    entity contributes its single external_id. Raises EntityMatchingError
    when an entity has no external id to contribute.
    """
    collected: Set[str] = set()
    for found in _get_all_entities_of_cls(persons, cls):
        if not isinstance(found, StatePerson):
            id_entity = cast(ExternalIdEntity, found)
            if not id_entity.external_id:
                raise EntityMatchingError(
                    f'Expected all external_ids to be present in cls '
                    f'[{cls.__name__}]', cls.__name__)
            collected.add(id_entity.external_id)
            continue

        if not found.external_ids:
            raise EntityMatchingError(
                'No found external_ids on provided person', cls.__name__)
        collected.update(ex.external_id for ex in found.external_ids)
    return collected
示例#4
0
def _remove_suffix_from_violation_entity(
        violation_entities:
        List[Union[schema.StateSupervisionViolation, schema.StateSupervisionViolationResponse]]):
    """Strips the last two '-'-delimited components from each entity's external_id, in place.

    Entities without an external_id are skipped. For all others the id is split from the right on '-' at most
    twice, and only the leading part is kept.

    Raises:
        EntityMatchingError: if an external_id contains fewer than two '-' characters.
    """
    for entity in violation_entities:
        if not entity.external_id:
            continue
        splits = entity.external_id.rsplit('-', 2)
        if len(splits) != 3:
            # Bracket the id so it is not fused onto the entity name in the message.
            raise EntityMatchingError(
                f'Unexpected id format for {entity.get_entity_name()} [{entity.external_id}]',
                entity.get_entity_name())
        entity.external_id = splits[0]
示例#5
0
    def __init__(self, entity: Entity, ancestor_chain: List[Entity]):
        """Stores |entity| together with a copy of its |ancestor_chain|.

        Raises EntityMatchingError when |entity| is not set.
        """
        if not entity:
            raise EntityMatchingError(
                "When creating EntityTree object, entity field must be set",
                'entity_tree')

        # The entity at the bottom of this tree (its final child).
        self.entity = entity

        # Ancestors of the entity above, ordered from furthest to closest.
        # Sliced so the tree keeps its own copy of the list.
        self.ancestor_chain = ancestor_chain[:]
def _get_sequence_no(period: schema.StateIncarcerationPeriod) -> int:
    """Extracts the ND specific Movement Sequence Number from the external id
    of the provided |period|.

    The sequence number is the integer value of the text after the last '-'
    in the external id.

    Raises:
        EntityMatchingError: if the external id is missing or its final
            component cannot be parsed as an integer. The underlying
            exception is attached as the cause.
    """
    try:
        external_id = cast(str, period.external_id)
        return int(external_id.split('-')[-1])
    except Exception as e:
        # Chain explicitly so the original parsing failure is kept as the
        # cause of the matching error.
        raise EntityMatchingError(
            f"Could not parse sequence number from external_id "
            f"{period.external_id}", period.get_entity_name()) from e
示例#7
0
    def resolve_child_match_result():
        """Records the outcome of the current child match: the merged trees
        when there are any, otherwise the original ingested tree.
        """
        if not child_match_result:
            raise EntityMatchingError(
                f"Expected child_match_result to be set, but instead got "
                f"{child_match_result}",
                ingested_entity_tree.entity.get_entity_name())

        merged_trees = child_match_result.merged_entity_trees
        if merged_trees:
            updated_child_trees.extend(merged_trees)
        else:
            updated_child_trees.append(child_match_result.ingested_entity_tree)
示例#8
0
def _is_match(*, ingested_entity: Entity, db_entity: Entity) -> bool:
    """Decides whether the provided |ingested_entity| matches the provided
    |db_entity|.

    Persons match when any pair of their external ids match. Person external
    ids match on state code, external id and id type. Aliases, races and
    ethnicities match on state code plus their distinguishing value. All other
    entities match on external_id, with two id-less entities matching only
    when both are placeholders.

    Raises EntityMatchingError when the two entities are of different classes.
    """
    if not (ingested_entity and db_entity):
        return ingested_entity == db_entity

    if ingested_entity.__class__ != db_entity.__class__:
        raise EntityMatchingError(
            f"is_match received entities of two different classes: "
            f"ingested entity {ingested_entity.__class__.__name__} and "
            f"db_entity {db_entity.__class__.__name__}",
            ingested_entity.get_entity_name())

    if isinstance(ingested_entity, StatePerson):
        db_entity = cast(StatePerson, db_entity)
        return any(
            _is_match(ingested_entity=ingested_external_id,
                      db_entity=db_external_id)
            for ingested_external_id in ingested_entity.external_ids
            for db_external_id in db_entity.external_ids)

    if isinstance(ingested_entity, StatePersonExternalId):
        db_entity = cast(StatePersonExternalId, db_entity)
        return (ingested_entity.state_code == db_entity.state_code
                and ingested_entity.external_id == db_entity.external_id
                and ingested_entity.id_type == db_entity.id_type)

    # As person has already been matched, assume that any of these 'person
    # attribute' entities are matches if their state_codes align (along with
    # the one value that distinguishes them).
    person_attribute_fields = (
        (StatePersonAlias, 'full_name'),
        (StatePersonRace, 'race'),
        (StatePersonEthnicity, 'ethnicity'),
    )
    for attribute_cls, value_field in person_attribute_fields:
        if isinstance(ingested_entity, attribute_cls):
            db_entity = cast(attribute_cls, db_entity)
            return (ingested_entity.state_code == db_entity.state_code
                    and getattr(ingested_entity, value_field)
                    == getattr(db_entity, value_field))

    db_entity = cast(ExternalIdEntity, db_entity)
    ingested_entity = cast(ExternalIdEntity, ingested_entity)

    # Placeholder entities are considered equal.
    both_ids_missing = (ingested_entity.external_id is None
                        and db_entity.external_id is None)
    if both_ids_missing:
        return is_placeholder(ingested_entity) and is_placeholder(db_entity)
    return ingested_entity.external_id == db_entity.external_id
示例#9
0
def get_set_entity_field_names(
        entity: Entity,
        entity_field_type: EntityFieldType) -> Set[str]:
    """Returns the names of the fields that are set on |entity| and fall into
    the requested |entity_field_type| category.

    Fields whose value is None or an empty list are not considered set.
    Entity-valued and list-valued fields are classified as back or forward
    edges by the direction checker; everything else is a flat field.

    Raises EntityMatchingError for an unsupported |entity_field_type|.
    """
    uses_state_schema = entity.get_entity_name().startswith('state_')
    direction_checker = (
        SchemaEdgeDirectionChecker.state_direction_checker()
        if uses_state_schema
        else SchemaEdgeDirectionChecker.county_direction_checker())

    back_edges = set()
    forward_edges = set()
    flat_fields = set()
    for field in attr.fields_dict(entity.__class__):
        value = getattr(entity, field)
        if value is None:
            continue

        # TODO(1908): Update traversal logic if relationship fields can be
        # different types aside from Entity and List
        is_entity = issubclass(type(value), Entity)
        if not is_entity and not isinstance(value, list):
            flat_fields.add(field)
            continue
        if not is_entity and not value:
            # Disregard empty lists
            continue

        if direction_checker.is_back_edge(entity, field):
            back_edges.add(field)
        else:
            forward_edges.add(field)

    names_by_type = (
        (EntityFieldType.FLAT_FIELD, flat_fields),
        (EntityFieldType.FORWARD_EDGE, forward_edges),
        (EntityFieldType.BACK_EDGE, back_edges),
    )
    for candidate_type, field_names in names_by_type:
        if entity_field_type is candidate_type:
            return field_names
    raise EntityMatchingError(
        f"Unrecognized EntityFieldType {entity_field_type}",
        'entity_field_type')
示例#10
0
def _get_all_entity_field_names(entity: Entity,
                                entity_field_type: EntityFieldType,
                                direction_checker):
    """Returns the names of the set fields on the Entity |entity| that belong
    to the requested |entity_field_type| category.

    None values and empty lists are skipped. Entity and list values are sorted
    into back/forward edges using |direction_checker|; all remaining values
    are flat fields. FOREIGN_KEYS always yields an empty set and ALL yields
    the union of flat fields and both edge kinds.

    Raises EntityMatchingError for an unrecognized |entity_field_type|.
    """
    back_edges = set()
    forward_edges = set()
    flat_fields = set()
    for field in attr.fields_dict(entity.__class__):
        value = getattr(entity, field)
        if value is None:
            continue

        # TODO(1908): Update traversal logic if relationship fields can be
        # different types aside from Entity and List
        is_entity = issubclass(type(value), Entity)
        if not is_entity and not isinstance(value, list):
            flat_fields.add(field)
            continue
        if not is_entity and not value:
            # Disregard empty lists
            continue

        edge_bucket = (back_edges
                       if direction_checker.is_back_edge(entity, field)
                       else forward_edges)
        edge_bucket.add(field)

    names_by_type = (
        (EntityFieldType.FLAT_FIELD, flat_fields),
        # Entity objects never have foreign keys
        (EntityFieldType.FOREIGN_KEYS, set()),
        (EntityFieldType.FORWARD_EDGE, forward_edges),
        (EntityFieldType.BACK_EDGE, back_edges),
        (EntityFieldType.ALL, flat_fields | forward_edges | back_edges),
    )
    for candidate_type, field_names in names_by_type:
        if entity_field_type is candidate_type:
            return field_names
    raise EntityMatchingError(
        f"Unrecognized EntityFieldType {entity_field_type}",
        'entity_field_type')
示例#11
0
def revoked_to_prison(svr: schema.StateSupervisionViolationResponse) -> bool:
    """Tells whether the provided |svr| carries a revocation type that sends the person back to prison.

    Returns False when no revocation type is set or when it is RETURN_TO_SUPERVISION, and True for
    REINCARCERATION, SHOCK_INCARCERATION and TREATMENT_IN_PRISON.

    Raises:
        EntityMatchingError: if the revocation type is none of the known values.
    """
    if not svr.revocation_type:
        return False

    revocation_enum = StateSupervisionViolationResponseRevocationType
    prison_values = (
        revocation_enum.REINCARCERATION.value,
        revocation_enum.SHOCK_INCARCERATION.value,
        revocation_enum.TREATMENT_IN_PRISON.value,
    )
    non_prison_values = (revocation_enum.RETURN_TO_SUPERVISION.value,)

    if svr.revocation_type in prison_values:
        return True
    if svr.revocation_type in non_prison_values:
        return False
    raise EntityMatchingError(f"Unexpected StateSupervisionViolationRevocationType {svr.revocation_type}.",
                              svr.get_entity_name())
示例#12
0
    def __init__(self, ingested_entity_tree: EntityTree,
                 merged_entity_trees: List[EntityTree], error_count: int):
        """Stores the result of matching one ingested entity tree.

        Raises EntityMatchingError when |ingested_entity_tree| is not set.
        """
        if not ingested_entity_tree:
            raise EntityMatchingError(
                "When creating IndividualMatchResult object, "
                "ingested_entity_tree field must be set",
                'individual_match_result')

        # The EntityTree that matching started from.
        self.ingested_entity_tree = ingested_entity_tree

        # Results of merging the ingested tree with its DB matches, when
        # matching was successful.
        self.merged_entity_trees = merged_entity_trees

        # How many errors were hit while matching this entity.
        self.error_count = error_count
def is_incarceration_period_match(
    ingested_entity: Union[EntityTree, StateBase],
    db_entity: Union[EntityTree, StateBase],
) -> bool:
    """
    Determines if the provided |ingested_entity| matches the |db_entity| based
    on ND specific StateIncarcerationPeriod matching.

    Either both arguments are EntityTrees (in which case the wrapped entities
    are compared) or both are the entities themselves.

    Matching rules:
      - If either period's state_code is not US_ND, there is no match.
      - Two incomplete periods are compared with
        is_incomplete_incarceration_period_match.
      - Two complete periods match when their external ids are equal.
      - One complete and one incomplete period match when the incomplete
        period's external id is one of the two ids obtained by splitting the
        complete period's external id on the incarceration period delimiter.

    Raises:
        EntityMatchingError: if the complete period's external id does not
            split into exactly two parts.
    """
    if isinstance(ingested_entity, EntityTree):
        # Narrow the tree first and then unwrap it; casting the unwrapped
        # entity to EntityTree would mislabel it for the type checker.
        db_entity = cast(EntityTree, db_entity).entity
        ingested_entity = ingested_entity.entity

    ingested_entity = cast(schema.StateIncarcerationPeriod, ingested_entity)
    db_entity = cast(schema.StateIncarcerationPeriod, db_entity)

    # Enforce that all objects being compared are for US_ND
    if ingested_entity.state_code != "US_ND" or db_entity.state_code != "US_ND":
        return False

    ingested_complete = is_incarceration_period_complete(ingested_entity)
    db_complete = is_incarceration_period_complete(db_entity)
    if not ingested_complete and not db_complete:
        return is_incomplete_incarceration_period_match(ingested_entity, db_entity)
    if ingested_complete and db_complete:
        return ingested_entity.external_id == db_entity.external_id

    # Only one of the two is complete
    complete, incomplete = (
        (ingested_entity, db_entity)
        if ingested_complete
        else (db_entity, ingested_entity)
    )

    complete_external_ids = complete.external_id.split(
        _INCARCERATION_PERIOD_ID_DELIMITER
    )
    incomplete_external_id = incomplete.external_id

    if len(complete_external_ids) != 2:
        raise EntityMatchingError(
            f"Could not split external id [{complete.external_id}] of complete incarceration period [{complete}] as "
            f"expected",
            ingested_entity.get_entity_name(),
        )

    return incomplete_external_id in complete_external_ids
示例#14
0
    def _match_entity_tree(self, *, ingested_entity_tree: EntityTree,
                           db_entity_trees: List[EntityTree],
                           matched_entities_by_db_ids: Dict[
                               int, List[DatabaseEntity]],
                           root_entity_cls: Type) -> IndividualMatchResult:
        """Raises for entities configured to error; otherwise delegates to
        the parent implementation.

        An entity errors when it is an instance of self.erroring_class and
        its external id is in self.erroring_external_ids.
        """
        ingested_entity = ingested_entity_tree.entity
        should_error = (
            isinstance(ingested_entity, self.erroring_class)
            and ingested_entity.get_external_id() in self.erroring_external_ids)
        if should_error:
            raise EntityMatchingError(
                "error!", ingested_entity.get_entity_name())

        return super()._match_entity_tree(
            ingested_entity_tree=ingested_entity_tree,
            db_entity_trees=db_entity_trees,
            matched_entities_by_db_ids=matched_entities_by_db_ids,
            root_entity_cls=root_entity_cls,
        )
示例#15
0
def get_external_ids_of_cls(persons: List[schema.StatePerson],
                            cls: Type[DatabaseEntity]) -> Set[str]:
    """Gathers the external ids of every entity of type |cls| found in the
    provided |persons| trees.

    Raises EntityMatchingError when any such entity yields no external ids.
    """
    check_all_objs_have_type(persons, schema.StatePerson)

    collected: Set[str] = set()
    for entity in get_all_entities_of_cls(persons, cls):
        entity_external_ids = get_external_ids_from_entity(entity)
        if entity_external_ids:
            collected.update(entity_external_ids)
            continue
        raise EntityMatchingError(
            f'Expected all external_ids to be present in cls '
            f'[{cls.__name__}]', cls.__name__)
    return collected
示例#16
0
def set_field_from_list(entity: Entity, field_name: str, value: List):
    """Writes the list |value| onto |entity|'s |field_name| field, adapting
    it to the field's current shape.

    A field that currently holds a list receives |value| as-is. Any other
    field receives None for an empty |value| or the sole element of a
    one-item |value|; more than one item raises EntityMatchingError.
    """
    current_value = get_field(entity, field_name)
    if isinstance(current_value, list):
        set_field(entity, field_name, value)
        return

    if not value:
        set_field(entity, field_name, None)
        return

    if len(value) != 1:
        raise EntityMatchingError(
            f"Attempting to set singular field: {field_name} on entity: "
            f"{entity.get_entity_name()}, but got multiple values: "
            f"{value}.", entity.get_entity_name())
    set_field(entity, field_name, value[0])
def _set_preceding_admission_reason(
    idx: int,
    sorted_ips: List[schema.StateIncarcerationPeriod],
    overrides: EnumOverrides,
) -> None:
    """
    Given a list of |sorted_ips| and an index |idx| pointing at a DOCR (i.e.
    non-hold) incarceration period, walks backwards over the run of holds that
    are consecutive with that period and remembers the admission reason raw
    text of the earliest hold in the run.

    If such a hold exists and the period at |idx| currently has an admission
    reason of TRANSFER, its admission reason is replaced by the value parsed
    from that raw text using |overrides|.

    Raises EntityMatchingError if the period at |idx| is itself a hold.
    """
    beginning_ip = sorted_ips[idx]
    if _is_hold(beginning_ip):
        raise EntityMatchingError(
            f"Expected beginning_ip to NOT be a hold, instead found [{beginning_ip}] with "
            f"incarceration type [{beginning_ip.incarceration_type}]",
            "incarceration_period",
        )

    earliest_hold_admission_raw_text = None
    later_ip = None
    cursor = idx
    while cursor >= 0:
        current_ip = sorted_ips[cursor]
        cursor -= 1

        if not later_ip:
            # First pass: the starting period only seeds the comparison.
            later_ip = current_ip
            continue

        # Stop at the first period that is not a hold directly preceding the
        # one examined before it.
        if not (_is_hold(current_ip) and _are_consecutive(current_ip, later_ip)):
            break

        earliest_hold_admission_raw_text = current_ip.admission_reason_raw_text
        later_ip = current_ip

    # Update the original incarceration period's admission reason if necessary.
    is_transfer = (
        beginning_ip.admission_reason
        == StateIncarcerationPeriodAdmissionReason.TRANSFER.value
    )
    if earliest_hold_admission_raw_text and is_transfer:
        parsed_reason = StateIncarcerationPeriodAdmissionReason.parse(
            earliest_hold_admission_raw_text, overrides
        )
        beginning_ip.admission_reason = parsed_reason.value
示例#18
0
def get_external_ids_of_cls(persons: List[schema.StatePerson],
                            cls: Type[DatabaseEntity]) -> Set[str]:
    """Gathers the external ids of every entity of type |cls| found in the
    provided |persons| trees.

    Raises EntityMatchingError when any such entity yields no external ids.
    """
    check_all_objs_have_type(persons, schema.StatePerson)

    collected: Set[str] = set()
    for entity in get_all_entities_of_cls(persons, cls):
        entity_external_ids = get_external_ids_from_entity(entity)
        if entity_external_ids:
            collected.update(entity_external_ids)
            continue
        raise EntityMatchingError(
            f"Expected external_ids to be non-empty for entity [{entity}] with class [{cls.__name__}]",
            entity.get_class_id_name(),
        )
    return collected
示例#19
0
def add_child_to_entity(*, entity: Entity, child_field_name: str,
                        child_to_add: Entity):
    """Puts |child_to_add| onto the |child_field_name| field of |entity|.

    For a list field the child is appended unless already present. For a
    singular field the child becomes the field's value; if the field already
    holds a different, truthy value an EntityMatchingError is raised. The
    resulting value is always written back with set_field.
    """
    current_value = get_field(entity, child_field_name)

    if isinstance(current_value, list):
        if child_to_add not in current_value:
            current_value.append(child_to_add)
        updated_value = current_value
    else:
        if current_value and current_value != child_to_add:
            raise EntityMatchingError(
                f"Attempting to add child {child_to_add} to entity {entity}, "
                f"but {child_field_name} already had different value "
                f"{current_value}", entity.get_entity_name())
        updated_value = child_to_add

    set_field(entity, child_field_name, updated_value)
示例#20
0
def _nd_read_people(session: Session, region: str,
                    ingested_people: List[StatePerson]) -> List[StatePerson]:
    """ND specific lookup of all people needed for entity matching, driven by
    the provided |region| and |ingested_people|.

    People are read by the external ids of the root entity class of
    |ingested_people|, which must be StatePerson or StateSentenceGroup;
    any other root class raises EntityMatchingError.
    """
    root_entity_cls = get_root_entity_cls(ingested_people)
    allowed_root_classes = (StatePerson, StateSentenceGroup)
    if root_entity_cls not in allowed_root_classes:
        raise EntityMatchingError(
            f'For region [{region}] found unexpected root_entity_cls: '
            f'[{root_entity_cls.__name__}]', 'root_entity_cls')

    root_external_ids = get_external_ids_of_cls(
        ingested_people, root_entity_cls)
    return dao.read_people_by_cls_external_ids(
        session, region, root_entity_cls, root_external_ids,
        populate_back_edges=False)
示例#21
0
def get_root_entity_cls(ingested_persons: List[StatePerson]) -> Type:
    """
    Looks for the highest entity class within the |ingested_persons| whose
    objects are not placeholders and returns it; raises EntityMatchingError if
    no such class is found (including when |ingested_persons| is empty).

    Only the first person in the list is inspected.

    Note: This should only be used with persons ingested from a region directly
    (and not with persons post entity matching), as this function uses DFS to
    find the root entity cls. This therefore assumes that a) the passed in
    StatePersons are trees and not DAGs (one parent per entity) and b) that the
    structure of the passed in graph is symmetrical.
    """
    root_cls = (_get_root_entity_helper(ingested_persons[0])
                if ingested_persons else None)
    if root_cls is not None:
        return root_cls
    raise EntityMatchingError(
        "Could not find root class for ingested persons", 'state_person')
示例#22
0
def add_child_to_entity(*, entity: DatabaseEntity, child_field_name: str,
                        child_to_add: DatabaseEntity):
    """Puts |child_to_add| onto the |child_field_name| field of |entity|.

    For a list field the child is appended in place unless already present.
    For a singular field the child is written with entity.set_field; if the
    field already holds a different, truthy value an EntityMatchingError is
    raised instead.
    """
    current_value = entity.get_field(child_field_name)

    if isinstance(current_value, list):
        # The list is mutated in place, so nothing needs to be written back.
        if child_to_add not in current_value:
            current_value.append(child_to_add)
        return

    if current_value and current_value != child_to_add:
        raise EntityMatchingError(
            f"Attempting to add child [{child_to_add}] to entity [{entity}], but field [{child_field_name}] "
            f"already had different value [{current_value}]",
            entity.get_entity_name())
    entity.set_field(child_field_name, child_to_add)
示例#23
0
    def resolve_child_match_result():
        """Moves matched children from the ingested placeholder entity onto
        the DB entity they were merged under.

        Unmatched children, and children whose merged parent is the ingested
        placeholder itself, are kept in placeholder_children. Every other DB
        parent is recorded once in updated_entity_trees and in the matched
        entities cache.
        """
        if not (child_field_name and child_match_result):
            raise EntityMatchingError(
                f"Expected child_field_name and child_match_result to be set, "
                f"but instead got {child_field_name} and {child_match_result} "
                f"respectively.",
                ingested_placeholder_tree.entity.get_entity_name())

        merged_child_trees = child_match_result.merged_entity_trees

        # If the child wasn't matched, leave it on the placeholder object.
        if not merged_child_trees:
            placeholder_children.append(
                child_match_result.ingested_entity_tree.entity)
            return

        # Ensure the merged children are on the correct entity
        for merged_child_tree in merged_child_trees:
            merged_parent_tree = merged_child_tree.generate_parent_tree()
            merged_parent = merged_parent_tree.entity

            # If one of the merged parents is the ingested placeholder entity,
            # simply keep track of the child in placeholder_children.
            if merged_parent == ingested_placeholder_tree.entity:
                placeholder_children.append(
                    child_match_result.ingested_entity_tree.entity)
                continue

            add_child_to_entity(entity=merged_parent,
                                child_field_name=child_field_name,
                                child_to_add=merged_child_tree.entity)

            # Keep track of all db parents of the merged children.
            already_updated = [tree.entity for tree in updated_entity_trees]
            if merged_parent in already_updated:
                continue
            _add_match_to_matched_entities_cache(
                db_entity_match=merged_parent,
                ingested_entity=ingested_placeholder_tree.entity,
                matched_entities_by_db_ids=matched_entities_by_db_ids)
            updated_entity_trees.append(merged_parent_tree)
def _is_hold(ip: schema.StateIncarcerationPeriod) -> bool:
    """Determines if the provided |ip| represents a temporary hold and not a
    stay in a DOCR overseen facility.

    Periods admitted before July 1, 2017 are never holds. After that date,
    COUNTY_JAIL and EXTERNAL_UNKNOWN incarceration types are holds and
    STATE_PRISON is not.

    Raises:
        EntityMatchingError: if the incarceration type is none of the values
            above.
    """

    # Everything before July 1, 2017 was overseen by DOCR.
    if ip.admission_date < datetime.date(year=2017, month=7, day=1):
        return False

    hold_types = [
        StateIncarcerationType.COUNTY_JAIL.value,
        StateIncarcerationType.EXTERNAL_UNKNOWN.value]
    non_hold_types = [StateIncarcerationType.STATE_PRISON.value]
    if ip.incarceration_type in hold_types:
        return True
    if ip.incarceration_type in non_hold_types:
        return False
    # The trailing space keeps the type name and the offending value apart
    # once the two string pieces are concatenated.
    raise EntityMatchingError(
        f"Unexpected StateIncarcerationType "
        f"{ip.incarceration_type}.", ip.get_entity_name())
示例#25
0
def _get_all_database_entity_field_names(entity: DatabaseEntity,
                                         entity_field_type: EntityFieldType,
                                         direction_checker):
    """Returns the field names on the DatabaseEntity |entity| that belong to
    the requested |entity_field_type| category.

    Relationship properties are split into back and forward edges using
    |direction_checker|. Column properties that are not foreign keys are flat
    fields. ALL yields the union of every category.

    Raises EntityMatchingError for an unrecognized |entity_field_type|.
    """
    back_edges = set()
    forward_edges = set()
    for relationship_name in entity.get_relationship_property_names():
        edge_bucket = (
            back_edges
            if direction_checker.is_back_edge(entity, relationship_name)
            else forward_edges)
        edge_bucket.add(relationship_name)

    foreign_keys = set(entity.get_foreign_key_names())

    # Foreign key columns are reported separately from flat fields.
    flat_fields = {
        column_name
        for column_name in entity.get_column_property_names()
        if column_name not in foreign_keys
    }

    names_by_type = (
        (EntityFieldType.FLAT_FIELD, flat_fields),
        (EntityFieldType.FOREIGN_KEYS, foreign_keys),
        (EntityFieldType.FORWARD_EDGE, forward_edges),
        (EntityFieldType.BACK_EDGE, back_edges),
        (EntityFieldType.ALL,
         flat_fields | foreign_keys | forward_edges | back_edges),
    )
    for candidate_type, field_names in names_by_type:
        if entity_field_type is candidate_type:
            return field_names
    raise EntityMatchingError(
        f"Unrecognized EntityFieldType {entity_field_type}",
        'entity_field_type')
示例#26
0
def read_persons_by_root_entity_cls(
        session: Session,
        region: str,
        ingested_people: List[schema.StatePerson],
        allowed_root_entity_classes: Optional[List[Type[DatabaseEntity]]],
) -> List[schema.StatePerson]:
    """Reads every person needed for entity matching, based on the provided
    |region| and |ingested_people|.

    People are looked up by the external ids of the root entity class of
    |ingested_people|, and all placeholder persons are read as well. The two
    result lists are combined with duplicates (by person_id) removed.

    If |allowed_root_entity_classes| is provided, an EntityMatchingError is
    raised when the root entity class is not one of them.
    """
    root_entity_cls = get_root_entity_cls(ingested_people)
    is_unexpected_root = (
        allowed_root_entity_classes
        and root_entity_cls not in allowed_root_entity_classes)
    if is_unexpected_root:
        raise EntityMatchingError(
            f'For region [{region}] found unexpected root_entity_cls: '
            f'[{root_entity_cls.__name__}]', 'root_entity_cls')

    root_external_ids = get_external_ids_of_cls(
        ingested_people, root_entity_cls)
    logging.info("[Entity Matching] Reading [%s] external ids of class [%s]",
                 len(root_external_ids), root_entity_cls.__name__)
    persons_by_root_entity = dao.read_people_by_cls_external_ids(
        session, region, root_entity_cls, root_external_ids)
    placeholder_persons = dao.read_placeholder_persons(session)

    # When the |root_entity_cls| is not StatePerson, the same placeholder
    # person(s) can show up in both persons_by_root_entity and
    # placeholder_persons, so keep only the first occurrence of each
    # person_id.
    deduped_people = []
    seen_person_ids: Set[int] = set()
    for person in persons_by_root_entity + placeholder_persons:
        person_id = person.person_id
        if person_id in seen_person_ids:
            continue
        seen_person_ids.add(person_id)
        deduped_people.append(person)

    return deduped_people
示例#27
0
def _merge_incomplete_periods(
        a: StateIncarcerationPeriod, b: StateIncarcerationPeriod) \
        -> StateIncarcerationPeriod:
    """Merges an admission-only period and a release-only period into one.

    The result is a copy of the admission period onto which the release
    period's flat fields are merged. Its external id is the admission id and
    the release id joined by the incarceration period delimiter, with a
    missing id contributing an empty string.

    Raises EntityMatchingError when |a| and |b| do not form an
    admission/release pair.
    """
    if a.admission_date and b.release_date:
        admission_period, release_period = a, b
    elif a.release_date and b.admission_date:
        admission_period, release_period = b, a
    else:
        raise EntityMatchingError(
            f"Expected one admission period and one release period when "
            f"merging, instead found periods: {a}, {b}", a.get_entity_name())

    merged_period = attr.evolve(admission_period)
    combined_external_id = (
        (admission_period.external_id or '')
        + _INCARCERATION_PERIOD_ID_DELIMITER
        + (release_period.external_id or ''))

    _default_merge_flat_fields(new_entity=release_period,
                               old_entity=merged_period)

    # Set last so the combined id is what ends up on the merged period.
    merged_period.external_id = combined_external_id
    return merged_period
示例#28
0
def admitted_for_revocation(ip: StateIncarcerationPeriod) -> bool:
    """Returns True if the admission reason on |ip| is a revocation.

    A period without an admission reason is not a revocation. PAROLE_REVOCATION
    and PROBATION_REVOCATION count as revocations; the other admission reasons
    enumerated below do not. Any reason outside of both groups raises an
    EntityMatchingError.
    """
    reason = ip.admission_reason
    if not reason:
        return False

    reasons = StateIncarcerationPeriodAdmissionReason
    revocation_reasons = (
        reasons.PAROLE_REVOCATION,
        reasons.PROBATION_REVOCATION,
    )
    other_known_reasons = (
        reasons.ADMITTED_IN_ERROR,
        reasons.EXTERNAL_UNKNOWN,
        reasons.NEW_ADMISSION,
        reasons.RETURN_FROM_ERRONEOUS_RELEASE,
        reasons.RETURN_FROM_ESCAPE,
        reasons.TRANSFER,
    )

    if reason in revocation_reasons:
        return True
    if reason in other_known_reasons:
        return False

    # Fail loudly on reasons that have not been classified above instead of
    # silently picking an answer for them.
    raise EntityMatchingError(
        f"Unexpected StateIncarcerationPeriodAdmissionReason "
        f"{ip.admission_reason}.", ip.get_entity_name())
示例#29
0
def set_field(entity: Entity, field_name: str, value: Any):
    """Assigns |value| to the attribute |field_name| of |entity|.

    Only attributes that already exist on |entity| may be set; if |entity| has
    no attribute called |field_name|, an EntityMatchingError is raised and the
    entity is left unchanged. Returns the result of setattr, i.e. None.
    """
    if hasattr(entity, field_name):
        return setattr(entity, field_name, value)

    raise EntityMatchingError(
        f"Expected entity {entity} to have field {field_name}, but it did "
        f"not.", entity.get_entity_name())
示例#30
0
def _is_match(*,
              ingested_entity: DatabaseEntity,
              db_entity: DatabaseEntity) -> bool:
    """Decides whether |ingested_entity| and |db_entity| are a match.

    The checks below are applied in order:
      - If either entity is falsy, the result is their plain equality.
      - Entities of different classes, or entities that are not
        DatabaseEntity instances, raise an EntityMatchingError.
      - StatePersons match when any ingested external id matches any DB
        external id.
      - Every other entity must first agree on 'state_code'. After that:
          * StatePersonExternalId compares external_id and id_type.
          * StatePersonAlias / StatePersonRace / StatePersonEthnicity compare
            full_name / race / ethnicity respectively.
          * The violation response decision, violated condition, violation
            type and case type entries defer to _base_entity_match.
          * Anything else compares external ids; when both ids are None the
            entities match only if both are placeholders.
    """
    if not (ingested_entity and db_entity):
        return ingested_entity == db_entity

    if ingested_entity.__class__ != db_entity.__class__:
        raise EntityMatchingError(
            f"is_match received entities of two different classes: "
            f"ingested entity {ingested_entity.__class__.__name__} and "
            f"db_entity {db_entity.__class__.__name__}",
            ingested_entity.get_entity_name())

    if not isinstance(ingested_entity, DatabaseEntity):
        raise EntityMatchingError(
            f"Unexpected type for ingested entity[{type(ingested_entity)}]",
            'unknown')
    if not isinstance(db_entity, DatabaseEntity):
        raise EntityMatchingError(
            f"Unexpected type for db entity[{type(db_entity)}]",
            'unknown')

    # People match as soon as one pair of their external ids matches.
    if isinstance(ingested_entity, schema.StatePerson):
        db_person = cast(schema.StatePerson, db_entity)
        return any(
            _is_match(ingested_entity=ingested_id, db_entity=db_id)
            for ingested_id in ingested_entity.external_ids
            for db_id in db_person.external_ids)

    # Aside from people, all entities are state specific.
    ingested_state_code = ingested_entity.get_field('state_code')
    if ingested_state_code != db_entity.get_field('state_code'):
        return False

    # TODO(2671): Update all person attributes below to use complete entity
    # equality instead of just comparing individual fields.
    if isinstance(ingested_entity, schema.StatePersonExternalId):
        db_person_external_id = cast(schema.StatePersonExternalId, db_entity)
        return (
            ingested_entity.external_id == db_person_external_id.external_id
            and ingested_entity.id_type == db_person_external_id.id_type)

    # As person has already been matched, assume that any of these 'person
    # attribute' entities are matches if the one listed attribute matches.
    person_attribute_fields = (
        (schema.StatePersonAlias, 'full_name'),
        (schema.StatePersonRace, 'race'),
        (schema.StatePersonEthnicity, 'ethnicity'),
    )
    for attribute_cls, compared_field in person_attribute_fields:
        if isinstance(ingested_entity, attribute_cls):
            return getattr(ingested_entity, compared_field) \
                == getattr(db_entity, compared_field)

    base_matched_classes = (
        schema.StateSupervisionViolationResponseDecisionEntry,
        schema.StateSupervisionViolatedConditionEntry,
        schema.StateSupervisionViolationTypeEntry,
        schema.StateSupervisionCaseTypeEntry,
    )
    if isinstance(ingested_entity, base_matched_classes):
        return _base_entity_match(ingested_entity, db_entity)

    ingested_external_id = ingested_entity.get_external_id()
    db_external_id = db_entity.get_external_id()

    # Placeholders entities are considered equal
    if ingested_external_id is None and db_external_id is None:
        return is_placeholder(ingested_entity) and is_placeholder(db_entity)
    return ingested_external_id == db_external_id