Exemplo n.º 1
0
def prune_dangling_placeholders_from_tree(
        entity: CoreEntity) -> Optional[CoreEntity]:
    """Strips placeholder-only subtrees out of the tree rooted at |entity|.

    Every child reachable through a set forward-edge field is pruned
    recursively, and each of those fields is rewritten in place so that it
    only holds the children that survived pruning.

    Returns |entity| itself when it is not a placeholder or when at least one
    of its children survived; returns None when the whole tree is a dangling
    placeholder.
    """
    kept_any_child = False
    forward_edge_fields = get_set_entity_field_names(
        entity, EntityFieldType.FORWARD_EDGE)

    for edge_field in forward_edge_fields:
        pruned_children = (
            prune_dangling_placeholders_from_tree(child)
            for child in entity.get_field_as_list(edge_field))
        # A falsy result means the child subtree was entirely placeholders.
        survivors = [subtree for subtree in pruned_children if subtree]
        kept_any_child = kept_any_child or bool(survivors)
        entity.set_field_from_list(edge_field, survivors)

    # Only consult is_placeholder when no child survived, as a surviving child
    # is enough on its own to keep this node.
    if not kept_any_child and is_placeholder(entity):
        return None
    return entity
Exemplo n.º 2
0
 def __init__(self, database_entity: CoreEntity,
              ingested_entities: Sequence[CoreEntity]):
     """Builds the error raised when one database entity matched several
     ingested entities.

     The message contains the database entity's id followed by the string
     form of each ingested entity, one per line. The message and the
     database entity's entity name are handed to the parent initializer.
     """
     db_id = database_entity.get_id()
     ingested_lines = '\n'.join(map(str, ingested_entities))
     msg = (
         "Matched one database entity to multiple ingested entities."
         "\nDatabase entity db id: {}"
         "\nIngested entities: {}").format(db_id, ingested_lines)
     super().__init__(msg, database_entity.get_entity_name())
Exemplo n.º 3
0
def is_placeholder(entity: CoreEntity) -> bool:
    """Reports whether |entity| is a placeholder.

    A placeholder is an object we know nothing about, whose existence was only
    inferred from other objects we do have information on. In practice, an
    entity counts as a placeholder when no flat field is set other than the
    primary key, 'state_code', and fields still holding their default value
    ('status', 'incarceration_type', 'court_type', 'agent_type').
    """
    if isinstance(entity, (schema.StatePerson, entities.StatePerson)):
        # These are not flat fields, but they describe characteristics of a
        # person. If any is present we do know something about the person, so
        # it cannot be a placeholder. All four attributes are read up front.
        person_details = (entity.external_ids, entity.races, entity.aliases,
                          entity.ethnicities)
        if any(person_details):
            return False

    remaining_fields = get_set_entity_field_names(entity,
                                                  EntityFieldType.FLAT_FIELD)

    # The primary key carries no information about the entity itself.
    remaining_fields.discard(entity.get_primary_key_column_name())

    # TODO(2244): Change this to a general approach so we don't need to check
    # explicit columns
    remaining_fields.discard('state_code')

    if 'status' in remaining_fields and entity.has_default_status():
        remaining_fields.remove('status')

    # Enum fields that are ignored when they still hold the listed default.
    default_enum_values = (
        ('incarceration_type', StateIncarcerationType.STATE_PRISON),
        ('court_type', StateCourtType.PRESENT_WITHOUT_INFO),
        ('agent_type', StateAgentType.PRESENT_WITHOUT_INFO),
    )
    for enum_field, default_value in default_enum_values:
        if (enum_field in remaining_fields
                and entity.has_default_enum(enum_field, default_value)):
            remaining_fields.remove(enum_field)

    # Anything still left is real information about the entity.
    return not remaining_fields
Exemplo n.º 4
0
    def __init__(self, ingested_entity: CoreEntity,
                 database_entities: Sequence[CoreEntity]):
        """Builds the error raised when one ingested entity matched several
        database entities.

        The message contains the string form of the ingested entity followed
        by the id of each matched database entity, one per line. The message
        and the ingested entity's entity name are handed to the parent
        initializer.
        """
        msg_template = (
            "Matched one ingested entity to multiple database entities."
            "\nIngested entity: {}"
            "\nDatabase entity db ids: {}")
        db_ids = '\n'.join(str(e.get_id()) for e in database_entities)
        msg = msg_template.format(ingested_entity, db_ids)
        # Zero-argument super() avoids hard-coding the class name and matches
        # the sibling error initializer.
        super().__init__(msg, ingested_entity.get_entity_name())
Exemplo n.º 5
0
def get_set_entity_field_names(entity: CoreEntity,
                               entity_field_type: EntityFieldType) -> Set[str]:
    """Returns the names of the |entity_field_type| fields that are set on
    |entity|.

    A list-valued field counts as set when the list is non-empty. Any other
    field counts as set when its value is not None, so falsy values such as
    0 or '' still count as set.
    """
    def _is_set(value) -> bool:
        if isinstance(value, list):
            return bool(value)
        return value is not None

    candidate_names = get_all_core_entity_field_names(entity,
                                                      entity_field_type)
    return {
        name for name in candidate_names
        if _is_set(entity.get_field(name))
    }
Exemplo n.º 6
0
def get_all_core_entity_field_names(
        entity: CoreEntity, entity_field_type: EntityFieldType) -> Set[str]:
    """Returns the set of field names on |entity| that match the provided
    |entity_field_type|.

    The edge direction checker is picked from the entity name: names starting
    with 'state_' use the state checker, everything else the county checker.
    The lookup itself is delegated to a helper chosen by whether |entity| is
    a DatabaseEntity or an Entity.

    NOTE(review): callers in this file filter the result on None / empty
    values, so this presumably includes unset fields as well - confirm against
    the helper implementations.

    Raises:
        ValueError: if |entity| is neither a DatabaseEntity nor an Entity.
    """
    # The checker is built before the type dispatch on purpose, keeping the
    # original evaluation order.
    is_state_entity = entity.get_entity_name().startswith('state_')
    make_direction_checker = (
        SchemaEdgeDirectionChecker.state_direction_checker
        if is_state_entity
        else SchemaEdgeDirectionChecker.county_direction_checker)
    direction_checker = make_direction_checker()

    if isinstance(entity, DatabaseEntity):
        field_names_getter = _get_all_database_entity_field_names
    elif isinstance(entity, Entity):
        field_names_getter = _get_all_entity_field_names
    else:
        raise ValueError(f"Invalid entity type [{type(entity)}]")

    return field_names_getter(entity, entity_field_type, direction_checker)
Exemplo n.º 7
0
def print_entity_tree(entity: CoreEntity,
                      print_tree_structure_only: bool = False,
                      indent: int = 0,
                      python_id_to_fake_id: Optional[Dict[int, int]] = None) -> None:
    """Recursively prints out all objects in the tree below the given entity. Each time we encounter a new object, we
    assign a new fake id (an auto-incrementing count) and print that with the object.

    This means that two entity trees with the exact same shape/flat fields will print out the exact same string, making
    it much easier to debug edge-related issues in Diffchecker, etc.

    Note: this function sorts any list fields in the provided entity IN PLACE (should not matter for any equality checks
    we generally do). The sort is only triggered when |python_id_to_fake_id| is None, i.e. on the outermost call.

    Args:
        entity: The root of the (sub)tree to print.
        print_tree_structure_only: If True, the only flat field printed for each object is 'external_id'.
        indent: Indentation passed to _print_indented for this entity's header line; its fields are printed at
            indent + 2 and its forward-edge children at indent + 4.
        python_id_to_fake_id: Fake-id mapping shared across the recursion. Callers normally leave this as None so
            that a fresh mapping is created.
    """
    if python_id_to_fake_id is None:
        python_id_to_fake_id = {}
        _sort_based_on_flat_fields([entity])

    _print_indented(_obj_id_str(entity, python_id_to_fake_id), indent)

    field_indent = indent + 2
    _print_flat_fields(entity, print_tree_structure_only, field_indent)
    _print_forward_edges(entity, print_tree_structure_only, field_indent, python_id_to_fake_id)
    _print_back_edges(entity, field_indent, python_id_to_fake_id)


def _print_flat_fields(entity: CoreEntity, print_tree_structure_only: bool, indent: int) -> None:
    """Prints each flat field of |entity| as 'name: value'; only 'external_id' in structure-only mode."""
    for field in get_all_core_entity_field_names(entity, EntityFieldType.FLAT_FIELD):
        if field == 'external_id' or not print_tree_structure_only:
            val = entity.get_field(field)
            _print_indented(f'{field}: {str(val)}', indent)


def _print_forward_edges(entity: CoreEntity,
                         print_tree_structure_only: bool,
                         indent: int,
                         python_id_to_fake_id: Dict[int, int]) -> None:
    """Prints each forward-edge field of |entity|, recursing into every child object via print_entity_tree."""
    for child_field in get_all_core_entity_field_names(entity, EntityFieldType.FORWARD_EDGE):
        child = entity.get_field(child_field)

        if child is None:
            _print_indented(f'{child_field}: None', indent)
        elif not isinstance(child, list):
            _print_indented(f'{child_field}:', indent)
            print_entity_tree(child, print_tree_structure_only, indent + 2, python_id_to_fake_id)
        elif not child:
            # Unlike back edges, an empty forward-edge list is shown as '[]' rather than 'None'.
            _print_indented(f'{child_field}: []', indent)
        else:
            _print_indented(f'{child_field}: [', indent)
            for c in child:
                print_entity_tree(c, print_tree_structure_only, indent + 2, python_id_to_fake_id)
            _print_indented(']', indent)


def _print_back_edges(entity: CoreEntity, indent: int, python_id_to_fake_id: Dict[int, int]) -> None:
    """Prints a one-line summary of each back-edge field of |entity| without recursing into the parents."""
    for child_field in get_all_core_entity_field_names(entity, EntityFieldType.BACK_EDGE):
        child = entity.get_field(child_field)

        if not child:
            # Both None and an empty list are reported as None here.
            _print_indented(f'{child_field}: None - backedge', indent)
        elif isinstance(child, list):
            # Compare by object identity to flag the same object appearing more than once in the list.
            unique = {id(c) for c in child}
            len_str = f'{len(child)}' if len(unique) == len(child) else f'{len(child)} - ONLY {len(unique)} UNIQUE!'

            # Only the first element is identified; the rest are elided.
            id_str = _obj_id_str(child[0], python_id_to_fake_id)
            ellipsis_str = ', ...' if len(child) > 1 else ''

            _print_indented(f'{child_field} ({len_str}): [{id_str}{ellipsis_str}] - backedge', indent)
        else:
            id_str = _obj_id_str(child, python_id_to_fake_id)
            _print_indented(f'{child_field}: {id_str} - backedge', indent)
Exemplo n.º 8
0
def _get_flat_fields_json_str(entity: CoreEntity):
    """Serializes the set flat fields of |entity| into a JSON object string.

    Each set flat field name is mapped to str() of its value, and the keys are
    emitted in sorted order.
    """
    set_flat_field_names = get_set_entity_field_names(
        entity, EntityFieldType.FLAT_FIELD)
    stringified_fields: Dict[str, str] = {
        name: str(entity.get_field(name)) for name in set_flat_field_names
    }
    return json.dumps(stringified_fields, sort_keys=True)