Пример #1
0
def simplify_provenance(proxy: EntityProxy) -> EntityProxy:
    """Reduce each multi-valued provenance date to one representative value.

    ``modifiedAt`` and ``retrievedAt`` are replaced by the ``max()`` of their
    values, while ``createdAt``, ``authoredAt`` and ``publishedAt`` are
    replaced by the ``min()`` of theirs. Properties for which ``proxy.has``
    reports nothing are left alone. The proxy is modified in place and
    returned.
    """
    # Each rule pairs a reducer with the property names it applies to.
    rules = (
        (max, ("modifiedAt", "retrievedAt")),
        (min, ("createdAt", "authoredAt", "publishedAt")),
    )
    for pick, prop_names in rules:
        for prop_name in prop_names:
            if not proxy.has(prop_name, quiet=True):
                continue
            proxy.set(prop_name, pick(proxy.get(prop_name)))
    return proxy
Пример #2
0
def remove_prefix_dates(entity: EntityProxy) -> EntityProxy:
    """Prune less specific date values from every date property of an entity.

    When a date property carries several values, those that are merely a
    prefix of another value are meant to be dropped: a Person with birth
    dates ``1990`` and ``1990-05-01`` should keep only ``1990-05-01``. The
    pruning itself is delegated to ``remove_prefix_date_values``; this
    function applies it to each property whose type is ``registry.date`` and
    writes the result back. The entity is modified in place and returned.
    """
    date_props = (prop for prop in entity.iterprops() if prop.type == registry.date)
    for date_prop in date_props:
        pruned = remove_prefix_date_values(entity.get(date_prop))
        entity.set(date_prop, pruned)
    return entity
Пример #3
0
def entity_filename(
    proxy: EntityProxy, base_name: Optional[str] = None, extension: Optional[str] = None
) -> Optional[str]:
    """Build a safe file name for the given entity.

    A ``base_name`` or ``extension`` passed in by the caller is never
    overridden. For entities whose schema is a ``Document``, whichever part
    is still missing is filled in from, in this order: the ``extension``
    property, the stem and suffix of each ``fileName`` value, and finally an
    extension guessed from the ``mimeType`` values. If the base name is still
    missing or empty, ``proxy.id`` is used. The pieces are combined by
    ``safe_filename`` and its result is returned.
    """
    if proxy.schema.is_a("Document"):
        declared = proxy.get("extension", quiet=True)
        if extension is None:
            # First declared extension wins.
            extension = next((ext for ext in declared if ext is not None), None)

        for file_name in proxy.get("fileName", quiet=True):
            stem, suffix = splitext(file_name)
            if base_name is None and stem:
                base_name = stem
            if extension is None and suffix:
                extension = suffix

        mime_types = proxy.get("mimeType", quiet=True)
        if extension is None:
            # map() is lazy, so guessing stops at the first MIME type that
            # yields an extension.
            guesses = map(guess_extension, mime_types)
            extension = next((found for found in guesses if found is not None), None)

    return safe_filename(base_name or proxy.id, extension=extension)
Пример #4
0
def name_entity(entity: EntityProxy) -> EntityProxy:
    """Keep one name on an entity and demote the remaining names to aliases.

    Only entities whose schema is a ``Thing`` and which carry more than one
    ``name`` value are touched. ``registry.name.pick`` selects the name to
    keep; every other name is added to the ``alias`` property. This is
    awkward, since names are not special and may not always be the caption.
    The entity is modified in place and returned.
    """
    if not entity.schema.is_a("Thing"):
        return entity

    names = entity.get("name")
    if len(names) <= 1:
        return entity

    chosen = registry.name.pick(names)
    try:
        # Whatever stays in ``names`` after this becomes the alias list.
        names.remove(chosen)
    except ValueError:
        # The picked name is not one of the original values; keep them all.
        pass
    entity.set("name", chosen)
    entity.add("alias", names)
    return entity