Exemplo n.º 1
0
def struct_to_token(
        attr: Annotation = Annotation("{struct}:{attr}"),
        token: Annotation = Annotation("<token>"),
        out: Output = Output("<token>:misc.from_struct_{struct}_{attr}")):
    """Convert an attribute on a structural annotation into a token attribute."""
    token_parents = token.get_parents(attr)
    attr_values = list(attr.read())
    out_values = [
        attr_values[p] if p is not None else "" for p in token_parents
    ]
    out.write(out_values)
Exemplo n.º 2
0
def metadata(out: Output = Output("{chunk}:geo.geo_metadata", description="Geographical places with coordinates"),
             chunk: Annotation = Annotation("{chunk}"),
             source: Annotation = Annotation("[geo.metadata_source]"),
             model: Model = Model("[geo.model]"),
             method: str = "populous",
             language: list = []):
    """Get location data based on metadata containing location names."""
    geomodel = load_model(model, language=language)

    same_target_source = chunk.split()[0] == source.split()[0]
    chunk_annotation = list(chunk.read())
    source_annotation = list(source.read())

    # If location source and target chunk are not the same, we need
    # to find the parent/child relations between them.
    if not same_target_source:
        target_source_parents = list(source.get_parents(chunk))

    chunk_locations = {}

    for i, _ in enumerate(chunk_annotation):
        if same_target_source:
            location_source = source_annotation[i]
        else:
            location_source = source_annotation[target_source_parents[i]] if target_source_parents[
                i] is not None else None

        if location_source:
            location_data = geomodel.get(location_source.strip().lower())
            if location_data:
                chunk_locations[i] = [(location_source, list(location_data))]
        else:
            chunk_locations[i] = []

    chunk_locations = most_populous(chunk_locations)

    out_annotation = chunk.create_empty_attribute()
    for c in chunk_locations:
        out_annotation[c] = _format_location(chunk_locations.get(c, ()))

    out.write(out_annotation)