示例#1
0
def try_map_response_to_movement(response: Dict) -> Dict:
    """Extract the movement-specific attributes of a wikidata entity.

    Every attribute is read through a ``map_wd_attribute`` helper, using the
    wikidata property id registered for the attribute name and the singular
    movement type name.

    Args:
        response: wikidata entity to map to an oab entity

    Returns:
        A dict holding the start time, end time, country, has-part and
        part-of values of the movement, in that order
    """
    entity_type = MOVEMENT[SINGULAR]
    # Each attribute name is both the key of the result dict and the key used
    # to look up its wikidata property id, so a small table is sufficient.
    attribute_extractors = (
        (START_TIME, map_wd_attribute.try_get_year_from_property_timestamp),
        (END_TIME, map_wd_attribute.try_get_year_from_property_timestamp),
        # only the qid is stored for the country, labels to be resolved later
        (COUNTRY, map_wd_attribute.try_get_first_qid),
        (HAS_PART, map_wd_attribute.try_get_qid_reference_list),
        (PART_OF, map_wd_attribute.try_get_qid_reference_list),
    )
    return {
        attribute: extract(
            response, PROPERTY_NAME_TO_PROPERTY_ID[attribute], entity_type
        )
        for attribute, extract in attribute_extractors
    }
def try_map_response_to_subject(
    response: Dict,
    type_name: str,
    language_keys: Optional[List[str]] = lang_keys,
) -> Optional[Dict]:
    """Maps the default attributes which every subject has:
    qid, image, label, description, classes, wikipediaLink (including language specific attributes)

    Args:
        response: The wikidata entity which should be mapped to an openArtBrowser entity
        type_name: Type name of the entity
        language_keys: All language keys which should be extracted. Defaults to languageconfig.csv.
            Passing None falls back to the same default.

    Returns:
        A dict of an openArtBrowser entity, or None if the entity has no
        readable qid (the error is logged and the item should be skipped)
    """
    # The parameter is declared Optional, so treat an explicit None like the default
    if language_keys is None:
        language_keys = lang_keys

    try:
        qid = response[ID]
    except Exception as error:
        logger.error("Error on qid, skipping item. Error: {0}".format(error))
        return None

    # How to get image url
    # https://stackoverflow.com/questions/34393884/how-to-get-image-url-property-from-wikidata-item-by-api
    try:
        image = get_image_url_by_name(
            response[CLAIMS][PROPERTY_NAME_TO_PROPERTY_ID[IMAGE]][0][MAINSNAK]
            [DATAVALUE][VALUE])
    except Exception:
        # The image is best effort: any failure results in an empty image url.
        # `Exception` (instead of a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        image = ""

    label = map_wd_attribute.try_get_label_or_description(
        response, LABEL[PLURAL], EN, type_name)
    description = map_wd_attribute.try_get_label_or_description(
        response, DESCRIPTION[PLURAL], EN, type_name)
    classes = map_wd_attribute.try_get_qid_reference_list(
        response, PROPERTY_NAME_TO_PROPERTY_ID[CLASS[SINGULAR]], type_name)

    subject_dict = {
        ID: qid,
        CLASS[PLURAL]: classes,
        LABEL[SINGULAR]: label,
        DESCRIPTION[SINGULAR]: description,
        IMAGE: image,
    }

    # Add the language specific label, description and wikipedia link,
    # each stored under a key suffixed with the language key
    for langkey in language_keys:
        label_lang = map_wd_attribute.try_get_label_or_description(
            response, LABEL[PLURAL], langkey, type_name)
        description_lang = map_wd_attribute.try_get_label_or_description(
            response, DESCRIPTION[PLURAL], langkey, type_name)
        wikipedia_link_lang = map_wd_attribute.try_get_wikipedia_link(
            response, langkey, type_name)
        subject_dict.update({
            f"{LABEL[SINGULAR]}_{langkey}": label_lang,
            f"{DESCRIPTION[SINGULAR]}_{langkey}": description_lang,
            f"{WIKIPEDIA_LINK}_{langkey}": wikipedia_link_lang,
        })

    return subject_dict
示例#3
0
def try_map_response_to_artist(response: Dict) -> Dict:
    """Extract the artist-specific attributes of a wikidata entity.

    Every attribute is read through a ``map_wd_attribute`` helper, using the
    wikidata property id registered for the attribute and the singular artist
    type name.

    Args:
        response: wikidata entity to map to an oab entity

    Returns:
        A dict holding gender, date of birth, date of death, place of birth,
        place of death, citizenship and movements of the artist
    """
    entity_type = ARTIST[SINGULAR]
    property_ids = PROPERTY_NAME_TO_PROPERTY_ID
    return {
        GENDER: map_wd_attribute.try_get_first_qid(
            response, property_ids[GENDER], entity_type
        ),
        DATE_OF_BIRTH: map_wd_attribute.try_get_year_from_property_timestamp(
            response, property_ids[DATE_OF_BIRTH], entity_type
        ),
        DATE_OF_DEATH: map_wd_attribute.try_get_year_from_property_timestamp(
            response, property_ids[DATE_OF_DEATH], entity_type
        ),
        # For the two places and the citizenship only the qid is stored,
        # labels to be resolved later
        PLACE_OF_BIRTH: map_wd_attribute.try_get_first_qid(
            response, property_ids[PLACE_OF_BIRTH], entity_type
        ),
        PLACE_OF_DEATH: map_wd_attribute.try_get_first_qid(
            response, property_ids[PLACE_OF_DEATH], entity_type
        ),
        CITIZENSHIP: map_wd_attribute.try_get_first_qid(
            response, property_ids[CITIZENSHIP], entity_type
        ),
        # The property is looked up by the singular movement name while the
        # result is stored under the plural name
        MOVEMENT[PLURAL]: map_wd_attribute.try_get_qid_reference_list(
            response, property_ids[MOVEMENT[SINGULAR]], entity_type
        ),
    }
示例#4
0
def try_map_response_to_location(response):
    """Extract the location-specific attributes of a wikidata entity.

    Country, website and part-of are read through ``map_wd_attribute``
    helpers. The coordinate is read directly from the first claim of the
    coordinate property; if that fails for any reason the failure is logged
    on info level and latitude and longitude are both set to an empty string.

    Args:
        response: wikidata entity to map to an oab entity

    Returns:
        A dict holding country, website, part-of, latitude and longitude of
        the location
    """
    entity_type = LOCATION[SINGULAR]
    location = {
        COUNTRY: map_wd_attribute.try_get_first_qid(
            response, PROPERTY_NAME_TO_PROPERTY_ID[COUNTRY], entity_type
        ),
        WEBSITE: map_wd_attribute.try_get_first_value(
            response, PROPERTY_NAME_TO_PROPERTY_ID[WEBSITE], entity_type
        ),
        PART_OF: map_wd_attribute.try_get_qid_reference_list(
            response, PROPERTY_NAME_TO_PROPERTY_ID[PART_OF], entity_type
        ),
    }

    try:
        first_claim = response[CLAIMS][PROPERTY_NAME_TO_PROPERTY_ID[COORDINATE]][0]
        coordinate = first_claim[MAINSNAK][DATAVALUE][VALUE]
        # Both values are read before either is kept, so a missing longitude
        # also discards the latitude
        lat_lon = (coordinate[LATITUDE[SINGULAR]], coordinate[LONGITUDE[SINGULAR]])
    except Exception as error:
        message = "Error on item {0}, property {1}, type {2}, error {3}".format(
            response[ID],
            PROPERTY_NAME_TO_PROPERTY_ID[COORDINATE],
            LOCATION[SINGULAR],
            error,
        )
        logger.info(message)
        lat_lon = ("", "")

    location[LATITUDE[ABBREVIATION]] = lat_lon[0]
    location[LONGITUDE[ABBREVIATION]] = lat_lon[1]
    return location
def get_classes(
    type_name: str,
    qids: List[str],
    already_extracted_superclass_ids: Optional[Set[str]] = None,
    language_keys: Optional[List[str]] = lang_keys,
) -> List[Dict]:
    """Function to extract the classes of the extracted wikidata entities (meaning the 'instance of' attribute wikidata entity qids).
    Their subclasses are also extracted recursively (also called transitive closure)

    Args:
        type_name: oab type e. g. movement
        qids: List of qids to extract the labels from
        already_extracted_superclass_ids: A set of already extracted superclass ids for the recursive calls,
            this is also the anchor to stop recursion. If omitted (or None) a fresh empty set is
            created for this call, so separate top-level calls do not share state.
        language_keys: All language keys which should be extracted. Defaults to languageconfig.csv.
            Passing None falls back to the same default.

    Returns:
        Returns a list of dicts with the classes from the oab entities and their subclasses
    """
    # A mutable default (`set()`) would be created once and shared between all
    # calls that omit the argument, so the set is created per call instead.
    if already_extracted_superclass_ids is None:
        already_extracted_superclass_ids = set()
    # The parameter is declared Optional, so treat an explicit None like the default
    if language_keys is None:
        language_keys = lang_keys

    print(datetime.datetime.now(), f"Starting with {type_name}")
    relation = "instance_of" if type_name == CLASS[PLURAL] else "subclass_of"
    print(
        f"Total {type_name} to extract (only '{relation}' of the provided qids): {len(qids)}"
    )
    item_count = 0
    extract_dicts = []
    chunk_size = 50  # The chunksize 50 is allowed by the wikidata api, bigger numbers need special permissions
    classes_id_chunks = chunks(list(qids), chunk_size)
    for chunk in classes_id_chunks:
        query_result = wikidata_entity_request(chunk)

        # A result without entities cannot be mapped, the whole chunk is skipped
        if ENTITIES not in query_result:
            logger.error("Skipping chunk")
            continue

        for result in query_result[ENTITIES].values():
            try:
                qid = result[ID]
            except Exception as error:
                logger.error(
                    "Error on qid, skipping item. Error: {0}".format(error))
                continue
            label = map_wd_attribute.try_get_label_or_description(
                result, LABEL[PLURAL], EN, type_name)
            description = map_wd_attribute.try_get_label_or_description(
                result, DESCRIPTION[PLURAL], EN, type_name)
            subclass_of = map_wd_attribute.try_get_qid_reference_list(
                result, PROPERTY_NAME_TO_PROPERTY_ID[SUBCLASS_OF], type_name)
            class_dict = {
                ID: qid,
                LABEL[SINGULAR]: label,
                DESCRIPTION[SINGULAR]: description,
                SUBCLASS_OF: subclass_of,
            }

            # Add the language specific label and description,
            # each stored under a key suffixed with the language key
            for langkey in language_keys:
                label_lang = map_wd_attribute.try_get_label_or_description(
                    result, LABEL[PLURAL], langkey, type_name)
                description_lang = map_wd_attribute.try_get_label_or_description(
                    result, DESCRIPTION[PLURAL], langkey, type_name)
                class_dict.update({
                    f"{LABEL[SINGULAR]}_{langkey}": label_lang,
                    f"{DESCRIPTION[SINGULAR]}_{langkey}": description_lang,
                })
            extract_dicts.append(class_dict)

        item_count += len(chunk)
        print(f"Status of {type_name}: {item_count}/{len(qids)}",
              end="\r",
              flush=True)

    # NOTE(review): the helper is defined outside this block; presumably it
    # calls get_classes again for superclass qids that are not yet in
    # already_extracted_superclass_ids - confirm against its implementation.
    return load_entities_by_attribute_with_transitive_closure(
        extract_dicts,
        SUBCLASS_OF,
        CLASS[PLURAL],
        already_extracted_superclass_ids,
        get_classes,
        [],
    )
def get_subject(
    type_name: str,
    qids: List[str],
    already_extracted_movement_ids: Optional[Set[str]] = None,
    language_keys: Optional[List[str]] = lang_keys,
) -> List[Dict]:
    """Extract subjects (in our definition everything except artworks e. g. movements, motifs, etc.) from wikidata

    Args:
        type_name: oab type name e. g. movements (Caution type names are always plural here)
        qids: A list of qids extracted from the artworks
        already_extracted_movement_ids: Set of movement qids which were already extracted. The qid of
            every movement mapped here is added to it and it is handed to the transitive closure
            loading of the movements. If omitted (or None) a fresh empty set is created for this
            call, so separate top-level calls do not share state.
        language_keys: All language keys which should be extracted. Defaults to languageconfig.csv.
            Passing None falls back to the same default.

    Returns:
        A list of dicts with the subjects transformed from wikidata entities to oab entities
    """
    # A mutable default (`set()`) would be created once and shared between all
    # calls that omit the argument; since this function adds to the set, ids
    # would leak from one call into the next. Create the set per call instead.
    if already_extracted_movement_ids is None:
        already_extracted_movement_ids = set()
    # The parameter is declared Optional, so treat an explicit None like the default
    if language_keys is None:
        language_keys = lang_keys

    print(datetime.datetime.now(), f"Starting with {type_name}")
    print(f"Total {type_name} to extract: {len(qids)}")
    item_count = 0
    extract_dicts = []
    chunk_size = 50  # The chunksize 50 is allowed by the wikidata api, bigger numbers need special permissions
    subject_id_chunks = chunks(list(qids), chunk_size)
    for chunk in subject_id_chunks:
        query_result = wikidata_entity_request(chunk)

        # A result without entities cannot be mapped, the whole chunk is skipped
        if ENTITIES not in query_result:
            logger.error("Skipping chunk")
            continue

        for result in query_result[ENTITIES].values():
            # Forward the requested language keys; previously the parameter
            # was accepted but ignored and the mapper always used its default
            subject_dict = map_wd_response.try_map_response_to_subject(
                result, type_name, language_keys=language_keys)
            if subject_dict is None:
                continue
            if type_name in (MOVEMENT[PLURAL], ARTIST[PLURAL]):
                influenced_by = map_wd_attribute.try_get_qid_reference_list(
                    result, PROPERTY_NAME_TO_PROPERTY_ID[INFLUENCED_BY],
                    type_name)
                subject_dict.update({INFLUENCED_BY: influenced_by})
            if type_name == MOVEMENT[PLURAL]:
                subject_dict.update(
                    map_wd_response.try_map_response_to_movement(result))
                already_extracted_movement_ids.add(subject_dict[ID])
            if type_name == ARTIST[PLURAL]:
                subject_dict.update(
                    map_wd_response.try_map_response_to_artist(result))
            if type_name == LOCATION[PLURAL]:
                subject_dict.update(
                    map_wd_response.try_map_response_to_location(result))
            extract_dicts.append(subject_dict)

        item_count += len(chunk)
        print(f"Status of {type_name}: {item_count}/{len(qids)}",
              end="\r",
              flush=True)

    if type_name == MOVEMENT[PLURAL]:
        # Movements are extended by the movements they reference, first via
        # part of and then via has part.
        # NOTE(review): the helper is defined outside this block; presumably
        # it calls get_subject again for referenced qids that are not yet in
        # already_extracted_movement_ids - confirm against its implementation.
        for attribute_name in (PART_OF, HAS_PART):
            extract_dicts = load_entities_by_attribute_with_transitive_closure(
                extract_dicts,
                attribute_name,
                MOVEMENT[PLURAL],
                already_extracted_movement_ids,
                get_subject,
                [ART_MOVEMENT[ID], ART_STYLE[ID]],
            )
        # As before, this branch returns without printing the finished message
        return extract_dicts

    print(datetime.datetime.now(), f"Finished with {type_name}")
    return extract_dicts