Пример #1
0
def add_identifiers(identifiers: dict, catalog: str, entity: str,
                    sandbox: bool) -> None:
    """Attach catalog identifier statements to Wikidata items that exist.

    Each pair in ``identifiers`` is handed to ``_add_or_reference``
    together with the PID and QID looked up for the given catalog.

    :param identifiers: a ``{QID: catalog_identifier}`` dictionary
    :param catalog: ``{'discogs', 'imdb', 'musicbrainz', 'twitter'}``.
      A supported catalog
    :param entity: ``{'actor', 'band', 'director', 'musician', 'producer',
      'writer', 'audiovisual_work', 'musical_work'}``.
      A supported entity
    :param sandbox: whether to perform edits on the
      `Wikidata sandbox <https://www.wikidata.org/wiki/Q4115189>`_ item
    """
    catalog_qid = target_database.get_catalog_qid(catalog)
    catalog_pid = target_database.get_catalog_pid(catalog, entity)

    for wikidata_qid, target_id in identifiers.items():
        LOGGER.info('Processing %s match: %s -> %s', catalog, wikidata_qid,
                    target_id)

        subject = wikidata_qid
        if sandbox:
            # Redirect the edit to the sandbox item, but keep track of
            # the item that would have been edited otherwise
            subject = vocabulary.SANDBOX_1
            LOGGER.debug(
                'Using Wikidata sandbox item %s as subject, instead of %s',
                subject,
                wikidata_qid,
            )

        _add_or_reference(subject, catalog_pid, target_id, catalog_qid)
Пример #2
0
def people_cli(catalog, statements, sandbox):
    """Add statements to Wikidata people.

    STATEMENTS must be a CSV file.
    Format: person_QID, PID, value

    If the claim already exists, just add a reference.

    Example:

    $ echo Q312387,P463,Q483407 > joey.csv

    $ python -m soweego ingester people discogs joey.csv

    Result:

    claim (Joey Ramone, member of, Ramones)

    reference (stated in, Discogs), (retrieved, today)
    """
    # The catalog QID is passed along as the last argument of every
    # `_add_or_reference` call below
    stated_in = target_database.get_catalog_qid(catalog)

    if sandbox:
        LOGGER.info('Running on the Wikidata sandbox item ...')

    for line in statements:
        # One `person,predicate,value` record per line: drop the trailing
        # newline before splitting on commas
        person, predicate, value = line.rstrip().split(',')
        # In sandbox mode the sandbox item replaces the person as subject
        subject = vocabulary.SANDBOX_1 if sandbox else person
        _add_or_reference(subject, predicate, value, stated_in)
Пример #3
0
def _upload(catalog, entity, to_deprecate, to_add, sandbox):
    """Deprecate identifiers, then add statements, through ``wikidata_bot``.

    The catalog QID is looked up first, forwarded to
    ``wikidata_bot.add_people_statements`` and finally returned.

    :param catalog: catalog name, used for the QID lookup and the
      deprecation call
    :param entity: entity type, forwarded to the deprecation call
    :param to_deprecate: identifiers forwarded to
      ``wikidata_bot.delete_or_deprecate_identifiers``
    :param to_add: statements forwarded to
      ``wikidata_bot.add_people_statements``
    :param sandbox: forwarded as is to both ``wikidata_bot`` calls
    :return: the QID looked up for ``catalog``
    """
    stated_in = target_database.get_catalog_qid(catalog)

    LOGGER.info('Starting deprecation of %s IDs ...', catalog)
    wikidata_bot.delete_or_deprecate_identifiers(
        'deprecate', catalog, entity, to_deprecate, sandbox
    )

    LOGGER.info('Starting addition of statements to Wikidata ...')
    wikidata_bot.add_people_statements(to_add, stated_in, sandbox)

    return stated_in
Пример #4
0
def _set_catalog_fields(db_entity, name_field, catalog, entity):
    """Fill in the catalog-level attributes of ``db_entity`` in place.

    :param db_entity: object whose attributes get assigned
    :param name_field: value stored in ``db_entity.name``
    :param catalog: catalog name, used to look up the type, QID and PID
    :param entity: entity type, used together with ``catalog``
      to look up the PID
    """
    db_entity.name = name_field
    db_entity.active = 1
    db_entity.note = NOTE_FIELD
    # Catalogs without a known type fall back to the empty string
    db_entity.type = CATALOG_TYPES.get(catalog, '')

    # QIDs and PIDs are stored as bare integers: strip the leading letter
    source_qid = target_database.get_catalog_qid(catalog)
    db_entity.source_item = int(source_qid.lstrip('Q'))

    property_pid = target_database.get_catalog_pid(catalog, entity)
    db_entity.wd_prop = int(property_pid.lstrip('P'))

    db_entity.search_wp = SEARCH_WP_FIELD
Пример #5
0
def add_people_statements(catalog: str, statements: Iterable, criterion: str,
                          sandbox: bool) -> None:
    """Upload statements about people that already exist in Wikidata.

    The input usually originates from validation criteria 2 or 3,
    see :func:`soweego.validator.checks.links` and
    :func:`soweego.validator.checks.bio`.

    :param catalog: ``{'discogs', 'imdb', 'musicbrainz', 'twitter'}``.
      A supported catalog
    :param statements: iterable of
      (subject, predicate, value, catalog ID) tuples
    :param criterion: ``{'links', 'bio'}``. A supported validation criterion
    :param sandbox: whether to perform edits on the Wikidata `sandbox 2`_ item
    :raises ValueError: if ``criterion`` is neither ``'links'`` nor ``'bio'``
    """
    # Reject unsupported criteria up front, before any lookup happens
    if criterion not in ('links', 'bio'):
        raise ValueError(f"Invalid criterion: '{criterion}'. "
                         "Please use either 'links' or 'bio'")

    edit_summary = (
        LINKS_VALIDATION_SUMMARY
        if criterion == 'links'
        else BIO_VALIDATION_SUMMARY
    )

    sandbox_item = vocabulary.SANDBOX_2
    catalog_qid = target_database.get_catalog_qid(catalog)
    person_pid = target_database.get_person_pid(catalog)
    heuristic = vocabulary.RECORD_LINKAGE

    if sandbox:
        LOGGER.info('Running on the Wikidata sandbox item %s ...',
                    sandbox_item)

    for subject, predicate, value, catalog_id in statements:
        LOGGER.info(
            'Processing (%s, %s, %s, %s) statement ...',
            subject,
            predicate,
            value,
            catalog_id,
        )
        # In sandbox mode every statement lands on the sandbox item
        target_item = sandbox_item if sandbox else subject
        claim = (target_item, predicate, value)
        _add_or_reference(
            claim,
            heuristic,
            catalog_qid=catalog_qid,
            catalog_pid=person_pid,
            catalog_id=catalog_id,
            edit_summary=edit_summary,
        )
Пример #6
0
def people_cli(catalog, statements, criterion, sandbox):
    """Add statements to Wikidata people.

    STATEMENTS must be a CSV file.
    Format: person_QID, PID, value, person_catalog_ID

    If the claim already exists, just add a reference.

    Example:

    $ echo Q312387,P463,Q483407,264375 > joey.csv

    $ python -m soweego ingester people discogs joey.csv

    Result:

    claim (Joey Ramone, member of, Ramones)

    reference (based on heuristic, record linkage), (stated in, Discogs), (Discogs artist ID, 264375), (retrieved, today)
    """
    sandbox_item = vocabulary.SANDBOX_2
    # See https://www.wikidata.org/wiki/Wikidata:Project_chat/Archive/2021/07#URLs_statistics_for_Discogs_(Q504063)_and_MusicBrainz_(Q14005)
    heuristic = vocabulary.RECORD_LINKAGE
    catalog_qid = target_database.get_catalog_qid(catalog)
    catalog_pid = target_database.get_person_pid(catalog)

    # No dedicated edit summary unless a known validation criterion is given
    edit_summary = None
    if criterion == 'links':
        edit_summary = LINKS_VALIDATION_SUMMARY
    elif criterion == 'bio':
        edit_summary = BIO_VALIDATION_SUMMARY

    if sandbox:
        LOGGER.info('Running on the Wikidata sandbox item %s ...',
                    sandbox_item)

    for person, predicate, value, catalog_id in csv.reader(statements):
        # In sandbox mode the sandbox item replaces the person as subject
        target_item = sandbox_item if sandbox else person
        claim = (target_item, predicate, value)
        _add_or_reference(
            claim,
            heuristic,
            catalog_qid=catalog_qid,
            catalog_pid=catalog_pid,
            catalog_id=catalog_id,
            edit_summary=edit_summary,
        )
Пример #7
0
def works_cli(catalog, statements, sandbox):
    """Add statements to Wikidata works.

    STATEMENTS must be a CSV file.
    Format: work_QID, PID, person_QID, person_target_ID

    If the claim already exists, just add a reference.

    Example:

    $ echo Q4354548,P175,Q5969,139984 > cmon.csv

    $ python -m soweego ingester works discogs cmon.csv

    Result:

    claim (C'mon Everybody, performer, Eddie Cochran)

    reference (based on heuristic, record linkage), (stated in, Discogs), (Discogs artist ID, 139984), (retrieved, today)
    """
    sandbox_item = vocabulary.SANDBOX_2
    catalog_qid = target_database.get_catalog_qid(catalog)
    is_imdb, person_pid = _get_works_args(catalog)
    heuristic = vocabulary.RECORD_LINKAGE

    if sandbox:
        LOGGER.info('Running on the Wikidata sandbox item %s ...',
                    sandbox_item)

    for work, predicate, person, person_id in csv.reader(statements):
        # In sandbox mode the sandbox item replaces the work as subject
        target_item = sandbox_item if sandbox else work
        claim = (target_item, predicate, person)
        _add_or_reference_works(
            claim,
            heuristic,
            catalog_qid,
            person_pid,
            person_id,
            is_imdb=is_imdb,
            edit_summary=WORKS_SUMMARY,
        )
Пример #8
0
def add_works_statements(statements: Iterable, catalog: str,
                         sandbox: bool) -> None:
    """Upload statements about works that already exist in Wikidata.

    The input usually originates from
    :func:`soweego.validator.enrichment.generate_statements`.

    :param statements: iterable of
      (work QID, predicate, person QID, person target ID) tuples
    :param catalog: ``{'discogs', 'imdb', 'musicbrainz', 'twitter'}``.
      A supported catalog
    :param sandbox: whether to perform edits on the Wikidata `sandbox 2`_ item
    """
    sandbox_item = vocabulary.SANDBOX_2
    catalog_qid = target_database.get_catalog_qid(catalog)
    is_imdb, person_pid = _get_works_args(catalog)
    heuristic = vocabulary.RECORD_LINKAGE

    if sandbox:
        LOGGER.info('Running on the Wikidata sandbox item %s ...',
                    sandbox_item)

    for work, predicate, person, person_id in statements:
        LOGGER.info(
            'Processing (%s, %s, %s, %s) statement',
            work,
            predicate,
            person,
            person_id,
        )
        # In sandbox mode every statement lands on the sandbox item
        target_item = sandbox_item if sandbox else work
        claim = (target_item, predicate, person)
        _add_or_reference_works(
            claim,
            heuristic,
            catalog_qid,
            person_pid,
            person_id,
            is_imdb=is_imdb,
            edit_summary=WORKS_SUMMARY,
        )
Пример #9
0
def add_people_statements(statements: Iterable, catalog: str,
                          sandbox: bool) -> None:
    """Upload statements about people that already exist in Wikidata.

    The input usually originates from validation criteria 2 or 3,
    see :func:`soweego.validator.checks.links` and
    :func:`soweego.validator.checks.bio`.

    :param statements: iterable of (subject, predicate, value) triples
    :param catalog: ``{'discogs', 'imdb', 'musicbrainz', 'twitter'}``.
      A supported catalog
    :param sandbox: whether to perform edits on the
      `Wikidata sandbox <https://www.wikidata.org/wiki/Q4115189>`_ item
    """
    catalog_qid = target_database.get_catalog_qid(catalog)

    for subject, predicate, value in statements:
        LOGGER.info('Processing (%s, %s, %s) statement', subject, predicate,
                    value)
        # In sandbox mode the sandbox item replaces the real subject
        target_item = vocabulary.SANDBOX_1 if sandbox else subject
        _add_or_reference(target_item, predicate, value, catalog_qid)
Пример #10
0
def _get_works_args(catalog):
    """Collect the catalog-dependent arguments for works statements.

    :param catalog: catalog name, compared against ``IMDB`` and used
      for the QID and person PID lookups
    :return: a ``(catalog QID, is IMDb flag, person PID)`` tuple
    """
    # Flag that switches on the IMDb-specific checks
    imdb_flag = catalog == IMDB
    return (
        target_database.get_catalog_qid(catalog),
        imdb_flag,
        target_database.get_person_pid(catalog),
    )