def _reference(
    claim: pywikibot.Claim,
    heuristic: str,
    catalog_qid=None,
    catalog_pid=None,
    catalog_id=None,
    edit_summary=None,
):
    reference_node, log_buffer = [], []

    # Create `pywikibot.Claim` instances at runtime:
    # pywikibot complains if the same instances get uploaded multiple times
    # to the same item

    # Depends on the bot task
    # (based on heuristic, `heuristic`) reference claim
    based_on_heuristic_reference = pywikibot.Claim(
        REPO, vocabulary.BASED_ON_HEURISTIC, is_reference=True)
    based_on_heuristic_reference.setTarget(pywikibot.ItemPage(REPO, heuristic))
    reference_node.append(based_on_heuristic_reference)
    log_buffer.append(f'({based_on_heuristic_reference.getID()}, {heuristic})')

    # Validator tasks only
    if catalog_qid is not None:
        # (stated in, CATALOG) reference claim
        stated_in_reference = pywikibot.Claim(
            REPO, vocabulary.STATED_IN, is_reference=True)
        stated_in_reference.setTarget(pywikibot.ItemPage(REPO, catalog_qid))
        reference_node.append(stated_in_reference)
        log_buffer.append(f'({stated_in_reference.getID()}, {catalog_qid})')

    if catalog_pid is not None and catalog_id is not None:
        # (catalog property, catalog ID) reference claim
        catalog_id_reference = pywikibot.Claim(
            REPO, catalog_pid, is_reference=True)
        catalog_id_reference.setTarget(catalog_id)
        reference_node.append(catalog_id_reference)
        log_buffer.append(f'({catalog_pid}, {catalog_id})')

    # All tasks
    # (retrieved, TODAY) reference claim
    retrieved_reference = pywikibot.Claim(
        REPO, vocabulary.RETRIEVED, is_reference=True)
    retrieved_reference.setTarget(TIMESTAMP)
    reference_node.append(retrieved_reference)
    log_buffer.append(f'({retrieved_reference.getID()}, {TODAY})')

    log_msg = ', '.join(log_buffer)
    try:
        claim.addSources(reference_node, summary=edit_summary)
        LOGGER.info('Added %s reference node', log_msg)
    except (APIError, Error) as error:
        LOGGER.warning('Could not add %s reference node: %s', log_msg, error)
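# A minimal usage sketch for the helper above. The QID/PID values are
# hypothetical placeholders; REPO, vocabulary, TIMESTAMP, and TODAY are
# assumed to be the module-level globals the function already relies on.
claim = pywikibot.Claim(REPO, 'P106')  # hypothetical property
claim.setTarget(pywikibot.ItemPage(REPO, 'Q36834'))  # hypothetical target item
_reference(
    claim,
    heuristic='Q1266546',   # hypothetical heuristic item
    catalog_qid='Q14005',   # hypothetical catalog item
    catalog_pid='P1953',    # hypothetical catalog ID property
    catalog_id='123',       # hypothetical catalog identifier
    edit_summary='adding reference node',
)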
def setUp(self):
    wikidata = Site('test', 'wikidata')
    self.q_1 = Qualifier('P123', 'foo')
    self.q_2 = Qualifier('P123', 'bar')
    claim = Claim(wikidata, 'P55')
    claim.setTarget('foo')
    self.ref = Reference(source_test=[claim, ])
def fix(item: model.television.TvBase) -> Iterable[api.Fix]:
    title = None
    # For logging only
    _src, _src_key = None, None

    # Lookup IMDB
    if title is None:
        imdb_id = item.first_claim(wp.IMDB_ID.pid)
        title = imdb_title(imdb_id)
        _src, _src_key = "IMDB", imdb_id

    # Lookup tv.com
    if title is None:
        tv_com_id = item.first_claim(wp.TV_COM_ID.pid)
        title = tv_com_title(tv_com_id)
        _src, _src_key = "TV.com", tv_com_id

    # Lookup label
    if title is None:
        title = item.label
        _src, _src_key = "label", "en"

    # Did not find a title from any source
    if title is None:
        return []

    print(f"Fetched title='{title}' from {_src} using {_src_key}")
    new_claim = Claim(item.repo, wp.TITLE.pid)
    new_claim.setTarget(WbMonolingualText(title, "en"))
    summary = f"Setting {wp.TITLE} to {title}"
    return [api.ClaimFix(new_claim, summary, item.itempage)]
def copy(
    self,
    src_item: ItemPage,
    dest_item: ItemPage,
    props: Iterable[wp.WikidataProperty],
):
    """Copy properties from the source item to the destination item.

    Returns a tuple of (successes, failures).
    """
    src_item.get()
    dest_item.get()
    failures = 0
    successes = 0

    for prop in props:
        if prop.pid not in src_item.claims:
            print(f"{prop} not found in {src_item.title()}")
            failures += 1
            continue

        src_claims = src_item.claims[prop.pid]
        if len(src_claims) > 1:
            copy_multiple = click.confirm(
                f"There are {len(src_claims)} values for {prop}. "
                "Are you sure you want to copy all of them?"
            )
            # copy_multiple = False
            if not copy_multiple:
                print(
                    f"Cannot copy {prop} from {format(src_item)} to "
                    f"{format(dest_item)}. Only scalar properties can be copied"
                )
                failures += 1
                continue

        if prop.pid in dest_item.claims:
            print(f"{prop} already has a value in {format(dest_item)}")
            failures += 1
            continue

        targets = [claim.getTarget() for claim in src_claims]
        for target in targets:
            if hasattr(target, "get"):
                target.get()
            target_str = printable_target_value(target)
            print(
                f"Copying {prop}={target_str} from {format(src_item)} to "
                f"{format(dest_item)}"
            )
            new_claim = Claim(self.repo, prop.pid)
            new_claim.setTarget(target)
            dest_item.addClaim(
                new_claim, summary=f"Setting {prop.pid} ({prop.name})"
            )
            successes += 1

    return (successes, failures)
def setUp(self):
    wikidata = Site('test', 'wikidata')
    self.q_1 = WD.Qualifier('P123', 'foo')
    self.q_2 = WD.Qualifier('P123', 'bar')
    claim = Claim(wikidata, 'P55')
    claim.setTarget('foo')
    self.ref = WD.Reference(source_test=[claim, ])
def fix(item: model.api.Chainable) -> Iterable[Fix]:
    follows = item.previous
    if follows is None:
        print(f"autofix for follows_something({item.qid}) failed")
        return []
    new_claim = Claim(item.repo, wp.FOLLOWS.pid)
    new_claim.setTarget(follows.itempage)
    summary = f"Setting {wp.FOLLOWS.pid} ({wp.FOLLOWS.name})"
    return [ClaimFix(new_claim, summary, item.itempage)]
def fix(item: model.api.Chainable) -> Iterable[Fix]:
    is_followed_by = item.next
    if is_followed_by is None:
        print(f"autofix for is_followed_by({item.qid}) failed")
        return []
    new_claim = Claim(item.repo, wp.FOLLOWED_BY.pid)
    new_claim.setTarget(is_followed_by.itempage)
    summary = f"Setting {wp.FOLLOWED_BY.pid} ({wp.FOLLOWED_BY.name})"
    return [ClaimFix(new_claim, summary, item.itempage)]
def fix(item: model.television.Series) -> Iterable[api.ClaimFix]:
    if wp.IMDB_ID.pid not in item.claims:
        return []
    number_of_episodes = no_of_episodes(item.first_claim(wp.IMDB_ID.pid))
    if number_of_episodes is None:
        return []
    claim = Claim(item.repo, wp.NUMBER_OF_EPISODES.pid)
    claim.setTarget(WbQuantity(number_of_episodes, site=item.repo))
    summary = f"Setting {wp.NUMBER_OF_EPISODES} to {number_of_episodes}"
    return [api.ClaimFix(claim, summary=summary, itempage=item.itempage)]
def fix(item: model.television.Season) -> Iterable[api.Fix]:
    claim_fixes = []
    for ordinal, episode in item.parts:
        qualifier = Claim(item.repo, wp.SERIES_ORDINAL.pid)
        qualifier.setTarget(str(ordinal))

        new_claim = Claim(item.repo, wp.HAS_PART.pid)
        new_claim.setTarget(episode.itempage)
        new_claim.addQualifier(qualifier)

        summary = f"Adding {episode.qid} to {wp.HAS_PART.pid} ({wp.HAS_PART.name})"
        claim_fixes.append(api.ClaimFix(new_claim, summary, item.itempage))
    return claim_fixes
def get_or_create_claim(item: ItemPage, p_value: str, value: Any,
                        edit_group_hash: str) -> Tuple[Claim, bool]:
    """
    Gets or creates a claim with `value` under the property `p_value` on
    `item`. Returns the claim and whether it was newly created.
    """
    all_claims = item.claims.get(p_value, [])

    for claim in all_claims:
        if claim.target_equals(value):
            return claim, False

    claim = Claim(Settings.wikidata_repo, p_value)
    claim.setTarget(value)
    item.addClaim(claim, summary=get_sumary(edit_group_hash))

    return claim, True
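# A minimal usage sketch, assuming Settings.wikidata_repo is configured as in
# the snippet above; the item/property values and the edit-group hash are
# hypothetical. Running this performs a live edit when the claim is missing.
item = ItemPage(Settings.wikidata_repo, "Q42")
item.get()
target = ItemPage(Settings.wikidata_repo, "Q5")
claim, created = get_or_create_claim(item, "P31", target, "deadbeef")
if created:
    print(f"Created new claim {claim.getID()}")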
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    for movie_id, movie_label in movies_with_missing_titles():
        print(
            f"{dry_str}Setting title='{movie_label}' for {movie_id} "
            f"( https://www.wikidata.org/wiki/{movie_id} )"
        )
        if not dry:
            movie_item = ItemPage(repo, movie_id)
            movie_item.get()
            claim = Claim(repo, wp.TITLE.pid)
            claim.setTarget(WbMonolingualText(movie_label, "en"))
            movie_item.addClaim(claim)
def __init__(self, claim, languages, literals, delay=False,
             include_attribute_labels=False, qualifiers=None):
    """
    Parse additional information about a specified claim. The result
    (dict format) is accessible through ParseClaim(claim).claim_details.

    :param claim: pywikibot.Claim object to be parsed
    :type claim: pywikibot.Claim
    :param languages: list of language ISO codes
    :type languages: List(str)
    :param literals: list of literal properties to be included in result
    :type literals: List(str)
    """
    if qualifiers is None:
        qualifiers = QUALIFIERS
    self.qualifiers = qualifiers
    if not isinstance(claim, Claim):
        claim = Claim.fromJSON(site=DataSite('wikidata', 'wikidata'),
                               data=claim)
    self.include_attribute_labels = include_attribute_labels
    self.claim = claim
    self.languages = languages
    self.literals = literals
    if self.include_attribute_labels:
        self.literals = ['labels']
    if delay:
        self.claim_details = {}
    else:
        self.claim_details = self.parse_claim()
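# A minimal usage sketch for the ParseClaim class this __init__ belongs to
# (the class name comes from the docstring above). The claim dict reuses the
# P1442 fixture shape from the tests below; delay=True skips the immediate
# parse (and any API access), leaving claim_details empty until parse_claim()
# is called explicitly.
raw = {u'type': u'statement',
       u'mainsnak': {u'datatype': u'commonsMedia',
                     u'datavalue': {u'type': u'string',
                                    u'value': u"Douglas Adams' gravestone.jpg"},
                     u'property': u'P1442',
                     u'snaktype': u'value'},
       u'rank': u'normal'}
parser = ParseClaim(raw, languages=['en'], literals=['labels'], delay=True)
assert parser.claim_details == {}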
def attribute_preferred_value(claim_instances):
    """When an attribute has several instances, try to retrieve the one with
    rank=preferred. Raises a ValueError when no or more than one `preferred`
    instances are found.

    :param claim_instances: List of `Claim`s.
    :returns a 1-member list containing the unique `preferred` value, or the
        input list if it has length 1. Raises ValueError otherwise."""
    if len(claim_instances) == 1:
        return claim_instances
    else:
        try:
            claim_instances = [Claim.fromJSON(DataSite('wikidata', 'wikidata'),
                                              claim_instance)
                               for claim_instance in claim_instances]
            # for claim_instance in claim_instances:
            #     try:
            #         claim_instance = Claim.fromJSON(DataSite('wikidata', 'wikidata'), claim_instance)
            #     except:
            #         pass
            #     try:
            #         claim_instance.get()
        except TypeError:
            pass
        preferred = [
            claim for claim in claim_instances if claim.rank == 'preferred']
        if len(preferred) == 1:
            return [claim for claim in preferred]
        elif len(preferred) == 0:
            raise ValueError('No claim instance marked as preferred!')
        else:
            raise ValueError(
                'Incorrectly tagged data: several instances '
                'marked as preferred, this should not happen!')
def set_claim_rank(claim: Claim, latest_version: str, release: Release):
    if latest_version is None:
        return
    if release.version == latest_version:
        if claim.getRank() != "preferred":
            logger.info("Setting preferred rank for {}".format(
                claim.getTarget()))
            claim.changeRank("preferred")
    else:
        if claim.getRank() != "normal":
            logger.info("Setting normal rank for {}".format(claim.getTarget()))
            claim.changeRank("normal")
def set_claim_rank(claim: Claim, latest_version: Optional[str],
                   release: Release, edit_group_hash: str):
    if latest_version is None:
        return
    if release.version == latest_version:
        if claim.getRank() == "normal":
            logger.info("Setting preferred rank for {}".format(
                claim.getTarget()))
            claim.changeRank("preferred", summary=get_sumary(edit_group_hash))
    else:
        if claim.getRank() == "preferred":
            logger.info("Setting normal rank for {}".format(claim.getTarget()))
            claim.changeRank("normal", summary=get_sumary(edit_group_hash))
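# A minimal usage sketch for the rank helper above. All names are
# hypothetical: `version_claims` maps each Release to its existing version
# Claim, and `latest_version` comes from the upstream package index. The
# newest release is promoted to preferred rank; all others fall back to
# normal.
for release, version_claim in version_claims.items():
    set_claim_rank(version_claim, latest_version, release, edit_group_hash)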
def merge_claims(self, claim1, claim2):
    if claim1 == claim2:
        hashes = set(
            s['hash'] for s in claim1.toJSON().get('references', []))
        for source in claim2.toJSON().get('references', []):
            if source['hash'] not in hashes:
                source_copy = Claim.referenceFromJSON(claim2.repo, source)
                claim1.sources.append(source_copy)
        return True
    else:
        return False
def copy_delayed(
    src_item: ItemPage, dest_item: ItemPage, props: Iterable[wp.WikidataProperty]
) -> Iterable[api.Fix]:
    repo = Site().data_repository()
    src_item.get()
    dest_item.get()
    claims = []
    for prop in props:
        src_claims = src_item.claims.get(prop.pid, [])
        if len(src_claims) > 1:
            print(
                f"Cannot copy {prop} from {format(src_item)} to "
                f"{format(dest_item)}. Only scalar properties can be copied"
            )
            continue
        if prop.pid in dest_item.claims:
            print(f"{prop} already has a value in {format(dest_item)}")
            continue
        targets = [claim.getTarget() for claim in src_claims]
        for target in targets:
            target.get()
            target_str = printable_target_value(target)
            print(
                f"Creating claim to copy {prop}={target_str} from "
                f"{format(src_item)} to {format(dest_item)}"
            )
            new_claim = Claim(repo, prop.pid)
            new_claim.setTarget(target)
            summary = f"Setting {prop.pid} ({prop.name})"
            claims.append(api.ClaimFix(new_claim, summary, dest_item))
    return claims
def get_country_from_any(cls, itempage, local_attributes, languages,
                         include_attribute_labels=True):
    """
    Try to infer a country from any location-type attribute of the item.

    :param include_attribute_labels:
    :param itempage: parent item
    :param local_attributes: attributes which might be used to infer country
    :param languages: languages for country label
    :returns list with dictionaries of ID, labels of (preferred) country or
        countries, or None if no country can be reconstructed.
    """
    if local_attributes is None:
        local_attributes = LOCAL_ATTRIBUTES
    try:
        claims = itempage['claims']
    except TypeError:
        claims = itempage.text['claims']
    for location_type in local_attributes:
        if location_type in claims:
            for location in claims[location_type]:
                if location:
                    if not isinstance(location, Claim):
                        location = Claim.fromJSON(
                            DataSite('wikidata', 'wikidata'), data=location)
                    try:
                        country = ParseItemPage.get_country_from_location(
                            location.target,
                            languages=languages,
                            include_attribute_labels=include_attribute_labels
                        )
                        if 'preferred' in country:
                            return country['preferred']
                        elif len(country['values']) >= 1:
                            return country['values']
                        else:
                            pass
                    except ValueError:
                        pass
                else:
                    logger.warning('Entity {} has location property {} '
                                   'set to null'.format(
                                       itempage['id'], location_type))
    return None
def merge_claims(self, claim1, claim2):
    if claim1 == claim2:
        if claim1.rank != claim2.rank:
            if claim1.rank != 'normal':
                if claim2.rank != 'normal':
                    return False
            claim1.rank = claim2.rank
        hashes = set(
            s['hash'] for s in claim1.toJSON().get('references', []))
        for source in claim2.toJSON().get('references', []):
            if source['hash'] not in hashes:
                source_copy = Claim.referenceFromJSON(claim2.repo, source)
                claim1.sources.append(source_copy)
        return True
    else:
        return False
def normalize_repo_url(
    item: ItemPage,
    url_normalized: str,
    url_raw: str,
    q_value: str,
    edit_group_hash: str,
):
    """
    Canonicalize the GitHub URL, using the format
    https://github.com/[owner]/[repo]

    Note: This apparently only works with a bot account
    """
    if url_raw == url_normalized:
        return

    logger.info("Normalizing {} to {}".format(url_raw, url_normalized))

    source_p = Properties.source_code_repository
    urls = item.claims.get(source_p, [])
    if source_p in item.claims and len(urls) == 2:
        if urls[0].getTarget() == url_normalized and \
                urls[1].getTarget() == url_raw:
            logger.info(
                "The old and the new url are already set, removing the old")
            item.removeClaims(urls[1], summary=get_sumary(edit_group_hash))
            return
        if urls[0].getTarget() == url_raw and \
                urls[1].getTarget() == url_normalized:
            logger.info(
                "The old and the new url are already set, removing the old")
            item.removeClaims(urls[0], summary=get_sumary(edit_group_hash))
            return

    if source_p in item.claims and len(urls) > 1:
        logger.info(
            "Multiple source code repositories for {} not supported".format(
                q_value))
        return

    if urls[0].getTarget() != url_raw:
        logger.error(
            f"The url on the object ({urls[0].getTarget()}) doesn't match "
            f"the url from the sparql query ({url_raw}) for {q_value}"
        )
        return

    # Editing here actually means removing the old value and adding the new one
    claim = Claim(Settings.wikidata_repo, source_p)
    claim.setTarget(url_normalized)
    claim.setSnakType("value")
    item.addClaim(claim, summary=get_sumary(edit_group_hash))
    item.removeClaims(urls[0], summary=get_sumary(edit_group_hash))
    # Add git as protocol
    git = ItemPage(Settings.wikidata_repo, "Q186055")
    get_or_create_qualifiers(claim, Properties.protocol, git, edit_group_hash)
def merge_claims(self, claim1, claim2):
    if self.claims_are_same(claim1, claim2):
        if claim1.rank != claim2.rank:
            if claim1.rank == 'normal':
                claim1.rank = claim2.rank
            elif claim2.rank != 'normal':
                return False
        hashes = {
            ref['hash'] for ref in claim1.toJSON().get('references', [])
        }
        for ref in claim2.toJSON().get('references', []):
            if ref['hash'] not in hashes:
                ref_copy = Claim.referenceFromJSON(claim2.repo, ref)
                claim1.sources.append(ref_copy)
                hashes.add(ref['hash'])
        return True
    else:
        return False
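# The dedupe key used above is the server-side reference hash that the
# Wikibase API embeds in each claim's JSON: two references with the same
# hash are identical snak groups, so only unseen hashes are copied over.
# A self-contained sketch of the same idea on plain dicts (hypothetical
# hash values):
claim1_refs = [{'hash': 'abc', 'snaks': {}}]
claim2_refs = [{'hash': 'abc', 'snaks': {}}, {'hash': 'def', 'snaks': {}}]
seen = {ref['hash'] for ref in claim1_refs}
merged = claim1_refs + [r for r in claim2_refs if r['hash'] not in seen]
assert [r['hash'] for r in merged] == ['abc', 'def']  # 'abc' not duplicated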
def get_or_create_qualifiers(claim: Claim, p_value: str, value: Any) -> Claim:
    """
    Gets or creates a `qualifier` under the property `p_value` to `claim`
    """
    all_qualifiers = claim.qualifiers.get(p_value, [])

    for qualifier in all_qualifiers:
        if qualifier.target_equals(value):
            break
    else:
        qualifier = Claim(Settings.wikidata_repo, p_value)
        qualifier.setTarget(value)
        claim.addQualifier(qualifier)

    return qualifier
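# Note the for/else in the function above: the `else` body runs only when
# the loop finishes without hitting `break`, i.e. when no existing qualifier
# matched, so it doubles as the "create" branch of the get-or-create pattern.
# A tiny self-contained illustration of that control flow:
for x in [1, 2, 3]:
    if x == 99:
        break
else:
    print("no match found, creating one")  # this line prints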
def attribute_preferred_value(claim_instances):
    """When an attribute has several instances, try to retrieve the one(s)
    with rank=preferred. Raises a ValueError when no `preferred` instance is
    found; if several are marked preferred, all of them are returned.

    :param claim_instances: List of `Claim`s.
    :returns a list containing the `preferred` value(s), or the input list
        if it has length 1. Raises ValueError otherwise."""
    if len(claim_instances) == 1:
        return claim_instances
    else:
        try:
            claim_instances = [
                Claim.fromJSON(DataSite('wikidata', 'wikidata'),
                               claim_instance)
                for claim_instance in claim_instances
            ]
            # for claim_instance in claim_instances:
            #     try:
            #         claim_instance = Claim.fromJSON(DataSite('wikidata', 'wikidata'), claim_instance)
            #     except:
            #         pass
            #     try:
            #         claim_instance.get()
        except TypeError:
            pass
        preferred = [
            claim for claim in claim_instances if claim.rank == 'preferred'
        ]
        if len(preferred) == 0:
            raise ValueError('No claim instance marked as preferred!')
        elif len(preferred) > 1:
            sample_claim = preferred[0]
            logger.info(
                'Several instances of claim {} on entity {} marked as '
                'preferred, this is suspicious but does have valid use '
                'cases!'.format(sample_claim.id,
                                sample_claim.snak.split('$')[0]))
        return [claim for claim in preferred]
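# A minimal sketch of the rank-selection behaviour above. Plain stand-in
# objects with a `rank` attribute suffice here, assuming Claim.fromJSON
# rejects non-dict input with a TypeError (which the function swallows),
# after which the filter on `.rank` applies unchanged.
from types import SimpleNamespace

normal = SimpleNamespace(rank='normal')
best = SimpleNamespace(rank='preferred')
assert attribute_preferred_value([normal, best]) == [best]
assert attribute_preferred_value([normal]) == [normal]  # length-1 shortcut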
def get_images(itempage, image_width=DEFAULT_THUMBNAIL_WIDTH,
               image_types=image_attributes):
    """Find images of any specified type (e.g. 'image', 'flag'...).

    :param itempage: pywikibot.ItemPage
    :param image_width: width of the thumbnail
    :param image_types: dict of image-type properties identified by their
        Pxxx codes, such as `P18`: 'image', `P154`: 'logo image'
    :returns dict with keys=Pxx codes, values=list of
        [image_description_url, thumbnail_url, full_image_url]
    """
    try:
        claims = itempage['claims']
    except TypeError:
        claims = itempage.claims
    images_retrieved = {}
    for image_type in image_types:
        try:
            image = claims[image_type][0]
        except KeyError:
            # if we are looking for a *list* of image types,
            # we don't want to abort the process because
            # one is missing.
            continue
        try:
            target = image.getTarget()
        except AttributeError:
            from pywikibot import Claim
            from pywikibot.site import DataSite
            image = Claim.fromJSON(DataSite('wikidata', 'wikidata'), image)
            target = image.getTarget()
        claim_id = image.snak
        # str(target) returns a string of format [[site:namespace:filename]],
        # e.g. [[commons:File:Barack_Obama.jpg]], the wiki link of the image
        # page. We substitute this for a valid external link
        site, ns, link = image_interwiki_link = str(
            target).replace(' ', '_').strip('[]').split(':')
        image_description_page = u'https://{}.wikimedia.org/wiki/{}:{}'.format(
            *image_interwiki_link)
        # after:
        # https://stackoverflow.com/questions/34393884/how-to-get-image-url-property-from-wikidata-item-by-api
        thumbnail_template = u'https://{}.wikimedia.org/w/thumb.php?width={}&f={}'
        thumbnail_link = thumbnail_template.format(site, image_width, link)
        image_md5 = hashlib.md5(link.encode('utf-8')).hexdigest()
        a, b = image_md5[:2]
        direct_link_template = 'https://upload.wikimedia.org/wikipedia/{}/{}/{}/{}'
        image_direct_link = str(direct_link_template.format(
            site, a, a + b, quote(link.encode('utf-8'))))
        images_retrieved[image_type] = OrderedDict(
            [('claim_id', claim_id),
             ('description_page', image_description_page),
             ('thumbnail', thumbnail_link),
             ('full', image_direct_link)])
    if not images_retrieved:
        raise NoImageFoundError("No image available for entity!")
    return images_retrieved
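# The direct-link construction above relies on how Wikimedia shards uploaded
# files on disk: the first one and first two hex characters of the MD5 of the
# underscore-normalized filename form the directory path. A self-contained
# check of that scheme (filename taken from the P18 fixture below):
import hashlib
from urllib.parse import quote

filename = 'Douglas_adams_portrait_cropped.jpg'
digest = hashlib.md5(filename.encode('utf-8')).hexdigest()
url = 'https://upload.wikimedia.org/wikipedia/commons/{}/{}/{}'.format(
    digest[0], digest[:2], quote(filename))
print(url)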
def new_claim(self, prop):
    return Claim(self.repo, prop)
from eWRT.ws.wikidata.definitions.property_definitions import image_attributes
from eWRT.ws.wikidata.get_image_from_wikidataid import get_images, get_image

DataSite, itempage = mock.Mock(), mock.Mock()

itempage.claims = {
    'P1442': [
        Claim.fromJSON(
            DataSite("wikidata", "wikidata"), {
                u'type': u'statement',
                u'mainsnak': {
                    u'datatype': u'commonsMedia',
                    u'datavalue': {
                        u'type': u'string',
                        u'value': u"Douglas Adams' gravestone.jpg"
                    },
                    u'property': u'P1442',
                    u'snaktype': u'value'
                },
                u'id': u'Q42$db1ba2ba-47b9-3650-e6c4-db683abf788c',
                u'rank': u'normal'
            })
    ],
    'P18': [
        Claim.fromJSON(
            DataSite("wikidata", "wikidata"), {
                u'mainsnak': {
                    u'datatype': u'commonsMedia',
                    u'datavalue': {
                        u'type': u'string',
def get_images(itempage, image_width=DEFAULT_THUMBNAIL_WIDTH,
               image_types=image_attributes):
    """Find images of any specified type (e.g. 'image', 'flag'...).

    :param itempage: pywikibot.ItemPage
    :param image_width: width of the thumbnail
    :param image_types: dict of image-type properties identified by their
        Pxxx codes, such as `P18`: 'image', `P154`: 'logo image'
    :returns dict with keys=Pxx codes, values=list of
        [image_description_url, thumbnail_url, full_image_url]
    """
    try:
        claims = itempage['claims']
    except TypeError:
        claims = itempage.claims
    images_retrieved = {}
    for image_type in image_types:
        try:
            image = claims[image_type][0]
        except KeyError:
            # if we are looking for a *list* of image types,
            # we don't want to abort the process because
            # one is missing.
            continue
        try:
            target = image.getTarget()
        except AttributeError:
            from pywikibot import Claim
            from pywikibot.site import DataSite
            image = Claim.fromJSON(DataSite('wikidata', 'wikidata'), image)
            target = image.getTarget()
        claim_id = image.snak
        # unicode(target) returns a string of format [[site:namespace:filename]],
        # e.g. [[commons:File:Barack_Obama.jpg]], the wiki link of the image
        # page. We substitute this for a valid external link
        site, ns, link = image_interwiki_link = unicode(
            target).replace(' ', '_').strip('[]').split(':')
        image_description_page = u'https://{}.wikimedia.org/wiki/{}:{}'.format(
            *image_interwiki_link)
        # after:
        # https://stackoverflow.com/questions/34393884/how-to-get-image-url-property-from-wikidata-item-by-api
        thumbnail_template = u'https://{}.wikimedia.org/w/thumb.php?width={}&f={}'
        thumbnail_link = thumbnail_template.format(site, image_width, link)
        image_md5 = hashlib.md5(link.encode('utf-8')).hexdigest()
        a, b = image_md5[:2]
        direct_link_template = 'https://upload.wikimedia.org/wikipedia/{}/{}/{}/{}'
        image_direct_link = unicode(direct_link_template.format(
            site, a, a + b, quote(link.encode('utf-8'))))
        images_retrieved[image_type] = OrderedDict(
            [('claim_id', claim_id),
             ('description_page', image_description_page),
             ('thumbnail', thumbnail_link),
             ('full', image_direct_link)])
    if not images_retrieved:
        raise NoImageFoundError("No image available for entity!")
    return images_retrieved
def test_extract_literal_properties_freestanding():
    """
    :return:
    """
    claim = Claim.fromJSON(DataSite("wikidata", "wikidata"), {
        u'type': u'statement',
        u'references': [
            {u'snaks': {
                u'P248': [{u'datatype': u'wikibase-item',
                           u'datavalue': {u'type': u'wikibase-entityid',
                                          u'value': {u'entity-type': u'item',
                                                     u'numeric-id': 5375741}},
                           u'property': u'P248',
                           u'snaktype': u'value'}]},
             u'hash': u'355b56329b78db22be549dec34f2570ca61ca056',
             u'snaks-order': [u'P248']},
            {u'snaks': {
                u'P1476': [{u'datatype': u'monolingualtext',
                            u'datavalue': {u'type': u'monolingualtext',
                                           u'value': {u'text': u'Obituary: Douglas Adams',
                                                      u'language': u'en'}},
                            u'property': u'P1476',
                            u'snaktype': u'value'}],
                u'P407': [{u'datatype': u'wikibase-item',
                           u'datavalue': {u'type': u'wikibase-entityid',
                                          u'value': {u'entity-type': u'item',
                                                     u'numeric-id': 1860}},
                           u'property': u'P407',
                           u'snaktype': u'value'}],
                u'P813': [{u'datatype': u'time',
                           u'datavalue': {u'type': u'time',
                                          u'value': {u'after': 0,
                                                     u'precision': 11,
                                                     u'time': u'+00000002013-12-07T00:00:00Z',
                                                     u'timezone': 0,
                                                     u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                     u'before': 0}},
                           u'property': u'P813',
                           u'snaktype': u'value'}],
                u'P1433': [{u'datatype': u'wikibase-item',
                            u'datavalue': {u'type': u'wikibase-entityid',
                                           u'value': {u'entity-type': u'item',
                                                      u'numeric-id': 11148}},
                            u'property': u'P1433',
                            u'snaktype': u'value'}],
                u'P854': [{u'datatype': u'url',
                           u'datavalue': {u'type': u'string',
                                          u'value': u'http://www.theguardian.com/news/2001/may/15/guardianobituaries.books'},
                           u'property': u'P854',
                           u'snaktype': u'value'}],
                u'P577': [{u'datatype': u'time',
                           u'datavalue': {u'type': u'time',
                                          u'value': {u'after': 0,
                                                     u'precision': 11,
                                                     u'time': u'+00000002001-05-15T00:00:00Z',
                                                     u'timezone': 0,
                                                     u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                     u'before': 0}},
                           u'property': u'P577',
                           u'snaktype': u'value'}],
                u'P50': [{u'datatype': u'wikibase-item',
                          u'datavalue': {u'type': u'wikibase-entityid',
                                         u'value': {u'entity-type': u'item',
                                                    u'numeric-id': 18145749}},
                          u'property': u'P50',
                          u'snaktype': u'value'}]},
             u'hash': u'3f4d26cf841e20630c969afc0e48e5e3ef0c5a49',
             u'snaks-order': [u'P854', u'P577', u'P813', u'P1433', u'P50',
                              u'P1476', u'P407']},
            {u'snaks': {
                u'P123': [{u'datatype': u'wikibase-item',
                           u'datavalue': {u'type': u'wikibase-entityid',
                                          u'value': {u'entity-type': u'item',
                                                     u'numeric-id': 192621}},
                           u'property': u'P123',
                           u'snaktype': u'value'}],
                u'P1476': [{u'datatype': u'monolingualtext',
                            u'datavalue': {u'type': u'monolingualtext',
                                           u'value': {u'text': u"Hitch Hiker's Guide author Douglas Adams dies aged 49",
                                                      u'language': u'en'}},
                            u'property': u'P1476',
                            u'snaktype': u'value'}],
                u'P407': [{u'datatype': u'wikibase-item',
                           u'datavalue': {u'type': u'wikibase-entityid',
                                          u'value': {u'entity-type': u'item',
                                                     u'numeric-id': 1860}},
                           u'property': u'P407',
                           u'snaktype': u'value'}],
                u'P813': [{u'datatype': u'time',
                           u'datavalue': {u'type': u'time',
                                          u'value': {u'after': 0,
                                                     u'precision': 11,
                                                     u'time': u'+00000002015-01-03T00:00:00Z',
                                                     u'timezone': 0,
                                                     u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                     u'before': 0}},
                           u'property': u'P813',
                           u'snaktype': u'value'}],
                u'P854': [{u'datatype': u'url',
                           u'datavalue': {u'type': u'string',
                                          u'value': u'http://www.telegraph.co.uk/news/uknews/1330072/Hitch-Hikers-Guide-author-Douglas-Adams-dies-aged-49.html'},
                           u'property': u'P854',
                           u'snaktype': u'value'}],
                u'P577': [{u'datatype': u'time',
                           u'datavalue': {u'type': u'time',
                                          u'value': {u'after': 0,
                                                     u'precision': 11,
                                                     u'time': u'+00000002001-05-13T00:00:00Z',
                                                     u'timezone': 0,
                                                     u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                     u'before': 0}},
                           u'property': u'P577',
                           u'snaktype': u'value'}]},
             u'hash': u'51a934797fd7f7d3ee91d4d541356d4c5974075b',
             u'snaks-order': [u'P1476', u'P577', u'P123', u'P407', u'P854',
                              u'P813']},
            {u'snaks': {
                u'P248': [{u'datatype': u'wikibase-item',
                           u'datavalue': {u'type': u'wikibase-entityid',
                                          u'value': {u'entity-type': u'item',
                                                     u'numeric-id': 36578}},
                           u'property': u'P248',
                           u'snaktype': u'value'}],
                u'P813': [{u'datatype': u'time',
                           u'datavalue': {u'type': u'time',
                                          u'value': {u'after': 0,
                                                     u'precision': 11,
                                                     u'time': u'+00000002015-07-07T00:00:00Z',
                                                     u'timezone': 0,
                                                     u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                     u'before': 0}},
                           u'property': u'P813',
                           u'snaktype': u'value'}],
                u'P227': [{u'datatype': u'external-id',
                           u'datavalue': {u'type': u'string',
                                          u'value': u'119033364'},
                           u'property': u'P227',
                           u'snaktype': u'value'}]},
             u'hash': u'a02f3a77ddd343e6b88be25696b055f5131c3d64',
             u'snaks-order': [u'P248', u'P227', u'P813']}],
        u'mainsnak': {
            u'datatype': u'wikibase-item',
            u'datavalue': {u'type': u'wikibase-entityid',
                           u'value': {u'entity-type': u'item',
                                      u'numeric-id': 350}},
            u'property': u'P19',
            u'snaktype': u'value'},
        u'id': u'q42$3D284234-52BC-4DA3-83A3-7C39F84BA518',
        u'rank': u'normal'})
    # target_id = 'Q{}'.format(claim['mainsnak']['datavalue']['value']['numeric-id'])
    target = claim.target
    # target = pywikibot.ItemPage.from_entity_uri(site=DataSite('wikidata', 'wikidata'), uri=target_id)
    result = ParseItemPage.extract_literal_properties(entity=target,
                                                      languages=['en'],
                                                      literals=['labels'])
    print result
    assert result['labels']['en'] == 'Cambridge'

    entity_id = 'Q350'
    target = ItemPage.from_entity_uri(
        site=DataSite('wikidata', 'wikidata'),
        uri='http://www.wikidata.org/entity' + '/' + entity_id)
    print target
    result = ParseItemPage.extract_literal_properties(entity=target,
                                                      languages=['en'],
                                                      literals=['labels'])
    print result
    assert result['labels']['en'] == 'Cambridge'
import pytest
from pywikibot import Claim

from eWRT.ws.wikidata.definitions.property_definitions import image_attributes
from eWRT.ws.wikidata.get_image_from_wikidataid import get_images, get_image

DataSite, itempage = mock.Mock(), mock.Mock()

itempage.claims = {
    'P1442': [Claim.fromJSON(DataSite("wikidata", "wikidata"), {
        u'type': u'statement',
        u'mainsnak': {
            u'datatype': u'commonsMedia',
            u'datavalue': {u'type': u'string',
                           u'value': u"Douglas Adams' gravestone.jpg"},
            u'property': u'P1442',
            u'snaktype': u'value'},
        u'id': u'Q42$db1ba2ba-47b9-3650-e6c4-db683abf788c',
        u'rank': u'normal'
    })],
    'P18': [Claim.fromJSON(DataSite("wikidata", "wikidata"), {
        u'mainsnak': {
            u'datatype': u'commonsMedia',
            u'datavalue': {
                u'type': u'string',
                u'value': u'Douglas adams portrait cropped.jpg'},
            u'property': u'P18',
            u'snaktype': u'value'},
        u'rank': u'normal',
        u'qualifiers': {u'P2096': [
            {u'datatype': u'monolingualtext',
class TestReference(unittest.TestCase):

    """Test Reference."""

    def setUp(self):
        wikidata = Site('test', 'wikidata')
        self.ref_1 = Claim(wikidata, 'P55')
        self.ref_1.setTarget('foo')
        self.ref_2 = Claim(wikidata, 'P55')
        self.ref_2.setTarget('bar')

    def test_reference_init_empty_error(self):
        with self.assertRaises(pwbError) as cm:
            WD.Reference()
        self.assertEqual(
            str(cm.exception),
            'You tried to create a reference without any sources')

    def test_reference_init_non_claim_error(self):
        with self.assertRaises(pwbError) as cm:
            WD.Reference(source_test='foo')
        self.assertEqual(
            str(cm.exception),
            'You tried to create a reference with a non-Claim source')

        with self.assertRaises(pwbError) as cm:
            WD.Reference(source_notest='foo')
        self.assertEqual(
            str(cm.exception),
            'You tried to create a reference with a non-Claim source')

    def test_reference_init_single_claim_gives_list(self):
        r_test = WD.Reference(source_test=self.ref_1)
        self.assertEqual(r_test.source_test, [self.ref_1])
        self.assertEqual(r_test.source_notest, [])

        r_notest = WD.Reference(source_notest=self.ref_1)
        self.assertEqual(r_notest.source_test, [])
        self.assertEqual(r_notest.source_notest, [self.ref_1])

        r_both = WD.Reference(self.ref_1, self.ref_2)
        self.assertEqual(r_both.source_test, [self.ref_1])
        self.assertEqual(r_both.source_notest, [self.ref_2])

    def test_reference_init_with_list(self):
        r_test = WD.Reference(source_test=[self.ref_1, self.ref_2])
        self.assertEqual(r_test.source_test, [self.ref_1, self.ref_2])
        self.assertEqual(r_test.source_notest, [])

        r_notest = WD.Reference(source_notest=[self.ref_1, self.ref_2])
        self.assertEqual(r_notest.source_test, [])
        self.assertEqual(r_notest.source_notest, [self.ref_1, self.ref_2])

        r_both = WD.Reference([self.ref_1, self.ref_2],
                              [self.ref_2, self.ref_1])
        self.assertEqual(r_both.source_test, [self.ref_1, self.ref_2])
        self.assertEqual(r_both.source_notest, [self.ref_2, self.ref_1])

    def test_reference_get_all_sources(self):
        r_test = WD.Reference(source_test=self.ref_1)
        self.assertEqual(r_test.get_all_sources(), [self.ref_1])

        r_notest = WD.Reference(source_notest=self.ref_1)
        self.assertEqual(r_notest.get_all_sources(), [self.ref_1])

        r_both = WD.Reference(self.ref_1, self.ref_2)
        self.assertEqual(r_both.get_all_sources(), [self.ref_1, self.ref_2])

    def test_reference_repr(self):
        """Also ensures there is a repr for Claim."""
        r = WD.Reference(self.ref_1, self.ref_2)
        self.assertEqual(
            repr(r),
            'WD.Reference('
            'test: [WD.Claim(P55: foo)], '
            'no_test: [WD.Claim(P55: bar)])')
def setUp(self):
    wikidata = Site('test', 'wikidata')
    self.ref_1 = Claim(wikidata, 'P55')
    self.ref_1.setTarget('foo')
    self.ref_2 = Claim(wikidata, 'P55')
    self.ref_2.setTarget('bar')
def test_extract_literal_properties_freestanding(self):
    """
    :return:
    """
    try:
        claim = Claim.fromJSON(DataSite("wikidata", "wikidata"), {
            u'type': u'statement',
            u'references': [
                {u'snaks': {
                    u'P248': [{u'datatype': u'wikibase-item',
                               u'datavalue': {u'type': u'wikibase-entityid',
                                              u'value': {u'entity-type': u'item',
                                                         u'numeric-id': 5375741}},
                               u'property': u'P248',
                               u'snaktype': u'value'}]},
                 u'hash': u'355b56329b78db22be549dec34f2570ca61ca056',
                 u'snaks-order': [u'P248']},
                {u'snaks': {
                    u'P1476': [{u'datatype': u'monolingualtext',
                                u'datavalue': {u'type': u'monolingualtext',
                                               u'value': {u'text': u'Obituary: Douglas Adams',
                                                          u'language': u'en'}},
                                u'property': u'P1476',
                                u'snaktype': u'value'}],
                    u'P407': [{u'datatype': u'wikibase-item',
                               u'datavalue': {u'type': u'wikibase-entityid',
                                              u'value': {u'entity-type': u'item',
                                                         u'numeric-id': 1860}},
                               u'property': u'P407',
                               u'snaktype': u'value'}],
                    u'P813': [{u'datatype': u'time',
                               u'datavalue': {u'type': u'time',
                                              u'value': {u'after': 0,
                                                         u'precision': 11,
                                                         u'time': u'+00000002013-12-07T00:00:00Z',
                                                         u'timezone': 0,
                                                         u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                         u'before': 0}},
                               u'property': u'P813',
                               u'snaktype': u'value'}],
                    u'P1433': [{u'datatype': u'wikibase-item',
                                u'datavalue': {u'type': u'wikibase-entityid',
                                               u'value': {u'entity-type': u'item',
                                                          u'numeric-id': 11148}},
                                u'property': u'P1433',
                                u'snaktype': u'value'}],
                    u'P854': [{u'datatype': u'url',
                               u'datavalue': {u'type': u'string',
                                              u'value': u'http://www.theguardian.com/news/2001/may/15/guardianobituaries.books'},
                               u'property': u'P854',
                               u'snaktype': u'value'}],
                    u'P577': [{u'datatype': u'time',
                               u'datavalue': {u'type': u'time',
                                              u'value': {u'after': 0,
                                                         u'precision': 11,
                                                         u'time': u'+00000002001-05-15T00:00:00Z',
                                                         u'timezone': 0,
                                                         u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                         u'before': 0}},
                               u'property': u'P577',
                               u'snaktype': u'value'}],
                    u'P50': [{u'datatype': u'wikibase-item',
                              u'datavalue': {u'type': u'wikibase-entityid',
                                             u'value': {u'entity-type': u'item',
                                                        u'numeric-id': 18145749}},
                              u'property': u'P50',
                              u'snaktype': u'value'}]},
                 u'hash': u'3f4d26cf841e20630c969afc0e48e5e3ef0c5a49',
                 u'snaks-order': [u'P854', u'P577', u'P813', u'P1433',
                                  u'P50', u'P1476', u'P407']},
                {u'snaks': {
                    u'P123': [{u'datatype': u'wikibase-item',
                               u'datavalue': {u'type': u'wikibase-entityid',
                                              u'value': {u'entity-type': u'item',
                                                         u'numeric-id': 192621}},
                               u'property': u'P123',
                               u'snaktype': u'value'}],
                    u'P1476': [{u'datatype': u'monolingualtext',
                                u'datavalue': {u'type': u'monolingualtext',
                                               u'value': {u'text': u"Hitch Hiker's Guide author Douglas Adams dies aged 49",
                                                          u'language': u'en'}},
                                u'property': u'P1476',
                                u'snaktype': u'value'}],
                    u'P407': [{u'datatype': u'wikibase-item',
                               u'datavalue': {u'type': u'wikibase-entityid',
                                              u'value': {u'entity-type': u'item',
                                                         u'numeric-id': 1860}},
                               u'property': u'P407',
                               u'snaktype': u'value'}],
                    u'P813': [{u'datatype': u'time',
                               u'datavalue': {u'type': u'time',
                                              u'value': {u'after': 0,
                                                         u'precision': 11,
                                                         u'time': u'+00000002015-01-03T00:00:00Z',
                                                         u'timezone': 0,
                                                         u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                         u'before': 0}},
                               u'property': u'P813',
                               u'snaktype': u'value'}],
                    u'P854': [{u'datatype': u'url',
                               u'datavalue': {u'type': u'string',
                                              u'value': u'http://www.telegraph.co.uk/news/uknews/1330072/Hitch-Hikers-Guide-author-Douglas-Adams-dies-aged-49.html'},
                               u'property': u'P854',
                               u'snaktype': u'value'}],
                    u'P577': [{u'datatype': u'time',
                               u'datavalue': {u'type': u'time',
                                              u'value': {u'after': 0,
                                                         u'precision': 11,
                                                         u'time': u'+00000002001-05-13T00:00:00Z',
                                                         u'timezone': 0,
                                                         u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                         u'before': 0}},
                               u'property': u'P577',
                               u'snaktype': u'value'}]},
                 u'hash': u'51a934797fd7f7d3ee91d4d541356d4c5974075b',
                 u'snaks-order': [u'P1476', u'P577', u'P123', u'P407',
                                  u'P854', u'P813']},
                {u'snaks': {
                    u'P248': [{u'datatype': u'wikibase-item',
                               u'datavalue': {u'type': u'wikibase-entityid',
                                              u'value': {u'entity-type': u'item',
                                                         u'numeric-id': 36578}},
                               u'property': u'P248',
                               u'snaktype': u'value'}],
                    u'P813': [{u'datatype': u'time',
                               u'datavalue': {u'type': u'time',
                                              u'value': {u'after': 0,
                                                         u'precision': 11,
                                                         u'time': u'+00000002015-07-07T00:00:00Z',
                                                         u'timezone': 0,
                                                         u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                         u'before': 0}},
                               u'property': u'P813',
                               u'snaktype': u'value'}],
                    u'P227': [{u'datatype': u'external-id',
                               u'datavalue': {u'type': u'string',
                                              u'value': u'119033364'},
                               u'property': u'P227',
                               u'snaktype': u'value'}]},
                 u'hash': u'a02f3a77ddd343e6b88be25696b055f5131c3d64',
                 u'snaks-order': [u'P248', u'P227', u'P813']}],
            u'mainsnak': {
                u'datatype': u'wikibase-item',
                u'datavalue': {u'type': u'wikibase-entityid',
                               u'value': {u'entity-type': u'item',
                                          u'numeric-id': 350}},
                u'property': u'P19',
                u'snaktype': u'value'},
            u'id': u'q42$3D284234-52BC-4DA3-83A3-7C39F84BA518',
            u'rank': u'normal'})
        # target_id = 'Q{}'.format(claim['mainsnak']['datavalue']['value']['numeric-id'])
        target = claim.target
        # target = pywikibot.ItemPage.from_entity_uri(site=DataSite('wikidata', 'wikidata'), uri=target_id)
        result = ParseItemPage.extract_literal_properties(
            entity=target, languages=['en'], literals=['labels'])
        print(result)
        assert result['labels']['en'] == 'Cambridge'

        entity_id = 'Q350'
        target = ItemPage.from_entity_uri(
            site=DataSite('wikidata', 'wikidata'),
            uri='http://www.wikidata.org/entity' + '/' + entity_id)
        print(target)
        result = ParseItemPage.extract_literal_properties(
            entity=target, languages=['en'], literals=['labels'])
        print(result)
        assert result['labels']['en'] == 'Cambridge'
    except pywikibot.exceptions.MaxlagTimeoutError:
        warnings.warn('External API unreachable')
def add_us_as_source(existing_claim: pywikibot.Claim):
    existing_claim.addSources(build_sources_claims())
def get_or_create_sources(
    claim: Claim,
    url: str,
    retrieved,
    edit_group_hash: str,
    title: Optional[str] = None,
    date: Optional[WbTime] = None,
):
    """
    Gets or creates a reference (source) on `claim` pointing to `url`.
    """
    all_sources = []

    src_p = Properties.reference_url

    for i in claim.sources or []:
        if src_p in i:
            all_sources.append(i[src_p][0])

    for src_url in all_sources:
        if src_url.target_equals(url):
            break
    else:
        src_url = Claim(Settings.wikidata_repo, src_p)
        src_url.setTarget(url)
        src_retrieved = Claim(Settings.wikidata_repo, Properties.retrieved)
        src_retrieved.setTarget(retrieved)

        sources = [src_url, src_retrieved]

        if title:
            src_title = Claim(Settings.wikidata_repo, Properties.title)
            src_title.setTarget(pywikibot.WbMonolingualText(title, "en"))
            sources.append(src_title)
        if date:
            src_date = Claim(Settings.wikidata_repo,
                             Properties.publication_date)
            src_date.setTarget(date)
            sources.append(src_date)
        claim.addSources(sources, summary=get_sumary(edit_group_hash))

    return src_url