def _get_data_site(cls, site: DataSite) -> APISite:
    """
    Return the site serving as a tabular-data repository.

    :param site: The Wikibase site
    """
    return site.tabular_data_repository()
def _get_data_site(cls: WB_TABULAR_DATA_CLASS, site: DataSite) -> APISite:
    """
    Fetch the API site that acts as tabular-data repository for *site*.

    :param site: The Wikibase site
    """
    repository = site.tabular_data_repository()
    return repository
def _get_data_site(cls, site: DataSite) -> APISite:
    """
    Return the site serving as a geo-shape repository.

    :param site: The Wikibase site
    """
    return site.geo_shape_repository()
def __init__(self, claim, languages, literals, delay=False,
             include_attribute_labels=False, qualifiers=None):
    """
    Parse additional information about a specified claim.

    The result (dict format) is accessible through
    ``ParseClaim(claim).claim_details``.

    :param claim: pywikibot.Claim object (or its JSON dict form) to be parsed
    :type claim: pywikibot.Claim
    :param languages: list of language ISO codes
    :type languages: List(str)
    :param literals: list of literal properties to be included in result
    :type literals: List(str)
    :param delay: when True, skip parsing now; ``claim_details`` stays empty
    :param include_attribute_labels: when True, restrict literals to labels
    :param qualifiers: qualifier property IDs; defaults to module-level
        QUALIFIERS when None
    """
    self.qualifiers = QUALIFIERS if qualifiers is None else qualifiers
    # accept a raw JSON dict and deserialize it into a Claim
    if not isinstance(claim, Claim):
        claim = Claim.fromJSON(site=DataSite('wikidata', 'wikidata'),
                               data=claim)
    self.claim = claim
    self.languages = languages
    self.include_attribute_labels = include_attribute_labels
    self.literals = ['labels'] if include_attribute_labels else literals
    self.claim_details = {} if delay else self.parse_claim()
def _get_data_site(cls: WB_GEO_SHAPE_CLASS, site: DataSite) -> APISite:
    """
    Fetch the API site that acts as geo-shape repository for *site*.

    :param site: The Wikibase site
    """
    repository = site.geo_shape_repository()
    return repository
def get_country_from_any(cls, itempage, local_attributes, languages,
                         include_attribute_labels=True):
    """
    Try to infer a country from any location-type attribute of an item.

    :param itempage: parent item (JSON dict or pywikibot.ItemPage)
    :param local_attributes: attribute Pxx codes which might be used to
        infer country; defaults to LOCAL_ATTRIBUTES when None
    :param languages: languages for country label
    :param include_attribute_labels: forwarded to
        ParseItemPage.get_country_from_location
    :returns: list with dictionaries of ID, labels of (preferred) country
        or countries, or None when nothing could be inferred.
    """
    if local_attributes is None:
        local_attributes = LOCAL_ATTRIBUTES
    try:
        claims = itempage['claims']
    except (KeyError, TypeError):
        # itempage may be an ItemPage object whose JSON lives in `.text`
        # (was a bare `except:`, which also hid real bugs)
        claims = itempage.text['claims']
    for location_type in local_attributes:
        if location_type not in claims:
            continue
        for location in claims[location_type]:
            if not location:
                # null-valued location property: log and keep searching
                logger.warning('Entity {} has location property {} '
                               'set to null'.format(itempage['id'],
                                                    location_type))
                continue
            if not isinstance(location, Claim):
                location = Claim.fromJSON(
                    DataSite('wikidata', 'wikidata'), data=location)
            try:
                country = ParseItemPage.get_country_from_location(
                    location.target,
                    languages=languages,
                    include_attribute_labels=include_attribute_labels)
            except ValueError:
                # this candidate yielded no country; try the next one
                continue
            if 'preferred' in country:
                return country['preferred']
            if len(country['values']) >= 1:
                return country['values']
    return None
def fromWikibase(cls, data: dict, site: DataSite):
    """
    Build a Coordinate object from Wikibase's JSON output.

    @param data: Wikibase JSON
    @param site: The Wikibase site
    @rtype: Coordinate
    """
    globe = None
    if data['globe']:
        # invert the site's globe mapping (name -> URI) to look up by URI
        uri_to_name = {uri: name for name, uri in site.globes().items()}
        globe = uri_to_name.get(data['globe'])
    return cls(data['latitude'], data['longitude'], data['altitude'],
               data['precision'], globe, site=site,
               globe_item=data['globe'])
def attribute_preferred_value(claim_instances):
    """
    Return the `preferred`-ranked instance(s) of an attribute.

    When an attribute has several instances, try to retrieve the one
    with rank=preferred.

    :param claim_instances: list of `Claim`s (or their JSON dict forms).
    :returns: a 1-member list containing the unique `preferred` value,
        or the input list if it has length 1.
    :raises ValueError: when no instance is marked as `preferred`.
    """
    if len(claim_instances) == 1:
        return claim_instances
    try:
        claim_instances = [
            Claim.fromJSON(DataSite('wikidata', 'wikidata'), claim_instance)
            for claim_instance in claim_instances
        ]
    except TypeError:
        # entries are already Claim objects (not JSON dicts); use them as-is
        pass
    preferred = [
        claim for claim in claim_instances if claim.rank == 'preferred'
    ]
    if not preferred:
        raise ValueError('No claim instance marked as preferred!')
    if len(preferred) > 1:
        sample_claim = preferred[0]
        # lazy %-args: formatted only when INFO logging is enabled
        logger.info(
            'Several instances of claim %s on entity %s marked as '
            'preferred, this is suspicious but does have valid use '
            'cases!', sample_claim.id, sample_claim.snak.split('$')[0])
    return preferred
# NOTE(review): this chunk is part of a script's top level; the loop that
# defines `d` (and the original indentation of the first line) is not
# visible here -- confirm placement against the full file.
vossantoDicts.append(d)

# Collect the distinct source-entity IDs mentioned in the parsed vossanto
# dicts, sorted for reproducibility.
sourceidListFromJson = sorted(list(set([d["sourceId"] for d in vossantoDicts])))
print(sourceidListFromJson.__len__())
#sourceidListFromJson = ["Q18218128","Q5443"]
#sourceidList = ["Q9458","Q2685","Q5443","Q162629","Q235262","Q49481","Q381178","Q327071","Q25340127"]
#common_links = ["https://commons.wikimedia.org/wiki/File:Michael_Jordan.jpg",
#                "https://commons.wikimedia.org/wiki/File:Busterkeaton_edit.jpg",
#                "https://commons.wikimedia.org/wiki/File:Goethe_(Stieler_1828).jpg"
#               ]
wikidatapage = pywikibot.page.SiteLink('Q467658', DataSite("wikidata", "wikidata"))
# accumulator for entity IDs without an image (filled later, presumably)
wikidata_id_no_pic = []

# NOTE(review): mid-file import; convention would put this at the top of
# the file (left in place to keep this change comment-only).
import requests


def extract_image_license(image_name):
    """Query the Wikimedia Commons API for imageinfo/license metadata of
    ``File:<image_name>`` and decode the JSON response.

    :param image_name: Commons file name without the ``File:`` prefix
    """
    start_of_end_point_str = 'https://commons.wikimedia.org' \
        '/w/api.php?action=query&titles=File:'
    end_of_end_point_str = '&prop=imageinfo&iiprop=user' \
        '|userid|canonicaltitle|url|extmetadata&format=json'
    result = requests.get(start_of_end_point_str + image_name + end_of_end_point_str)
    result = result.json()
    # NOTE(review): the function body appears to continue beyond this chunk
    # (no return statement is visible here).
def test_extract_literal_properties_freestanding(self):
    """Extract the English label of an entity in two ways and compare.

    Builds a Claim from a frozen JSON dump of a place-of-birth (P19)
    statement (target: Cambridge, Q350), then checks that
    ParseItemPage.extract_literal_properties returns the label
    'Cambridge' both for the claim target and for an ItemPage fetched
    via its entity URI.  Maxlag timeouts from the live API are
    downgraded to a warning so CI does not fail on throttling.
    """
    try:
        claim = Claim.fromJSON(
            DataSite("wikidata", "wikidata"),
            {
                u'type': u'statement',
                # four references; their exact content is irrelevant to the
                # assertion, the fixture just mirrors a real API response
                u'references': [
                    {
                        u'snaks': {
                            u'P248': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 5375741
                                    }
                                },
                                u'property': u'P248',
                                u'snaktype': u'value'
                            }]
                        },
                        u'hash': u'355b56329b78db22be549dec34f2570ca61ca056',
                        u'snaks-order': [u'P248']
                    },
                    {
                        u'snaks': {
                            u'P1476': [{
                                u'datatype': u'monolingualtext',
                                u'datavalue': {
                                    u'type': u'monolingualtext',
                                    u'value': {
                                        u'text': u'Obituary: Douglas Adams',
                                        u'language': u'en'
                                    }
                                },
                                u'property': u'P1476',
                                u'snaktype': u'value'
                            }],
                            u'P407': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 1860
                                    }
                                },
                                u'property': u'P407',
                                u'snaktype': u'value'
                            }],
                            u'P813': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time': u'+00000002013-12-07T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P813',
                                u'snaktype': u'value'
                            }],
                            u'P1433': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 11148
                                    }
                                },
                                u'property': u'P1433',
                                u'snaktype': u'value'
                            }],
                            u'P854': [{
                                u'datatype': u'url',
                                u'datavalue': {
                                    u'type': u'string',
                                    u'value': u'http://www.theguardian.com/news/2001/may/15/guardianobituaries.books'
                                },
                                u'property': u'P854',
                                u'snaktype': u'value'
                            }],
                            u'P577': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time': u'+00000002001-05-15T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P577',
                                u'snaktype': u'value'
                            }],
                            u'P50': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 18145749
                                    }
                                },
                                u'property': u'P50',
                                u'snaktype': u'value'
                            }]
                        },
                        u'hash': u'3f4d26cf841e20630c969afc0e48e5e3ef0c5a49',
                        u'snaks-order': [
                            u'P854', u'P577', u'P813', u'P1433', u'P50',
                            u'P1476', u'P407'
                        ]
                    },
                    {
                        u'snaks': {
                            u'P123': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 192621
                                    }
                                },
                                u'property': u'P123',
                                u'snaktype': u'value'
                            }],
                            u'P1476': [{
                                u'datatype': u'monolingualtext',
                                u'datavalue': {
                                    u'type': u'monolingualtext',
                                    u'value': {
                                        u'text': u"Hitch Hiker's Guide author Douglas Adams dies aged 49",
                                        u'language': u'en'
                                    }
                                },
                                u'property': u'P1476',
                                u'snaktype': u'value'
                            }],
                            u'P407': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 1860
                                    }
                                },
                                u'property': u'P407',
                                u'snaktype': u'value'
                            }],
                            u'P813': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time': u'+00000002015-01-03T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P813',
                                u'snaktype': u'value'
                            }],
                            u'P854': [{
                                u'datatype': u'url',
                                u'datavalue': {
                                    u'type': u'string',
                                    u'value': u'http://www.telegraph.co.uk/news/uknews/1330072/Hitch-Hikers-Guide-author-Douglas-Adams-dies-aged-49.html'
                                },
                                u'property': u'P854',
                                u'snaktype': u'value'
                            }],
                            u'P577': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time': u'+00000002001-05-13T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P577',
                                u'snaktype': u'value'
                            }]
                        },
                        u'hash': u'51a934797fd7f7d3ee91d4d541356d4c5974075b',
                        u'snaks-order': [
                            u'P1476', u'P577', u'P123', u'P407', u'P854',
                            u'P813'
                        ]
                    },
                    {
                        u'snaks': {
                            u'P248': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 36578
                                    }
                                },
                                u'property': u'P248',
                                u'snaktype': u'value'
                            }],
                            u'P813': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time': u'+00000002015-07-07T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P813',
                                u'snaktype': u'value'
                            }],
                            u'P227': [{
                                u'datatype': u'external-id',
                                u'datavalue': {
                                    u'type': u'string',
                                    u'value': u'119033364'
                                },
                                u'property': u'P227',
                                u'snaktype': u'value'
                            }]
                        },
                        u'hash': u'a02f3a77ddd343e6b88be25696b055f5131c3d64',
                        u'snaks-order': [u'P248', u'P227', u'P813']
                    }
                ],
                # the actual statement under test: P19 (place of birth)
                # pointing at item Q350 (Cambridge)
                u'mainsnak': {
                    u'datatype': u'wikibase-item',
                    u'datavalue': {
                        u'type': u'wikibase-entityid',
                        u'value': {
                            u'entity-type': u'item',
                            u'numeric-id': 350
                        }
                    },
                    u'property': u'P19',
                    u'snaktype': u'value'
                },
                u'id': u'q42$3D284234-52BC-4DA3-83A3-7C39F84BA518',
                u'rank': u'normal'
            })
        # target_id = 'Q{}'.format(claim['mainsnak']['datavalue']['value']['numeric-id'])
        target = claim.target
        # target = pywikibot.ItemPage.from_entity_uri(site=DataSite('wikidata', 'wikidata'), uri=target_id)
        # path 1: label via the deserialized claim's target
        result = ParseItemPage.extract_literal_properties(
            entity=target, languages=['en'], literals=['labels'])
        print(result)
        assert result['labels']['en'] == 'Cambridge'
        # path 2: label via an ItemPage constructed from the entity URI
        entity_id = 'Q350'
        target = ItemPage.from_entity_uri(
            site=DataSite('wikidata', 'wikidata'),
            uri='http://www.wikidata.org/entity' + '/' + entity_id)
        print(target)
        result = ParseItemPage.extract_literal_properties(
            entity=target, languages=['en'], literals=['labels'])
        print(result)
        assert result['labels']['en'] == 'Cambridge'
    except pywikibot.exceptions.MaxlagTimeoutError:
        warnings.warn('External API unreachable')
def get_images(itempage, image_width=DEFAULT_THUMBNAIL_WIDTH,
               image_types=image_attributes):
    """Find images of any specified type (e.g. 'image', 'flag'...).

    :param itempage: pywikibot.ItemPage (or its JSON dict form)
    :param image_width: width of the thumbnail in pixels
    :param image_types: dict of image-type properties identified by their
        Pxxx codes, such as `P18`: 'image', `P154`: 'logo image'
    :returns: dict with keys=Pxx codes, values=OrderedDict of
        [claim_id, image_description_url, thumbnail_url, full_image_url]
    :raises NoImageFoundError: when no image of any requested type exists
    """
    try:
        claims = itempage['claims']
    except TypeError:
        # itempage is an ItemPage object rather than a JSON dict
        claims = itempage.claims
    images_retrieved = {}
    for image_type in image_types:
        try:
            image = claims[image_type][0]
        except KeyError:
            # if we are looking for a *list* of image types, we don't want
            # to abort the process because one is missing.
            continue
        try:
            target = image.getTarget()
        except AttributeError:
            # raw JSON claim: deserialize it before accessing the target
            from pywikibot import Claim
            from pywikibot.site import DataSite
            image = Claim.fromJSON(DataSite('wikidata', 'wikidata'), image)
            target = image.getTarget()
        claim_id = image.snak
        # str(target) returns a string of format [[site:namespace:filename]],
        # e.g. [[commons:File:Barack_Obama.jpg]], the wiki link of the image
        # page.  We substitute this for a valid external link.
        site, ns, link = image_interwiki_link = str(
            target).replace(' ', '_').strip('[]').split(':')
        image_description_page = u'https://{}.wikimedia.org/wiki/{}:{}'.format(
            *image_interwiki_link)
        # after:
        # https://stackoverflow.com/questions/34393884/how-to-get-image-url-property-from-wikidata-item-by-api
        thumbnail_template = u'https://{}.wikimedia.org/w/thumb.php?width={}&f={}'
        thumbnail_link = thumbnail_template.format(site, image_width, link)
        # Wikimedia shards file storage by the first two hex digits of the
        # MD5 of the file name: /<a>/<ab>/<name>
        image_md5 = hashlib.md5(link.encode('utf-8')).hexdigest()
        a, b = image_md5[:2]
        direct_link_template = 'https://upload.wikimedia.org/wikipedia/{}/{}/{}/{}'
        image_direct_link = str(direct_link_template.format(
            site, a, a + b, quote(link.encode('utf-8'))))
        images_retrieved[image_type] = OrderedDict(
            [('claim_id', claim_id),
             ('description_page', image_description_page),
             ('thumbnail', thumbnail_link),
             ('full', image_direct_link)])
    # was `try: assert ... except AssertionError: raise`; `assert` is
    # stripped under `python -O`, so raise explicitly instead
    if not images_retrieved:
        raise NoImageFoundError("No image available for entity!")
    return images_retrieved