示例#1
0
    def _get_data_site(cls, site: DataSite) -> APISite:
        """
        Look up the tabular-data repository belonging to a Wikibase site.

        @param site: The Wikibase site whose tabular-data repository is wanted
        @return: the result of C{site.tabular_data_repository()}
        """
        repository = site.tabular_data_repository()
        return repository
示例#2
0
    def _get_data_site(cls: WB_TABULAR_DATA_CLASS, site: DataSite) -> APISite:
        """
        Look up the tabular-data repository belonging to a Wikibase site.

        :param site: The Wikibase site whose tabular-data repository is wanted
        :return: the result of ``site.tabular_data_repository()``
        """
        repository = site.tabular_data_repository()
        return repository
示例#3
0
    def _get_data_site(cls, site: DataSite) -> APISite:
        """
        Look up the geo-shape repository belonging to a Wikibase site.

        @param site: The Wikibase site whose geo-shape repository is wanted
        @return: the result of C{site.geo_shape_repository()}
        """
        repository = site.geo_shape_repository()
        return repository
示例#4
0
    def __init__(self,
                 claim,
                 languages,
                 literals,
                 delay=False,
                 include_attribute_labels=False,
                 qualifiers=None):
        """
        Store the claim and parsing options and, unless delayed, parse it.

        The parsed result (dict format) ends up in ``self.claim_details``.

        :param claim: claim to be parsed; anything that is not already a
            ``Claim`` instance is converted with ``Claim.fromJSON`` against
            the ``('wikidata', 'wikidata')`` data site
        :type claim: pywikibot.Claim
        :param languages: list of language ISO codes
        :type languages: List(str)
        :param literals: list of literal properties to be included in result;
            replaced by ``['labels']`` when ``include_attribute_labels`` is
            true
        :type literals: List(str)
        :param delay: when true, ``claim_details`` starts as an empty dict
            and ``parse_claim`` is not called here
        :param include_attribute_labels: stored on the instance; when true it
            also overrides ``literals`` as described above
        :param qualifiers: stored as ``self.qualifiers``; ``None`` selects
            the module-level ``QUALIFIERS``
        """
        self.qualifiers = QUALIFIERS if qualifiers is None else qualifiers

        # Accept raw claim JSON as well as ready-made Claim objects.
        if not isinstance(claim, Claim):
            wikidata = DataSite('wikidata', 'wikidata')
            claim = Claim.fromJSON(site=wikidata, data=claim)

        self.include_attribute_labels = include_attribute_labels
        self.claim = claim
        self.languages = languages
        self.literals = ['labels'] if self.include_attribute_labels \
            else literals

        # Parsing is the last step so every attribute above is already set.
        self.claim_details = {} if delay else self.parse_claim()
示例#5
0
    def _get_data_site(cls: WB_GEO_SHAPE_CLASS, site: DataSite) -> APISite:
        """
        Look up the geo-shape repository belonging to a Wikibase site.

        :param site: The Wikibase site whose geo-shape repository is wanted
        :return: the result of ``site.geo_shape_repository()``
        """
        repository = site.geo_shape_repository()
        return repository
示例#6
0
 def get_country_from_any(cls,
                          itempage,
                          local_attributes,
                          languages,
                          include_attribute_labels=True):
     """
     Try to infer the country of an item from its location-type claims.

     ``local_attributes`` is walked in order. For every non-null claim
     stored under one of those properties, the claim target is handed to
     ``ParseItemPage.get_country_from_location``; the first lookup that
     yields a country decides the result. Lookups that raise ``ValueError``
     are skipped.

     :param itempage: parent item; either a mapping with a ``'claims'`` key
         or an object whose ``text`` attribute is such a mapping
     :param local_attributes: attributes which might be used to infer
         country; ``None`` selects the module-level ``LOCAL_ATTRIBUTES``
     :param languages: languages for country label
     :param include_attribute_labels: forwarded unchanged to
         ``ParseItemPage.get_country_from_location``
     :returns: the ``'preferred'`` entry of the first successful lookup or,
         failing that, its non-empty ``'values'`` entry; ``None`` if no
         country can be inferred from any of the attributes
     """
     if local_attributes is None:
         local_attributes = LOCAL_ATTRIBUTES
     try:
         claims = itempage['claims']
     except (TypeError, KeyError):
         # Not subscriptable (or no top-level 'claims'): the claims live in
         # the mapping stored on the ``text`` attribute instead.
         claims = itempage.text['claims']

     for location_type in local_attributes:
         if location_type not in claims:
             continue
         for location in claims[location_type]:
             if not location:
                 # The item may be a mapping or a page object; look the id
                 # up defensively so that logging itself can never fail.
                 try:
                     entity_id = itempage['id']
                 except (TypeError, KeyError):
                     entity_id = getattr(itempage, 'id', itempage)
                 logger.warning(
                     'Entity %s has location property %s set to null',
                     entity_id, location_type)
                 continue
             if not isinstance(location, Claim):
                 location = Claim.fromJSON(
                     DataSite('wikidata', 'wikidata'), data=location)
             try:
                 country = ParseItemPage.get_country_from_location(
                     location.target,
                     languages=languages,
                     include_attribute_labels=include_attribute_labels)
             except ValueError:
                 # No country derivable from this location; try the next.
                 continue
             if 'preferred' in country:
                 return country['preferred']
             if len(country['values']) >= 1:
                 return country['values']
     return None
示例#7
0
    def fromWikibase(cls, data: dict, site: DataSite):
        """
        Build an instance from the JSON structure emitted by Wikibase.

        The globe entity given in the JSON is translated back to its name
        through the reverse of C{site.globes()}; an empty or unknown globe
        entity yields C{None} as globe name.

        @param data: Wikibase JSON
        @param site: The Wikibase site
        @rtype: Coordinate
        """
        globe_item = data['globe']
        globe = None

        if globe_item:
            # site.globes() maps name -> entity; the lookup needs the reverse.
            name_by_entity = {
                entity: name for name, entity in site.globes().items()
            }
            globe = name_by_entity.get(globe_item)

        return cls(
            data['latitude'],
            data['longitude'],
            data['altitude'],
            data['precision'],
            globe,
            site=site,
            globe_item=globe_item,
        )
示例#8
0
def attribute_preferred_value(claim_instances):
    """Return the instance(s) of an attribute that carry rank `preferred`.

    A one-element input is returned unchanged, without looking at its
    rank. Otherwise every instance is converted with `Claim.fromJSON`
    where possible; instances that are already `Claim`s, or for which the
    conversion raises `TypeError`, are used as they are. The instances
    whose `rank` equals 'preferred' are then returned.

    More than one preferred instance is unusual but legitimate, so that
    case is only logged at INFO level and all of them are returned.

    :param claim_instances: List of `Claim`s or of their JSON form.
    :returns: the input list if it has length 1, otherwise a list of all
        instances ranked `preferred`.
    :raises ValueError: if no instance is ranked `preferred` (this
        includes an empty input list).
    """
    if len(claim_instances) == 1:
        return claim_instances

    def _as_claim(instance):
        # Converting item by item keeps lists that mix parsed and raw
        # claims usable; a single already-parsed entry used to abort the
        # conversion of the whole list.
        if isinstance(instance, Claim):
            return instance
        try:
            return Claim.fromJSON(DataSite('wikidata', 'wikidata'), instance)
        except TypeError:
            return instance

    claims = [_as_claim(instance) for instance in claim_instances]
    preferred = [claim for claim in claims if claim.rank == 'preferred']
    if not preferred:
        raise ValueError('No claim instance marked as preferred!')
    if len(preferred) > 1:
        sample_claim = preferred[0]
        logger.info(
            'Several instances of claim %s on entity %s marked as '
            'preferred, this is suspicious but does have valid use '
            'cases!', sample_claim.id, sample_claim.snak.split('$')[0])
    return preferred
        vossantoDicts.append(d)

# Distinct source ids occurring in the vossanto records, in sorted order.
sourceidListFromJson = sorted({d["sourceId"] for d in vossantoDicts})
print(len(sourceidListFromJson))

# Smaller hand-picked inputs kept around for debugging runs:
#sourceidListFromJson  = ["Q18218128","Q5443"]

#sourceidList = ["Q9458","Q2685","Q5443","Q162629","Q235262","Q49481","Q381178","Q327071","Q25340127"]

#common_links = ["https://commons.wikimedia.org/wiki/File:Michael_Jordan.jpg",
#                "https://commons.wikimedia.org/wiki/File:Busterkeaton_edit.jpg",
#                "https://commons.wikimedia.org/wiki/File:Goethe_(Stieler_1828).jpg"
#                ]

wikidatapage = pywikibot.page.SiteLink(
    'Q467658',
    DataSite("wikidata", "wikidata"),
)

# Presumably collects ids of entities for which no picture was found;
# nothing in this part of the script fills it yet.
wikidata_id_no_pic = list()

import requests


def extract_image_license(image_name):
    """Query the Wikimedia Commons API for image-info of ``File:<image_name>``.

    The request asks for the ``imageinfo`` property with the fields
    ``user``, ``userid``, ``canonicaltitle``, ``url`` and ``extmetadata``
    in JSON format, and the response body is decoded into ``result``.

    NOTE(review): the portion of the function visible here stops right
    after decoding the response and returns nothing; presumably the license
    is extracted from ``extmetadata`` further down -- confirm against the
    complete file.

    :param image_name: file name on Commons, without the ``File:`` prefix;
        it is concatenated into the URL as given (no URL-encoding is
        applied here)
    """

    # The endpoint is assembled as <prefix> + image_name + <suffix>.
    start_of_end_point_str = 'https://commons.wikimedia.org' \
                         '/w/api.php?action=query&titles=File:'
    end_of_end_point_str = '&prop=imageinfo&iiprop=user' \
                       '|userid|canonicaltitle|url|extmetadata&format=json'
    result = requests.get(start_of_end_point_str + image_name +
                          end_of_end_point_str)
    # From here on ``result`` holds the decoded JSON, not the response object.
    result = result.json()
示例#10
0
    def test_extract_literal_properties_freestanding(self):
        """
        Check label extraction for an entity obtained in two different ways.

        A ``Claim`` is built from a hard-coded statement JSON (main snak
        ``P19`` pointing at numeric id 350). Its target is passed to
        ``ParseItemPage.extract_literal_properties`` and the English label
        is expected to be 'Cambridge'. The same check is then repeated for
        an ``ItemPage`` created from the entity URI of ``Q350``.

        A ``MaxlagTimeoutError`` is downgraded to a warning instead of
        failing the test.

        :return: None
        """
        try:
            claim = Claim.fromJSON(
                DataSite("wikidata", "wikidata"), {
                    u'type':
                    u'statement',
                    u'references': [{
                        u'snaks': {
                            u'P248': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 5375741
                                    }
                                },
                                u'property': u'P248',
                                u'snaktype': u'value'
                            }]
                        },
                        u'hash': u'355b56329b78db22be549dec34f2570ca61ca056',
                        u'snaks-order': [u'P248']
                    }, {
                        u'snaks': {
                            u'P1476': [{
                                u'datatype': u'monolingualtext',
                                u'datavalue': {
                                    u'type': u'monolingualtext',
                                    u'value': {
                                        u'text': u'Obituary: Douglas Adams',
                                        u'language': u'en'
                                    }
                                },
                                u'property': u'P1476',
                                u'snaktype': u'value'
                            }],
                            u'P407': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 1860
                                    }
                                },
                                u'property': u'P407',
                                u'snaktype': u'value'
                            }],
                            u'P813': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time':
                                        u'+00000002013-12-07T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel':
                                        u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P813',
                                u'snaktype': u'value'
                            }],
                            u'P1433': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 11148
                                    }
                                },
                                u'property': u'P1433',
                                u'snaktype': u'value'
                            }],
                            u'P854': [{
                                u'datatype': u'url',
                                u'datavalue': {
                                    u'type':
                                    u'string',
                                    u'value':
                                    u'http://www.theguardian.com/news/2001/may/15/guardianobituaries.books'
                                },
                                u'property': u'P854',
                                u'snaktype': u'value'
                            }],
                            u'P577': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time':
                                        u'+00000002001-05-15T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel':
                                        u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P577',
                                u'snaktype': u'value'
                            }],
                            u'P50': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 18145749
                                    }
                                },
                                u'property': u'P50',
                                u'snaktype': u'value'
                            }]
                        },
                        u'hash':
                        u'3f4d26cf841e20630c969afc0e48e5e3ef0c5a49',
                        u'snaks-order': [
                            u'P854', u'P577', u'P813', u'P1433', u'P50',
                            u'P1476', u'P407'
                        ]
                    }, {
                        u'snaks': {
                            u'P123': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 192621
                                    }
                                },
                                u'property': u'P123',
                                u'snaktype': u'value'
                            }],
                            u'P1476': [{
                                u'datatype': u'monolingualtext',
                                u'datavalue': {
                                    u'type': u'monolingualtext',
                                    u'value': {
                                        u'text':
                                        u"Hitch Hiker's Guide author Douglas Adams dies aged 49",
                                        u'language': u'en'
                                    }
                                },
                                u'property': u'P1476',
                                u'snaktype': u'value'
                            }],
                            u'P407': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 1860
                                    }
                                },
                                u'property': u'P407',
                                u'snaktype': u'value'
                            }],
                            u'P813': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time':
                                        u'+00000002015-01-03T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel':
                                        u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P813',
                                u'snaktype': u'value'
                            }],
                            u'P854': [{
                                u'datatype': u'url',
                                u'datavalue': {
                                    u'type':
                                    u'string',
                                    u'value':
                                    u'http://www.telegraph.co.uk/news/uknews/1330072/Hitch-Hikers-Guide-author-Douglas-Adams-dies-aged-49.html'
                                },
                                u'property': u'P854',
                                u'snaktype': u'value'
                            }],
                            u'P577': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time':
                                        u'+00000002001-05-13T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel':
                                        u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P577',
                                u'snaktype': u'value'
                            }]
                        },
                        u'hash':
                        u'51a934797fd7f7d3ee91d4d541356d4c5974075b',
                        u'snaks-order': [
                            u'P1476', u'P577', u'P123', u'P407', u'P854',
                            u'P813'
                        ]
                    }, {
                        u'snaks': {
                            u'P248': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 36578
                                    }
                                },
                                u'property': u'P248',
                                u'snaktype': u'value'
                            }],
                            u'P813': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time':
                                        u'+00000002015-07-07T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel':
                                        u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P813',
                                u'snaktype': u'value'
                            }],
                            u'P227': [{
                                u'datatype': u'external-id',
                                u'datavalue': {
                                    u'type': u'string',
                                    u'value': u'119033364'
                                },
                                u'property': u'P227',
                                u'snaktype': u'value'
                            }]
                        },
                        u'hash': u'a02f3a77ddd343e6b88be25696b055f5131c3d64',
                        u'snaks-order': [u'P248', u'P227', u'P813']
                    }],
                    u'mainsnak': {
                        u'datatype': u'wikibase-item',
                        u'datavalue': {
                            u'type': u'wikibase-entityid',
                            u'value': {
                                u'entity-type': u'item',
                                u'numeric-id': 350
                            }
                        },
                        u'property': u'P19',
                        u'snaktype': u'value'
                    },
                    u'id':
                    u'q42$3D284234-52BC-4DA3-83A3-7C39F84BA518',
                    u'rank':
                    u'normal'
                })
            # target_id = 'Q{}'.format(claim['mainsnak']['datavalue']['value']['numeric-id'])

            # First variant: use the target resolved from the claim itself.
            target = claim.target
            # target = pywikibot.ItemPage.from_entity_uri(site=DataSite('wikidata', 'wikidata'), uri=target_id)
            result = ParseItemPage.extract_literal_properties(
                entity=target, languages=['en'], literals=['labels'])
            print(result)
            assert result['labels']['en'] == 'Cambridge'
            # Second variant: build the ItemPage directly from the entity URI.
            entity_id = 'Q350'
            target = ItemPage.from_entity_uri(
                site=DataSite('wikidata', 'wikidata'),
                uri='http://www.wikidata.org/entity' + '/' + entity_id)
            print(target)
            result = ParseItemPage.extract_literal_properties(
                entity=target, languages=['en'], literals=['labels'])
            print(result)
            assert result['labels']['en'] == 'Cambridge'
        except pywikibot.exceptions.MaxlagTimeoutError:
            # A maxlag timeout is reported as a warning, not a test failure.
            warnings.warn('External API unreachable')
示例#11
0
def get_images(itempage,
               image_width=DEFAULT_THUMBNAIL_WIDTH,
               image_types=image_attributes):
    """Find images of any specified type (e.g. 'image', 'flag'...).

    For each requested image-type property only the first claim is used.
    Image types without any claim on the item are skipped.

    :param itempage: pywikibot.ItemPage, or a mapping with a 'claims' key
    :param image_width: width of the thumbnail
    :param image_types: dict of image-type properties identified
        by their Pxxx codes, such as `P18`: 'image', `P154`: 'logo image'
    :returns: dict with keys=Pxx codes and, as values, an OrderedDict with
        the keys 'claim_id', 'description_page', 'thumbnail' and 'full'
    :raises NoImageFoundError: if none of the requested image types is
        available on the item
    """
    try:
        claims = itempage['claims']
    except TypeError:
        claims = itempage.claims
    images_retrieved = {}
    for image_type in image_types:
        try:
            image = claims[image_type][0]
        except (KeyError, IndexError):
            # if we are looking for a *list* of image types,
            # we don't want to abort the process because
            # one is missing (or present with no claims at all).
            continue
        try:
            target = image.getTarget()
        except AttributeError:
            # Raw claim JSON rather than a Claim object: parse it first.
            from pywikibot import Claim
            from pywikibot.site import DataSite
            image = Claim.fromJSON(DataSite('wikidata', 'wikidata'), image)
            target = image.getTarget()
        claim_id = image.snak
        # str(target) returns a string of format [[site:namespace:filename]],
        # e. g. [[commons:File:Barack_Obama.jpg]], the wiki link of the image
        # page. We substitute this for a valid external link.
        # Splitting at most twice keeps colons inside the file name intact.
        image_interwiki_link = str(target).replace(
            ' ', '_').strip('[]').split(':', 2)
        site, _namespace, link = image_interwiki_link
        image_description_page = u'https://{}.wikimedia.org/wiki/{}:{}'.format(
            *image_interwiki_link)

        # after:
        # https://stackoverflow.com/questions/34393884/how-to-get-image-url-property-from-wikidata-item-by-api
        thumbnail_template = u'https://{}.wikimedia.org/w/thumb.php?width={}&f={}'
        thumbnail_link = thumbnail_template.format(site, image_width, link)
        # The upload path contains the first one and the first two hex
        # digits of the MD5 digest of the file name.
        image_md5 = hashlib.md5(link.encode('utf-8')).hexdigest()
        a, b = image_md5[:2]
        direct_link_template = 'https://upload.wikimedia.org/wikipedia/{}/{}/{}/{}'
        image_direct_link = str(direct_link_template.format(
            site, a, a + b, quote(link.encode('utf-8'))))

        images_retrieved[image_type] = OrderedDict(
            [('claim_id', claim_id),
             ('description_page', image_description_page),
             ('thumbnail', thumbnail_link),
             ('full', image_direct_link)])
    # An explicit check instead of ``assert``: assertions are stripped when
    # Python runs with -O, which would silently return an empty dict.
    if not images_retrieved:
        raise NoImageFoundError("No image available for entity!")
    return images_retrieved