def set_claim_with_start_and_end(self, prop, target_values, main_key,
                                 protoclaims):
    """
    Register a claim, qualified by start and end dates, in protoclaims.

    The main value of every entry is resolved through
    self.kulturnav2Wikidata before being wrapped in a WD.Statement.
    Nothing is registered if target_values is empty/None or if no claims
    were produced.

    @param prop: the property of the claim
    @type prop: str
    @param target_values: the values for the claim, each entry must hold
        the keys main_key, 'start' and 'end'
    @type target_values: dict|list (of dict)|None
    @param main_key: the key for the main entry of the target_values dict
    @type main_key: str
    @param protoclaims: the dict of claims to add to (modified in place)
    @type protoclaims: dict
    """
    if not target_values:
        return

    # a single dict and a list of dicts are both accepted
    qualified_claims = [
        helpers.add_start_end_qualifiers(
            WD.Statement(self.kulturnav2Wikidata(entry[main_key])),
            entry[u'start'],
            entry[u'end'])
        for entry in helpers.listify(target_values)
    ]
    if qualified_claims:
        protoclaims[prop] = qualified_claims
def withClaimTest(self, hitItem, P, Q, descr, orNone=True):
    """
    Execute base test that an item contains a certain claim.

    The test passes if hitItem has a P claim pointing at any of the
    given Q values. If hitItem has P claims but none of them match, a
    warning is output and the test fails. If hitItem has no P claim at
    all the outcome is decided by orNone.

    param hitItem: item to check
    param P: the property to look for (with or without the "P" prefix)
    param Q: (list) of Q claim to look for
    param descr: a descriptive text, used in the warning message
    param orNone: if complete absence of the Property is also ok
    return bool
    """
    P = u'P%s' % P.lstrip('P')
    testItems = [self.wd.QtoItemPage(q) for q in helpers.listify(Q)]

    if P not in hitItem.claims:
        # no P claim at all; always answer with a real bool rather than
        # falling off the end of the function (which returned None)
        return bool(orNone)

    # check claims
    for testItem in testItems:
        if self.wd.has_claim(P, testItem, hitItem):
            return True

    pywikibot.output(u'%s is identified as something other '
                     u'than a %s. Check!' % (hitItem.title(), descr))
    return False
def add_statement(self, prop_name, value, quals=None, ref=None):
    """
    Append a statement to the "statements" list of the data object.

    The value is converted via self.make_pywikibot_item and wrapped via
    self.make_statement; any supplied qualifiers are then attached to the
    resulting statement.

    :param prop_name: name used to look up the property in self.props
    :type prop_name: string
    :param value: content of the statement
    :type value: it can be a string representing a Q-item or a
        dictionary of an amount
    :param quals: possibly qualifier items
    :type quals: a wikidatastuff Qualifier item, or a list of them
    :param ref: reference item, stored alongside the statement
    :type ref: a wikidatastuff Reference item
    """
    statements = self.wd_item["statements"]
    prop = self.props[prop_name]

    statement = self.make_statement(self.make_pywikibot_item(value))

    # a missing qualifier is treated as an empty list of qualifiers
    qualifiers = [] if quals is None else quals
    for qualifier in helpers.listify(qualifiers):
        statement.addQualifier(qualifier)

    statements.append(dict(prop=prop, value=statement, ref=ref))
def format_protoclaims(self):
    """
    Create a preview table (wikitext) for the protoclaims.

    One row is produced per non-empty statement in self.protoclaims. A
    "References" column is only added if at least one statement carries
    its own reference; in that case, and if self.ref is set, rows without
    a reference of their own display an italicised 'default reference'.

    :return: the wikitext of the table
    """
    head = ("{| class='wikitable'\n"
            "|-\n"
            "! Property\n"
            "! Value\n"
            "! Qualifiers\n")
    row_template = ('|-\n'
                    '| {prop} \n'
                    '| {value} \n'
                    '| {quals} \n')

    # collect the cell contents of each table row
    rows = []
    for pid, statements in self.protoclaims.items():
        if not statements:
            continue
        prop_cell = PreviewItem.make_wikidata_template(pid)
        for statement in helpers.listify(statements):
            if statement is None or statement.isNone():
                continue

            # multiple qualifiers are displayed as a bullet list
            qual_lines = []
            if statement.quals:
                if len(statement.quals) > 1:
                    qual_lines = [
                        '* {}'.format(PreviewItem.format_qual(qual))
                        for qual in statement.quals
                    ]
                else:
                    qual_lines = [
                        PreviewItem.format_qual(statement.quals[0])]

            ref_cell = ''
            if statement.ref:
                ref_cell = '\n{}'.format(
                    PreviewItem.format_reference(statement.ref))

            rows.append({
                'prop': prop_cell,
                'value': PreviewItem.format_itis(statement),
                'quals': ' \n'.join(qual_lines),
                'references': ref_cell,
            })

    # if any statement has a reference then add the reference column
    if any(row.get('references') for row in rows):
        default_ref = PreviewItem.make_text_italics('default reference')
        head += '! References\n'
        row_template += '| {references} \n'
        if self.ref:
            for row in rows:
                if not row['references']:
                    row['references'] = default_ref

    # assemble the table
    body = ''.join(row_template.format(**row) for row in rows)
    return head + body + '|}'
def add_statement(self, prop_name, value, quals=None, ref=None):
    """
    Append a statement to the "statements" list of self.wd_item.

    :param prop_name: key used to look up the property in self.props
    :param value: content of the statement, converted through
        self.make_pywikibot_item and self.make_statement
    :param quals: a qualifier, or list of qualifiers, to attach to the
        statement (None means no qualifiers)
    :param ref: reference stored together with the statement
    """
    target = self.wd_item["statements"]
    prop = self.props[prop_name]

    if quals is None:
        quals = []

    statement = self.make_statement(self.make_pywikibot_item(value))
    for qualifier in helpers.listify(quals):
        statement.addQualifier(qualifier)

    entry = {"prop": prop, "value": statement, "ref": ref}
    target.append(entry)
def get_wdq(dataset=None, data=None):
    """Find all links from Wikidata to Kulturnav using WDQ.

    Queries WDQ for all items with a P1248 claim, optionally limited to
    claims with a P972 qualifier matching one of the given datasets, and
    then fetches the WDQ stats page to extract its timestamp.

    @todo: To replace with wdqs we need something like:
    SELECT ?item ?value
    WHERE {
        ?item p:P1248 ?data .
        ?item wdt:P1248 ?value .
        {?data pq:P972 wd:Q20742915} UNION {?data pq:P972 wd:Q20734454}
    }

    @param dataset: Q-id (or list of Q-ids) corresponding to a dataset.
    @type dataset: str or list of str
    @param data: dictionary to which data should be added. Note that a
        falsy value (including an empty dict) is replaced by a new dict,
        so rely on the returned dict rather than in-place modification.
    @type data: dict
    @return: (timestamp, dict {qid: uuid})
    @rtype: tuple (str, dict)
    """
    # local import so that the block does not depend on file-level imports
    import contextlib

    # initialise if needed
    data = data or {}
    dataset = helpers.listify(dataset) or []

    # make query
    pid = '1248'
    query = u'CLAIM[%s]' % pid
    if dataset:
        query += u'{CLAIM['
        for d in dataset:
            query += u'972:%s,' % d.lstrip('Q')
        query = query.rstrip(',') + ']}'

    wd_queryset = wdquery.QuerySet(query)
    wd_query = wdquery.WikidataQuery(cacheMaxAge=0)
    j = wd_query.query(wd_queryset, props=[str(pid), ])

    # process data
    j = j['props'][pid]

    # extract pairs
    for i in j:
        data[u'Q%d' % i[0]] = i[2]

    # get current timestamp
    # the response is explicitly closed so that the connection is not
    # left open until garbage collection
    needle = u'Times :'
    with contextlib.closing(
            urllib2.urlopen(u'http://wdq.wmflabs.org/stats')) as response:
        stats = response.read()
    stats = stats[stats.find(needle):]
    time = stats[len(needle):stats.find('\n')].strip(' -')

    return (time, data)
def resolve(self, entries, ids):
    """
    Resolve a rule to return the resulting value.

    The first-pass value is entries[self.target], provided that all of
    self.keys are present (per Rule.hasKeys). If the rule has a viaId
    then every first-pass value is resolved further through
    Rule.resolve_via_id; any value which fails to resolve causes the
    whole rule to resolve to None.

    :param entries: all the data under @graph
    :param ids: a dict of all @id values
    :return: the matching value (a single result, or a list of results
        if there were several), None if there was no match
    """
    if not Rule.hasKeys(self.keys, entries):
        return None

    value = entries[self.target]
    # break here if no hit
    if not value:
        return None

    # only viaId rules need further conversion
    if not self.viaId:
        return value

    multi_path = isinstance(self.viaId, dict)
    resolved = []
    # value can be either a single entry or a list
    for candidate in helpers.listify(value):
        if multi_path:
            result = {
                key: Rule.resolve_via_id(via_id, candidate, ids)
                for key, via_id in self.viaId.iteritems()
            }
            try:
                all_none = set(result.values()) == {None}
            except TypeError:
                # raised by set() if a resolved value is unhashable
                pywikibot.output("Could not handle: %s" % result)
                return None
            if all_none:
                # @todo: should log these to spot schema changes
                return None
        else:
            # self.viaId is either string or tuple
            result = Rule.resolve_via_id(self.viaId, candidate, ids)
            if not result:
                # @todo: should log these to spot schema changes
                return None
        resolved.append(result)

    # undo the listify if there was only a single result
    return resolved[0] if len(resolved) == 1 else resolved
def get_nationality(bot, values):
    """Get the nationality/nationalities.

    As a side effect values[u'person.nationality'] is replaced by its
    listified form whenever it holds a non-empty value.

    @param bot: the instance of the bot calling upon the template
    @type bot: KulturnavBot
    @param values: the values extracted using the rules
    @type values: dict
    @return: nationalities, or None if there were none
    @rtype: list of WD.Statement|None
    """
    nationalities = values.get(u'person.nationality')
    if not nationalities:
        return None

    # there can be multiple values
    nationalities = helpers.listify(nationalities)
    values[u'person.nationality'] = nationalities

    statements = [
        WD.Statement(bot.location2Wikidata(nationality))
        for nationality in nationalities
    ]
    return statements or None
def set_registration_no(self, values, protoclaims):
    """Identify registration number (P879) and add to claims.

    Only registrations whose type equals self.IKNO_K are converted to
    claims. Nothing is added to protoclaims if no such registration
    is found.

    @param values: the values extracted using the rules, must contain
        the key 'navalVessel.registration'
    @type values: dict
    @param protoclaims: the dict of claims to add to (modified in place)
    @type protoclaims: dict
    """
    registrations = values['navalVessel.registration']
    if not registrations:
        return

    # only one type is currently mapped
    claims = [
        WD.Statement(registration['number'])
        for registration in helpers.listify(registrations)
        if registration['type'] == self.IKNO_K
    ]
    if claims:
        protoclaims[u'P879'] = claims
def claims(self, values):
    """Add protoclaims.

    Starts from the claims given by Person.get_claims and adds a P2538
    claim based on the first seeAlso link pointing at
    collection.nationalmuseum.se/eMuseumPlus. As a side effect a
    string-valued values[u'seeAlso'] is replaced by its listified form.

    @param values: the values extracted using the rules
    @type values: dict
    @return: the protoclaims
    @rtype: dict PID-WD.Statement pairs
    """
    # get basic person claims
    protoclaims = Person.get_claims(self, values)

    see_also = values[u'seeAlso']
    if see_also is None:
        return protoclaims

    # a lone string must be wrapped so that it is not iterated per char
    if helpers.is_str(see_also):
        see_also = helpers.listify(see_also)
        values[u'seeAlso'] = see_also

    # add claim about natmus_artist_id, only the first hit is used
    for link in see_also:
        if u'collection.nationalmuseum.se/eMuseumPlus' not in link:
            continue
        object_id = link.split('objectId=')[-1].split('&')[0]
        protoclaims['P2538'] = WD.Statement(object_id)
        break

    return protoclaims
def extractStatements(self, riksdagdata):
    """
    Extract possible statements, and names, from the riksdag data.

    Also stores the 'intressent_id' of the data as self.current_id.

    param riksdagdata: a dict
    return tuple (dict of properties and statements, list of names)
    """
    self.current_id = riksdagdata['intressent_id']

    # Handle statements
    # position data is inconsistent as single entries are sometimes
    # not in a list. hence the listify
    protoclaims = {
        GENDER_P: self.matchGender(riksdagdata['kon']),
        PARTY_P: self.matchParty(riksdagdata['parti']),
        LAST_NAME_P: self.matchName(
            riksdagdata['efternamn'], 'lastName'),
        FIRST_NAME_P: self.matchName(
            riksdagdata['tilltalsnamn'], 'firstName'),
        BIRTH_DATE_P: self.matchBirth(riksdagdata['fodd_ar']),
        DEATH_DATE_P: self.matchDeath(riksdagdata['status']),
        POSITION_P: self.handlePositions(
            helpers.listify(riksdagdata['personuppdrag']['uppdrag'])),
    }
    # not yet handled: valkrets, personuppgifter

    # Handle aliases
    # Note that this gives a mistake in names such as "A von B" since
    # the "von" is not part of the sort key.
    fullName = helpers.reorder_names(riksdagdata['sorteringsnamn'])
    iortAlias = self.makeIortAlias(riksdagdata['iort'], fullName)

    # NOTE(review): set() iterates over fullName, so if reorder_names
    # returns a plain string this yields its individual characters
    # rather than the name itself - confirm the return type.
    names = set(fullName)
    if iortAlias:
        names.add(iortAlias)

    return protoclaims, list(names)
def __init__(self, target, keys=None, viaId=None):
    """
    Initialize the rule.

    Keys gives the resolver the environment in which the target is
    expected.

    Target is the first-pass value to be extracted from the data. The
    result can be a list of values.

    ViaId gives the path through which a second-pass value can be
    extracted from Target. For data which must remain connected (such as
    the start and end dates of a particular claim) a dict can be supplied
    to ViaId. Each entry is looked for under the same Target value but
    follows a different ViaId path.

    :param target: string key for which the value is wanted
    :param keys: list|string|None of keys which must be present
        (in addition to target). If none is supplied (or the supplied
        value is empty) it defaults to 'inDataset', a key in the top
        node of the graph.
    :param viaId: string|tuple|dict|None if the value of "target" should
        be matched to an @id entry where this key then gives the wanted
        value. If a tuple is supplied then each intermediate entry is
        matched to an @id entry. If a dict is supplied then each value
        is treated as its own viaId and the returned value is a dict
        where each entry has been resolved.
    """
    self.target = target
    self.viaId = viaId

    # the required keys are copied into a fresh list, followed by target
    self.keys = []
    # default to the top node in the graph
    self.keys.extend(helpers.listify(keys or 'inDataset'))
    self.keys.append(target)
def commit_claims(self, protoclaims, item, default_ref):
    """
    Add each claim (if new) and source it.

    Empty entries, None and Statements for which isNone() is true are
    skipped. The item is reloaded after every added claim.

    :param protoclaims: a dict of claims with
        key: Prop number
        val: Statement|list of Statements
    :param item: the target entity
    :param default_ref: main/default reference to use for statements
        which do not carry a reference of their own
    """
    for prop, statements in protoclaims.items():
        if not statements:
            continue

        # going via a set eliminates potential duplicates
        unique_statements = set(helpers.listify(statements))
        for statement in unique_statements:
            # skip if None or a Statement(None)
            if statement is None or statement.isNone():
                continue

            # use internal reference if present, else the general
            self.wd.addNewClaim(
                prop, statement, item, statement.ref or default_ref)

            # reload item so that next call is aware of changes
            item = self.wd.QtoItemPage(item.title())
            item.exists()
def set_homeport(self, values, protoclaims):
    """Identify homePort (P504) and add, with start/end dates, to claims.

    The location of each home port is resolved through
    self.location2Wikidata. Nothing is added to protoclaims if there are
    no home ports.

    @param values: the values extracted using the rules, must contain
        the key 'homePort'
    @type values: dict
    @param protoclaims: the dict of claims to add to (modified in place)
    @type protoclaims: dict
    """
    home_ports = values[u'homePort']
    if not home_ports:
        return

    claims = []
    for port in helpers.listify(home_ports):
        statement = WD.Statement(self.location2Wikidata(port['location']))
        qualified = helpers.add_start_end_qualifiers(
            statement, port[u'start'], port[u'end'])
        claims.append(qualified)

    if claims:
        protoclaims[u'P504'] = claims
def populateValues(self, values, rules, hit):
    """
    Populate values and check results given a hit.

    Given a dict of values and a kulturnav hit, populate the values and
    check if the result is problem free. Problems are: a key matching
    more than once, or (if self.require_wikidata is set) finding a
    Wikipedia link in seeAlso without having found a Wikidata link.

    @todo: raise Error instead of using problemFree solution

    param values: dict with keys and every value as None
    param rules: a dict with keys and values either:
        None: the exact key is present in hit and its value is wanted
        a Rule: resolved through Rule.resolve
    param hit: a kulturnav entry
    raise pywikibot.Error: if no identifier could be found
    return bool problemFree
    """
    graph = hit[u'@graph']
    problem_free = True

    # populate ids for viaId rules
    ids = {}
    for entries in graph:
        if '@id' not in entries:
            continue
        entry_id = entries['@id']
        if entry_id in ids:
            # only warned about, the later entry still replaces the first
            pywikibot.output('Non-unique viaID key: \n%s\n%s' %
                             (entries, ids[entry_id]))
        ids[entry_id] = entries

    # handle rules
    for entries in graph:
        for key, rule in rules.iteritems():
            found = None
            if rule is None:
                if key in entries:
                    found = entries[key]
            elif isinstance(rule, Rule):
                found = rule.resolve(entries, ids)

            # test and register found value
            if found is None:
                continue
            if values[key] is None:
                values[key] = found
            else:
                pywikibot.output(u'duplicate entries for %s' % key)
                problem_free = False

    # the minimum which must have been identified
    if values[u'identifier'] is None:
        raise pywikibot.Error(u'Could not isolate the identifier from the '
                              u'KulturNav object! JSON layout must have '
                              u'changed. Crashing!')

    # dig into sameAs/exactMatch and seeAlso
    KulturnavBot.set_sameas_values(values)

    # only look at seeAlso if we found no Wikidata link and require one
    if self.require_wikidata and not values[u'wikidata'] and \
            values[u'seeAlso']:
        values[u'seeAlso'] = helpers.listify(values[u'seeAlso'])
        for link in values[u'seeAlso']:
            if u'wikipedia' in link:
                pywikibot.output(u'Found a Wikipedia link but no '
                                 u'Wikidata link: %s %s' %
                                 (link, values[u'identifier']))
                problem_free = False

    if not problem_free:
        pywikibot.output(u'Found an issue with %s (%s), skipping' %
                         (values['identifier'], values['wikidata']))
    return problem_free