def _same_tree(cls, prop, data1, data2):
    """Check whether the targets of two claim groups share a wdt:prop* path.

    Two ASK queries are run against the Wikidata Query Service, one per
    direction (data1 -> data2 and data2 -> data1), each over the
    transitive closure of ``prop``.

    :param prop: property id (e.g. 'P279') used in the path expression
    :param data1: iterable of claims exposing a ``target.id`` attribute
    :param data2: iterable of claims exposing a ``target.id`` attribute
    :return: True if any item of one group reaches any item of the other
    :raises requests.exceptions.ConnectionError: after repeated failures
    """
    sparql = SparqlQuery()  # fixme: dependencies
    pattern = ('ASK { VALUES ?x1 { wd:%s } . VALUES ?x2 { wd:%s } . '
               '?x1 wdt:%s* ?x2 }')
    item1 = ' wd:'.join(map(attrgetter('target.id'), data1))
    item2 = ' wd:'.join(map(attrgetter('target.id'), data2))
    for ask in (pattern % (item1, item2, prop),
                pattern % (item2, item1, prop)):
        # Fix: give each query its own retry budget. Previously the
        # counter was shared across both queries, so connection errors
        # while running the first query silently reduced the retries
        # available to the second one.
        tries = 3
        while True:
            try:
                res = sparql.ask(ask)
            except requests.exceptions.ConnectionError:
                tries -= 1
                if tries == 0:
                    raise
                time.sleep(1)
                continue
            else:
                break
        if res:
            return True
    return False
def _same_tree(cls, prop, data1, data2):
    """Return True when the two claim groups are linked via wdt:prop*.

    Both directions are tried with ASK queries; transient connection
    errors are retried (up to three times in total) before re-raising.
    """
    endpoint = SparqlQuery()  # fixme: dependencies
    template = ('ASK { VALUES ?x1 { wd:%s } . VALUES ?x2 { wd:%s } . '
                '?x1 wdt:%s* ?x2 }')
    target_id = attrgetter('target.id')
    first = ' wd:'.join(map(target_id, data1))
    second = ' wd:'.join(map(target_id, data2))
    attempts_left = 3
    queries = (template % (first, second, prop),
               template % (second, first, prop))
    for query in queries:
        answer = False
        while True:
            try:
                answer = endpoint.ask(query)
            except requests.exceptions.ConnectionError:
                attempts_left -= 1
                if attempts_left == 0:
                    raise
                time.sleep(1)
            else:
                break
        if answer:
            return True
    return False
def __init__(self, generator, **kwargs):
    """Set up bot options, query helpers and the item generator.

    :param generator: iterable of items to treat; when falsy, the
        SPARQL-based ``custom_generator()`` is used instead
    :param kwargs: options forwarded to the parent bot class
    """
    extra_defaults = {
        'always': True,
        'class': 'Q10648343',
        'min_labels': 1,
    }
    self.available_options.update(extra_defaults)
    super().__init__(**kwargs)
    # helpers for building and running SPARQL queries
    self.sparql = SparqlQuery(repo=self.repo)
    self.store = QueryStore()
    self._generator = generator or self.custom_generator()
def __init__(self, **options):
    """Initialize the bot with paging options and query helpers.

    :param options: bot options; 'step' is the SPARQL paging size and
        'offset' the offset to start from
    """
    paging_defaults = {'step': 10, 'offset': 0}
    self.available_options.update(paging_defaults)
    super().__init__(**options)
    self.sparql = SparqlQuery(repo=self.repo)
    self.store = QueryStore()
    # per-property caches: resolved (formatter, regex) pairs and items
    # that could not be processed
    self.cache = {}
    self.failed = {}
def __init__(self, generator, **kwargs):
    """Set up bot options, query helpers and the item generator.

    :param generator: iterable of items to treat; when falsy, the
        SPARQL-based ``custom_generator()`` is used instead
    :param kwargs: options forwarded to the parent bot class
    """
    # Fix: use ``available_options`` and zero-argument ``super()``;
    # ``availableOptions`` and ``super(Class, self)`` are the deprecated
    # pywikibot spellings (other bots in this codebase already use the
    # modern forms).
    self.available_options.update({
        'always': True,
        'class': 'Q15618652',
        'min_labels': 1,
    })
    super().__init__(**kwargs)
    self.store = QueryStore()
    self.sparql = SparqlQuery(repo=self.repo)
    self._generator = generator or self.custom_generator()
def get_existing_items_with_instanceof_and_rfcnum():
    """Map RFC numbers to Wikidata QIDs.

    Looks for items that are (transitively) an instance of Q212971 and
    carry an RFC number (P892).

    :return: dict mapping RFC number string -> QID string
    """
    endpoint = SparqlQuery()
    response = endpoint.query('SELECT ?rfcid ?item WHERE { ?item wdt:P31/wdt:P279* wd:Q212971 . ?item wdt:P892 ?rfcid }')
    mapping = {}
    for row in response['results']['bindings']:
        url = row['item']['value']
        match = re.search(r'(Q\d+)', url)
        if match is None:
            print('Error: could not find Wikidata item identifier in SPARQL results obtained by get_existing_items_with_instanceof_and_rfcnum()')
            continue
        mapping[row['rfcid']['value']] = match.group(1)
    return mapping
def books_with_missing_labels_with_title():
    """Find English books with missing labels, but with a title.

    Missing labels are identified by checking if the label is equal to
    the QID.

    Returns an iterable of (book QID, title)
    """
    query = f""" SELECT ?book ?bookLabel ?title WHERE {{ ?book wdt:{wp.INSTANCE_OF.pid} wd:{wp.BOOK}; wdt:{wp.LANGUAGE_OF_WORK_OR_NAME.pid} wd:{wp.ENGLISH}; wdt:{wp.TITLE.pid} ?title; FILTER(REGEX(?bookLabel, SUBSTR(STR(?book), 32 ))) FILTER((LANG(?title)) = "en") SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". ?book rdfs:label ?bookLabel. }} }} ORDER BY (?title) """
    print(query)
    repo = Site().data_repository()
    for row in SparqlQuery(repo=repo).select(query):
        # when the label is missing it equals the QID, so bookLabel
        # doubles as the book's identifier here
        yield row["bookLabel"], row["title"]
def episodes_with_titles_and_missing_labels():
    """Find English show episodes with missing labels, but with a title.

    Missing labels are identified by checking if the label is equal to
    the QID.

    Returns an iterable of (episode QID, title, series label)
    """
    query = f""" SELECT ?episode ?episodeLabel ?seriesLabel ?title WHERE {{ ?episode wdt:{wp.INSTANCE_OF.pid} wd:{wp.TELEVISION_SERIES_EPISODE}; wdt:{wp.ORIGNAL_LANGUAGE_OF_FILM_OR_TV_SHOW.pid} wd:{wp.ENGLISH}. OPTIONAL {{ ?episode wdt:{wp.TITLE.pid} ?title. }} OPTIONAL {{ ?episode wdt:{wp.PART_OF_THE_SERIES.pid} ?series. }} # Skip "http://www.wikidata.org/entity/" (31 characters) FILTER(REGEX(?episodeLabel, SUBSTR(STR(?episode), 32))) FILTER((LANG(?title)) = "en") SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". ?episode rdfs:label ?episodeLabel. ?series rdfs:label ?seriesLabel. }} }} ORDER BY (?seriesLabel) (?title) """
    print(query)
    repo = Site().data_repository()
    for row in SparqlQuery(repo=repo).select(query):
        # the entity URI ends with the QID
        qid = row["episode"].split("/")[-1]
        yield qid, row["title"], row["seriesLabel"]
def movies_with_missing_labels_with_title():
    """Find English movies with missing labels, but with a title.

    Missing labels are identified by checking if the label is equal to
    the QID.

    Returns an iterable of (movie QID, title)
    """
    # Consistency fix: use wp.FILM instead of the hard-coded wd:Q11424 —
    # every other QID/PID in this query comes from the wp constants
    # (wp.FILM is used the same way elsewhere in this codebase).
    query = f"""SELECT ?movieLabel ?title ?imdbId WHERE {{ ?movie wdt:{wp.INSTANCE_OF.pid} wd:{wp.FILM}; wdt:{wp.ORIGNAL_LANGUAGE_OF_FILM_OR_TV_SHOW.pid} wd:{wp.ENGLISH}; wdt:{wp.TITLE.pid} ?title; wdt:{wp.IMDB_ID.pid} ?imdbId. # Skip "http://www.wikidata.org/entity/" (31 characters) FILTER((REGEX(?movieLabel, SUBSTR(STR(?movie), 32 )))) FILTER((LANG(?title)) = "en") SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". ?movie rdfs:label ?movieLabel. }} }} ORDER BY (?title) """
    print(query)
    results = SparqlQuery(repo=Site().data_repository()).select(query)
    for result in results:
        # when the label is missing it equals the QID, so movieLabel
        # doubles as the movie's identifier here
        movie_label = result["movieLabel"]
        title = result["title"]
        yield movie_label, title
def movies_with_missing_titles():
    """Find English movies with missing titles, but with a label.

    Only movies that have an IMDb id are returned.

    Returns an iterable of (movie QID, movie label)
    """
    # Consistency fix: use wp.FILM and wp.IMDB_ID.pid instead of the
    # hard-coded wd:Q11424 and wdt:P345 — every other QID/PID in this
    # query comes from the wp constants (both are used that way in the
    # sibling queries of this codebase).
    query = f""" SELECT ?movie ?movieLabel WHERE {{ ?movie wdt:{wp.INSTANCE_OF.pid} wd:{wp.FILM}; wdt:{wp.ORIGNAL_LANGUAGE_OF_FILM_OR_TV_SHOW.pid} wd:{wp.ENGLISH}. OPTIONAL {{ ?movie wdt:{wp.TITLE.pid} ?title. }} OPTIONAL {{ ?movie wdt:{wp.IMDB_ID.pid} ?imdbId. }} FILTER(!(BOUND(?title))) FILTER((BOUND(?imdbId))) # Skip "http://www.wikidata.org/entity/" (31 characters) FILTER(!(REGEX(?movieLabel, SUBSTR(STR(?movie), 32 )))) SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". ?movie rdfs:label ?movieLabel. }} }} ORDER BY (?movieLabel) """
    print(query)
    results = SparqlQuery(repo=Site().data_repository()).select(query)
    for result in results:
        # the entity URI ends with the QID
        movie_id = result["movie"].split("/")[-1]
        movie_label = result["movieLabel"]
        yield movie_id, movie_label
def __init__(self, generator, **kwargs):
    """Set up bot options, query helpers and the item generator.

    :param generator: iterable of items to treat; when falsy, the
        SPARQL-based ``custom_generator()`` is used instead
    :param kwargs: options forwarded to the parent bot class
    """
    # Fix: use ``available_options`` and zero-argument ``super()``;
    # ``availableOptions`` and ``super(Class, self)`` are the deprecated
    # pywikibot spellings (other bots in this codebase already use the
    # modern forms).
    self.available_options.update({
        'always': True,
        'class': 'Q15618652',
        'min_labels': 1,
    })
    super().__init__(**kwargs)
    self.store = QueryStore()
    self.sparql = SparqlQuery(repo=self.repo)
    self._generator = generator or self.custom_generator()
def get_existing_items_with_rfc_dois():
    """Map RFC numbers to Wikidata QIDs via DOIs of the 10.17487/RFCnnnn form.

    :return: dict mapping RFC number string -> QID string
    """
    endpoint = SparqlQuery()
    response = endpoint.query('SELECT ?doi ?item WHERE { ?item wdt:P356 ?doi . FILTER regex(?doi, \'^10.17487/RFC\\\\d{4}\') }')
    mapping = {}
    for row in response['results']['bindings']:
        qid_match = re.search(r'(Q\d+)', row['item']['value'])
        if qid_match is None:
            print('Error: could not find Wikidata item identifier in SPARQL results obtained by get_existing_items_with_rfc_dois()')
            continue
        rfc_match = re.search(r'RFC(\d+)', row['doi']['value'])
        if rfc_match is None:
            print('Error: could not find RFC identifier in SPARQL results obtained by get_existing_items_with_rfc_dois()')
            continue
        mapping[rfc_match.group(1)] = qid_match.group(1)
    return mapping
def items_with_missing_labels_with_title():
    """Find items with missing labels, but with a title.

    Missing labels are identified by checking if the label is equal to
    the QID.

    Returns an iterable of (item, item QID, title)
    """
    query = f""" SELECT DISTINCT ?item ?itemId ?title WHERE {{ ?item wdt:{wp.INSTANCE_OF.pid} ?itemType; wdt:{wp.TITLE.pid} ?title. VALUES ?itemType {{ wd:{wp.TELEVISION_SERIES.ljust(10, " ")} # television series wd:{wp.TELEVISION_SERIES_EPISODE.ljust(10, " ")} # television series episode wd:{wp.BOOK.ljust(10, " ")} # book wd:{wp.FILM.ljust(10, " ")} # film wd:{wp.SILENT_FILM.ljust(10, " ")} # silent film wd:{wp.LITERARY_WORK.ljust(10, " ")} # literary work wd:{wp.WRITTEN_WORK.ljust(10, " ")} # written work wd:{wp.PERIODICAL.ljust(10, " ")} # periodical }} # Skip "http://www.wikidata.org/entity/" (31 characters) BIND(SUBSTR(STR(?item), 32 ) AS ?itemId) # Only look for titles that are in English, since we add the English label FILTER((LANG(?title)) = "en") # The label will be the same as the QID if the label is missing FILTER(REGEX(?itemLabel, ?itemId)) SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". ?item rdfs:label ?itemLabel. }} }} """
    print(query)
    repo = Site().data_repository()
    for row in SparqlQuery(repo=repo).select(query):
        yield row["item"], row["itemId"], row["title"]
def episodes(season_id):
    """Find episodes for a given season (specified by QID).

    Returns an iterable of (season ordinal, episode QID, episode title)
    """
    query = f""" SELECT ?seasonOrdinal ?episode ?episodeTitle WHERE {{ ?episode wdt:{wp.INSTANCE_OF.pid} wd:{wp.TELEVISION_SERIES_EPISODE}; wdt:{wp.SEASON.pid} wd:{season_id}; wdt:{wp.TITLE.pid} ?episodeTitle; (p:{wp.SEASON.pid}/pq:{wp.SERIES_ORDINAL.pid}) ?seasonOrdinal . }} ORDER BY (?seasonOrdinal) """
    repo = Site().data_repository()
    for row in SparqlQuery(repo=repo).select(query):
        # the entity URI ends with the QID
        episode_qid = row["episode"].split("/")[-1]
        yield int(row["seasonOrdinal"]), episode_qid, row["episodeTitle"]
def get_all(self):
    """Collect QIDs of all humans whose occupation is physician or a direct subclass.

    First queries the direct subclasses (P279) of physician (Q39631),
    then gathers every human (P31 Q5) having any of those occupations
    (P106).

    :return: set of QID strings
    """
    physician = 'Q39631'
    query = """SELECT ?subclass_of WHERE { SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } ?subclass_of wdt:P279 wd:Q39631. } """
    sparql = SparqlQuery()
    subclass_response = sparql.query(query)
    physician_types = {
        q['subclass_of']['value'].split("/")[-1]
        for q in subclass_response['results']['bindings']
    }
    physician_types.add(physician)
    physicians = set()
    for t in physician_types:
        query = sub(
            'physician_type', t,
            """SELECT ?physician WHERE { SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } ?physician wdt:P106 wd:physician_type. ?physician wdt:P31 wd:Q5. } """)
        try:
            batch = {
                q['physician']['value'].split("/")[-1]
                for q in sparql.query(query)['results']['bindings']
            }
        except Exception as e:
            # Fix: skip a failed query instead of falling through and
            # re-adding the previous iteration's (or an unrelated)
            # value of the results variable to the accumulated set.
            pprint(e)
            continue
        physicians |= batch
    return physicians
def do_call():
    """Run the closed-over SPARQL ``query`` and hand results to ``callback``.

    Intended to be executed off the main thread: the query runs here and
    the callback is dispatched via ``idle_add`` so it fires later on the
    event loop.
    NOTE(review): ``query``, ``callback``, ``cb_args``, ``kwargs`` and
    ``idle_add`` all come from the enclosing scope — confirm at the
    definition site; ``idle_add`` is presumably GLib's scheduler.
    """
    # local import keeps pywikibot out of the module import path
    from pywikibot.data.sparql import SparqlQuery
    sparql = SparqlQuery()
    results = sparql.query(query)
    # the lambda captures ``results``; invoked later by the event loop
    idle_add(lambda: callback(results, *cb_args, **kwargs))
    return None
# Script chunk: resolve Wikidata items from English Wikipedia page titles.
from pywikibot import pagegenerators
from pywikibot.data.sparql import SparqlQuery
import re

# Item that a given enwiki page (matched by its English name) is about.
QUERY = """ SELECT ?id WHERE { ?site schema:name "%s"@en; schema:about ?id . } """

# Reverse direction: enwiki page name for a given item QID.
QUERY_LINK = """ SELECT ?name WHERE { ?page schema:about wd:%s; schema:inLanguage "en"; schema:name ?name . } """

# NOTE(review): ``sys`` is not imported in this chunk — presumably
# imported earlier in the file; confirm.
QID = sys.argv[1]

sparql_query = SparqlQuery()

# NOTE(review): these patterns are not raw strings, so sequences like
# '\[' rely on Python passing unknown escapes through unchanged —
# consider r'' literals.
linkre = re.compile('\[\[([^|]+)?(\|.+)?]]')  # [[target|label]] wikilink
titlere = re.compile('\{\{lang-(\w+)\|(.+)}}')  # {{lang-xx|title}} template
yearre = re.compile('\[\[(\d+) ')  # linked year, e.g. "[[1999 "


def find_by_label(label):
    """Build a lookup for a Wikidata item from an (enwiki) label, following redirects.

    NOTE(review): this function continues beyond this chunk; ``site``
    and ``pywikibot`` are expected to be defined elsewhere in the file.
    """
    # unwrap a wikilink if the label is given as [[...]]
    match = linkre.match(label)
    if match:
        label = match.group(1)
    page = pywikibot.Page(site, label)
    if page.isRedirectPage():
        redir = page.getRedirectTarget()
        label = redir.title()
    sparql = QUERY % (label)
    # print(sparql)
# Script chunk: fix 'und' language codes in painting titles using labels.
import pywikibot
import sys
import json
import uuid
from pywikibot.data.sparql import SparqlQuery

HELP = """ Fix language in title from labels """

site = pywikibot.Site("wikidata", "wikidata")
site.throttle.setDelays(writedelay=1)  # speed up consecutive writes
repo = site.data_repository()
sparql_query = SparqlQuery()

# Paintings (Q3305213) whose P1705/P1476 title has the undetermined
# language code 'und'.
TITLES_QUERY = """ SELECT * WHERE { ?p wdt:P31 wd:Q3305213 . ?p (wdt:P1705|wdt:P1476) ?title . FILTER(lang(?title) = "und") } LIMIT 100 """


def fix_lang(claims, labels):
    """Inspect monolingual-text claims whose language is 'und'.

    NOTE(review): this function continues beyond this chunk — the
    handling of the non-'und' case is not visible here.
    """
    for claim in claims:
        # only real values can carry a language
        if claim.getSnakType() != 'value':
            continue
        target = claim.getTarget()
        print(target)
        if not isinstance(target, pywikibot.WbMonolingualText):
            continue
        if target.language != 'und':
# Script chunk: copy painting titles to labels in the same language.
import pywikibot
import sys
import json
import uuid
from pywikibot.data.sparql import SparqlQuery

HELP = """ This script applies painting title to label in the same language """

site = pywikibot.Site("wikidata", "wikidata")
site.throttle.setDelays(writedelay=1)  # speed up consecutive writes
repo = site.data_repository()
sparql_query = SparqlQuery()

# Paintings (Q3305213) with a P1705/P1476 title in a real language that
# lack a label in that same language.
TITLES_QUERY = """ SELECT ?p ?title WHERE { ?p wdt:P31 wd:Q3305213 . ?p (wdt:P1705|wdt:P1476) ?title . BIND(lang(?title) as ?lt) FILTER(?lt != "und") FILTER NOT EXISTS { ?p rdfs:label ?pl . FILTER(lang(?pl) = ?lt) } } LIMIT 100 """

# full_data=True returns typed values, so result['title'] exposes
# ``.language`` below
results = sparql_query.select(TITLES_QUERY, full_data=True)
#print(results)
# NOTE(review): this loop continues beyond this chunk.
for result in results:
    lang = result['title'].language
class ExternalIdSlicingBot(WikidataEntityBot):
    """Bot that trims full URLs stored in external-id claims down to the
    bare identifier, using the property's formatter URL (P1630) and,
    when available, its format regex (P1793)."""

    # properties this bot must never touch  # fixme: don't use this word
    blacklist = {'P2013'}
    use_from_page = False

    def __init__(self, **options):
        """Initialize paging options, query helpers and caches."""
        self.available_options.update({
            'step': 10,
            'offset': 0,
        })
        super().__init__(**options)
        # per-property cache of (formatter, regex); items that failed
        self.cache = {}
        self.failed = {}
        self.sparql = SparqlQuery(repo=self.repo)
        self.store = QueryStore()

    @property
    def generator(self):
        """Yield entities with sliceable external ids, page by page."""
        step = self.opt['step']
        opts = {
            # fixme: don't use this word
            'blacklist': ' wd:'.join(self.blacklist),
            'limit': step,
        }
        offset = self.opt['offset']
        while True:
            pywikibot.output('\nLoading items (offset %i)...' % offset)
            opts['offset'] = offset
            # cheap ASK first: stop paging when nothing is left
            ask = self.store.build_query('ask_externalid_props', **opts)
            if not self.sparql.ask(ask):
                break
            query = self.store.build_query('external-ids', **opts)
            gen = PreloadingEntityGenerator(
                WikidataSPARQLPageGenerator(query, site=self.repo))
            yield from gen
            offset += step

    def treat_page_and_item(self, page, item):
        """Replace URL-valued external-id claims with the extracted id."""
        for prop, claims in item.claims.items():
            if prop in self.blacklist:
                continue
            if claims[0].type != 'external-id':
                continue
            for cl in claims:
                # only URL-looking values need slicing
                if not cl.target or not cl.target.startswith('http'):
                    continue
                formatter, regex = self.get_formatter_and_regex(prop)
                if not formatter:
                    pywikibot.output("%s doesn't have a formatter" % prop)
                    break  # no point trying the remaining claims
                value = self.find_value(cl.target, formatter)
                if not value:
                    pywikibot.output(
                        'Value not found in "%s" for property %s'
                        % (cl.target, prop))
                    self.failed.setdefault(prop, set()).add(item)
                    continue
                if regex:
                    try:
                        match = re.match('(%s)' % regex, value)
                    except re.error:
                        pywikibot.output('Couldn\'t apply regex "%s"' % regex)
                        break
                    if not match:
                        pywikibot.output('Value "%s" not matched by regex '
                                         '"%s"' % (value, regex))
                        self.failed.setdefault(prop, set()).add(item)
                        continue
                    value = match.group()
                summary = 'harvested the identifier based on [[Property:P1630]]'
                if regex:
                    summary += ' and [[Property:P1793]]'
                cl.changeTarget(value, summary=summary)

    def get_formatter_and_regex(self, prop):
        """Return (formatter URL, format regex) for ``prop``, cached.

        With several claims, the single preferred-rank one wins; with an
        ambiguous preferred set the value stays None.
        NOTE(review): nesting reconstructed from collapsed source —
        confirm the `else` branches attach to the len()>1 checks.
        """
        if prop not in self.cache:
            formatter = regex = None
            ppage = pywikibot.PropertyPage(self.repo, prop)
            if 'P1630' in ppage.claims:
                if len(ppage.claims['P1630']) > 1:
                    preferred = [
                        cl for cl in ppage.claims['P1630']
                        if cl.rank == 'preferred'
                    ]
                    if len(preferred) == 1:
                        formatter = preferred[0].target
                else:
                    formatter = ppage.claims['P1630'][0].target
            if 'P1793' in ppage.claims:
                if len(ppage.claims['P1793']) > 1:
                    preferred = [
                        cl for cl in ppage.claims['P1793']
                        if cl.rank == 'preferred'
                    ]
                    if len(preferred) == 1:
                        regex = preferred[0].target
                else:
                    regex = ppage.claims['P1793'][0].target
            self.cache[prop] = (formatter, regex)
        return self.cache[prop]

    def strip_init_stuff(self, string):
        """Drop a leading scheme and 'www.' so URL and formatter align."""
        if string.startswith(('http://', 'https://')):
            string = string.partition('//')[2]
        if string.startswith('www.'):
            string = string[4:]
        return string

    def find_value(self, url, formatter):
        """Extract the id from ``url`` using ``formatter`` (with a $1 slot).

        Returns None when the URL does not fit the formatter.
        """
        url = self.strip_init_stuff(url)
        formatter = self.strip_init_stuff(formatter)
        value = pywikibot.page.url2unicode(url)
        split = formatter.split('$1')
        if not value.startswith(split[0]):
            return None
        # formatter ends with '$1': everything after the prefix is the id
        if not split[1]:
            return value[len(split[0]):].rstrip('/')
        # NOTE(review): for a two-part formatter split[-1] == split[1],
        # so the suffix is cut off before it is searched for — confirm
        # this is the intended behavior for such formatters.
        value = value[:-len(split[-1])]
        try:
            index = value.index(split[1], len(split[0]))
        except ValueError:
            return None
        else:
            return value[len(split[0]):index].rstrip('/')

    def exit(self):
        """Publish a report of failed items before exiting."""
        if self.failed:
            text = ''
            for prop in sorted(self.failed):
                text += '* [[Property:%s]]:\n' % prop
                for item in sorted(self.failed[prop]):
                    text += '** [[%s]]\n' % item.title()
            page = pywikibot.Page(
                self.repo,
                'User:%s/Wrong external ids' % self.repo.username())
            page.put(text, summary='update')
        super().exit()
def sync_edition_olids_by_isbns(dry_run=False, limit=None):
    """
    Find editions on Wikidata and Open Library with the same ISBNs and add the
    Open Library ID to Wikidata and the Wikidata ID to Open Library.

    :param dry_run: when True, log what would change but write nothing
    :param limit: stop after this many modifications on either side
    """
    wd = pywikibot.Site("wikidata", "wikidata")
    wd_repo = wd.data_repository()
    wdqs = SparqlQuery()  # Wikidata Query Service
    ol = OpenLibrary()
    # append date to query avoid getting cached results
    query = QUERY + f"\n # {datetime.datetime.now()}"
    sparql_results = wdqs.select(query)
    # Group by key (sparql hits timeouts when we do the grouping there)
    qid_to_isbns = dict()
    for row in sparql_results:
        qid = row['item'].split('/')[-1]
        if qid not in qid_to_isbns:
            qid_to_isbns[qid] = []
        qid_to_isbns[qid].append(normalize_isbn(row['isbn']))
    logger.info("Found %d editions to update", len(qid_to_isbns))
    ol_books_modified = 0
    wd_items_modified = 0
    for qid, isbns in qid_to_isbns.items():
        logger.debug("Processing %s", qid)
        # sanity check: more than one ISBN of the same length is fishy
        for isbn_len in [10, 13]:
            count = len([isbn for isbn in isbns if len(isbn) == isbn_len])
            if count > 1:
                logger.warning("%s has multiple isbn%ss (%d)",
                               qid, isbn_len, count)
        ol_books = [ol.Edition.get(isbn=isbn) for isbn in isbns]
        # drop lookup misses and placeholder 'None' olids
        ol_books = [book for book in ol_books if book and book.olid != 'None']
        ol_books = remove_dupes(ol_books, lambda ed: ed.olid)
        logger.info("Found %d Open Library book(s) for %s (isbns %s)",
                    len(ol_books), qid, ', '.join(isbns))
        if len(ol_books) > 1:
            logger.warning(
                "Multiple (%d) Open Library books for %s (isbns %s)",
                len(ol_books), qid, ', '.join(isbns))
        # update open library data
        for book in ol_books:
            if 'wikidata' not in book.identifiers:
                book.identifiers['wikidata'] = []
            book_qids = book.identifiers['wikidata']
            if qid in book_qids:
                logger.warning("%s already has qid %s", book.olid, qid)
                continue
            book_qids.append(qid)
            if len(book_qids) > 1:
                logger.warning("%s now has multiple (%d) qids (%s)",
                               book.olid, len(book_qids),
                               ', '.join(book_qids))
            if not dry_run:
                book.save("[sync_edition_olids] add wikidata identifier")
                logger.debug("Added %s to %s", qid, book.olid)
            # NOTE(review): counted even in dry runs — presumably
            # intentional so ``limit`` also bounds dry runs; confirm.
            ol_books_modified += 1
        # update wikidata data
        for book in ol_books:
            item = pywikibot.ItemPage(wd_repo, qid)
            claim = make_str_claim(wd_repo, 'P648', book.olid)
            if not dry_run:
                item.addClaim(claim, bot=True)
                logger.debug("Added %s to %s", book.olid, qid)
            wd_items_modified += 1
        if limit:
            ol_books_limit = ol_books_modified >= limit
            wd_items_limit = wd_items_modified >= limit
            if ol_books_limit and wd_items_limit:
                logger.info(
                    "Hit limit of %s on both Open Library and Wikidata; Stopping.",
                    limit)
            elif ol_books_limit:
                logger.info("Hit limit of %s on Open Library; Stopping.",
                            limit)
            elif wd_items_limit:
                logger.info("Hit limit of %s on Wikidata; Stopping.", limit)
            if ol_books_limit or wd_items_limit:
                break
    logger.info("Updated %d Open Library books and %d Wikidata items",
                ol_books_modified, wd_items_modified)
class DuosManagingBot(WikidataEntityBot):
    """Bot that splits "duo" items (pairs of people sharing one item)
    into two individual person items linked back via P527 (has part)."""

    # per-language conjunctions used to split a duo label in two
    conj = {
        'af': ' en ', 'az': ' və ', 'bg': ' и ', 'br': ' ha ',
        'ca': ' i ', 'cs': ' a ', 'cy': ' a ', 'da': ' og ',
        'de': ' und ', 'el': ' και ', 'en': ' and ', 'en-gb': ' and ',
        'eo': ' kaj ', 'es': ' y ', 'et': ' ja ', 'eu': ' eta ',
        'fi': ' ja ', 'fr': ' et ', 'fy': ' en ', 'gl': ' e ',
        'hr': ' i ', 'hu': ' és ', 'id': ' dan ', 'it': ' e ',
        'ka': ' და ', 'la': ' et ', 'lt': ' ir ', 'lv': ' un ',
        'ms': ' dan ', 'nb': ' og ', 'nl': ' en ', 'nn': ' og ',
        'oc': ' e ', 'pl': ' i ', 'pt': ' e ', 'ro': ' și ',
        'ru': ' и ', 'sk': ' a ', 'sl': ' in ', 'sr': ' и ',
        'sv': ' och ', 'tr': ' ve ', 'uk': ' і ', 'vi': ' và ',
        'war': ' ngan ',
    }
    # properties copied from the duo item onto each person item
    distribute_properties = {
        'P21', 'P22', 'P25', 'P27', 'P40', 'P53', 'P106', 'P1412',
    }
    # duo classes mapped to the relation between its two members
    class_to_relation = [
        ('Q14756018', 'twin'),
        ('Q14073567', 'sibling'),
        ('Q3046146', 'spouse'),
    ]
    # relation name -> property linking the two new items to each other
    relation_map = {
        #'partner': 'P451', todo
        'sibling': 'P3373',
        'spouse': 'P26',
        'twin': 'P3373',
    }
    use_from_page = False

    def __init__(self, generator, **kwargs):
        """Set up bot options, query helpers and the item generator."""
        self.availableOptions.update({
            'always': True,
            'class': 'Q15618652',
            'min_labels': 1,
        })
        super(DuosManagingBot, self).__init__(**kwargs)
        self.store = QueryStore()
        self.sparql = SparqlQuery(repo=self.repo)
        self._generator = generator or self.custom_generator()

    def skip_page(self, item):
        """Skip items without P31 or that were already split (P527)."""
        if super(DuosManagingBot, self).skip_page(item):
            return True
        if 'P31' not in item.claims:
            pywikibot.output('%s is missing P31 property' % item)
            return True
        if 'P527' in item.claims:
            pywikibot.output('%s already has P527 property' % item)
            return True
        return False

    def custom_generator(self):
        """Build a SPARQL generator over the configured duo class."""
        kwargs = {'class': self.getOption('class')}
        query = self.store.build_query('duos', **kwargs)
        return pagegenerators.WikidataSPARQLPageGenerator(query,
                                                          site=self.repo)

    @property
    def generator(self):
        return pagegenerators.PreloadingEntityGenerator(self._generator)

    def get_relation(self, item):
        """Return the relation name for the item's duo class, or None."""
        ask_pattern = 'ASK { wd:%s wdt:P31/wdt:P279* wd:%%s }' % item.id
        for key, rel in self.class_to_relation:
            if self.sparql.ask(ask_pattern % key):
                return rel
        return None

    def get_labels(self, item, relation):
        """Split the duo's labels into two per-language label dicts.

        Returns [labels_first, labels_second]; a language is skipped
        when the label cannot be split unambiguously.
        """
        labels = [{}, {}]
        for lang in set(item.labels.keys()) & set(self.conj.keys()):
            for conj in (self.conj[lang], ' & '):
                # drop a trailing " (...)" disambiguator
                label = item.labels[lang].partition(' (')[0]
                if ', ' in label:
                    continue
                split = label.split(conj)
                if len(split) != 2:
                    continue
                split0 = split[0].split()
                split1 = split[1].split()
                # second part starting lowercase is not a person name
                if split1[0].islower():
                    continue
                if len(split1) > len(split0):
                    # merge e.g. a nobiliary particle into the surname
                    if len(split1) > 2 and split1[-2].islower():
                        split1[-2:] = [' '.join(split1[-2:])]
                    if len(split1) - len(split0) == 1:
                        # if items are in a relation, then they probably
                        # share their surname
                        if relation:
                            split[0] += ' %s' % split1[-1]
                            split0.append(split1[-1])
                if len(split0) > 1 or len(split1) == 1:
                    for i in [0, 1]:
                        labels[i][lang] = split[i]
                    break
        return labels

    def treat_page_and_item(self, page, item):
        """Create the two person items and move shared claims over."""
        relation = self.get_relation(item)
        labels = self.get_labels(item, relation)
        count = max(map(len, labels))
        if count == 0:
            pywikibot.output('No labels, skipping...')
            return
        if count < self.getOption('min_labels'):
            pywikibot.output('Too few labels (%i), skipping...' % count)
            return
        to_add = []
        to_remove = []
        for prop in self.distribute_properties:
            for claim in item.claims.get(prop, []):
                if claim.getTarget():
                    to_remove.append(claim)
                    json = claim.toJSON()
                    # drop the claim id so it can be re-added elsewhere
                    json.pop('id')
                    to_add.append(json)
        items = [self.create_item(item, data, relation, to_add)
                 for data in labels]
        # link the two new items to each other when the relation has a
        # dedicated property
        if self.relation_map.get(relation):
            for it, target in zip(items, reversed(items)):
                claim = pywikibot.Claim(self.repo,
                                        self.relation_map[relation])
                claim.setTarget(target)
                self.user_add_claim(it, claim)
        # the duo "has part" each person
        for it in items:
            claim = pywikibot.Claim(self.repo, 'P527')
            claim.setTarget(it)
            self.user_add_claim(item, claim)
        for claim in to_remove:
            pywikibot.output('Removing %s --> %s' % (
                claim.id, claim.getTarget()))
            json = claim.toJSON()
            json['remove'] = ''
            self.user_edit_entity(
                item, {'claims':[json]},
                summary='moved [[Property:%s]] to %s' % (
                    claim.id, ' & '.join(map(methodcaller(
                        'title', as_link=True, insite=self.repo), items))))

    def create_item(self, item, labels, relation, to_add):
        """Create one person item carrying ``labels`` and the moved claims."""
        pywikibot.output('Creating item (relation "%s")...' % relation)
        new_item = pywikibot.ItemPage(self.repo)
        data = {'labels': labels}
        self.user_edit_entity(
            new_item, data,
            summary='based on data in %s' % item.title(
                as_link=True, insite=self.repo),
            asynchronous=False)
        claim = pywikibot.Claim(self.repo, 'P31')
        claim.setTarget(pywikibot.ItemPage(self.repo, 'Q5'))
        self.user_add_claim(new_item, claim)
##        if relation == 'twin':
##            claim = pywikibot.Claim(self.repo, 'P31')
##            claim.setTarget(pywikibot.ItemPage(self.repo, 'Q159979'))
##            self.user_add_claim(new_item, claim)
        claim = pywikibot.Claim(self.repo, 'P361')
        claim.setTarget(item)
        self.user_add_claim(new_item, claim)
        for json in to_add:
            temp_claim = pywikibot.Claim.fromJSON(self.repo, json)
            pywikibot.output('Adding %s --> %s' % (
                temp_claim.id, temp_claim.getTarget()))
            self.user_edit_entity(
                new_item, {'claims':[json]},
                summary='moving [[Property:%s]] from %s' % (
                    temp_claim.id, item.title(as_link=True,
                                              insite=self.repo)))
        return new_item
def _query(q):
    """Run ``q`` against the SPARQL endpoint and return the result rows."""
    service = SparqlQuery()
    return service.select(q)
"""
file_name_ssb = '07459_20220224-190529.json'
year_adding = 2022
test_edit = '[[Wikidata:Requests for permissions/Bot/IngeniousBot 2|Test edit]]: '

# Municipalities (Q755707, by SSB id P2504) with a best-rank population
# (P1082) whose point in time is not 2022-01-01.
# NOTE(review): ``wd:Q755707, wd:Q755707`` repeats the same class —
# presumably one occurrence was meant to be another QID; confirm.
query_municipality = """ SELECT DISTINCT ?item ?value WHERE { ?item wdt:P2504 ?value; wdt:P31 wd:Q755707, wd:Q755707; p:P1082 _:b30. _:b30 pq:P585 ?pointintime; rdf:type wikibase:BestRank. FILTER(?pointintime != "2022-01-01T00:00:00Z"^^xsd:dateTime) } """

wikiquery = SparqlQuery()
# NOTE(review): despite the name, SparqlQuery.query returns parsed JSON
# (a dict), not XML.
xml = wikiquery.query(query_municipality)

# Opening JSON file from SSB
# NOTE(review): the handle is never closed in this chunk; a ``with``
# block would be safer.
f = open(file_name_ssb)

# returns JSON object as a dictionary
data = json.load(f)

# the dict for data
data_mun = {}

# Iterating through the json
blacklist = ['21-22', '23', 'Rest']  # region codes to ignore
k_num = data['dataset']['dimension']['Region']['category']['index']
# NOTE(review): this loop continues beyond this chunk.
for i in k_num:
def query(q):
    """Run a SPARQL query against the Wikidata Query Service.

    :param q: SPARQL query string
    :return: result rows as returned by SparqlQuery.select
    """
    # Fix: select() is an instance method. Calling it on the class bound
    # ``q`` to ``self``, so the call failed (or queried nothing) — an
    # instance must be created first, as the sibling helper does.
    return SparqlQuery().select(q)
class DuosManagingBot(WikidataEntityBot):
    """Bot that splits "duo" items (pairs of people sharing one item)
    into two individual person items linked back via P527 (has part)."""

    # per-language conjunctions used to split a duo label in two
    conj = {
        'af': ' en ', 'az': ' və ', 'bg': ' и ', 'br': ' ha ',
        'ca': ' i ', 'cs': ' a ', 'cy': ' a ', 'da': ' og ',
        'de': ' und ', 'el': ' και ', 'en': ' and ', 'en-gb': ' and ',
        'eo': ' kaj ', 'es': ' y ', 'et': ' ja ', 'eu': ' eta ',
        'fi': ' ja ', 'fr': ' et ', 'fy': ' en ', 'gl': ' e ',
        'hr': ' i ', 'hu': ' és ', 'id': ' dan ', 'it': ' e ',
        'ka': ' და ', 'la': ' et ', 'lt': ' ir ', 'lv': ' un ',
        'ms': ' dan ', 'nb': ' og ', 'nl': ' en ', 'nn': ' og ',
        'oc': ' e ', 'pl': ' i ', 'pt': ' e ', 'ro': ' și ',
        'ru': ' и ', 'sk': ' a ', 'sl': ' in ', 'sr': ' и ',
        'sv': ' och ', 'tr': ' ve ', 'uk': ' і ', 'vi': ' và ',
        'war': ' ngan ',
    }
    # properties copied from the duo item onto each person item
    distribute_properties = [
        'P21', 'P22', 'P25', 'P27', 'P40', 'P53', 'P106', 'P1412',
    ]
    # duo classes mapped to the relation between its two members
    class_to_relation = [
        ('Q106925878', 'father-son'),
        ('Q14756018', 'twin'),
        ('Q14073567', 'sibling'),
        ('Q3046146', 'spouse'),
        # TODO: ('Q1141470', 'comedians'), not a "relation by blood"
    ]
    # relation name -> property linking the two new items to each other
    relation_map = {
        'sibling': 'P3373',
        'spouse': 'P26',
        'twin': 'P3373',
        # TODO: 'partner': 'P451',
        #'father-son': '', we don't know who is who
        #'comedians': 'P1327',
    }
    use_from_page = False

    def __init__(self, generator, **kwargs):
        """Set up bot options, query helpers and the item generator."""
        self.available_options.update({
            'always': True,
            'class': 'Q10648343',
            'min_labels': 1,
        })
        super().__init__(**kwargs)
        self.store = QueryStore()
        self.sparql = SparqlQuery(repo=self.repo)
        self._generator = generator or self.custom_generator()

    def skip_page(self, item):
        """Skip items without P31 or that were already split (P527)."""
        if super().skip_page(item):
            return True
        if 'P31' not in item.claims:
            pywikibot.output('%s is missing P31 property' % item)
            return True
        if 'P527' in item.claims:
            pywikibot.output('%s already has P527 property' % item)
            return True
        return False

    def custom_generator(self):
        """Build a SPARQL generator over the configured duo class."""
        kwargs = {'class': self.opt['class']}
        query = self.store.build_query('duos', **kwargs)
        return pagegenerators.WikidataSPARQLPageGenerator(query,
                                                          site=self.repo)

    @property
    def generator(self):
        return pagegenerators.PreloadingEntityGenerator(self._generator)

    def get_relation(self, item):
        """Return the relation name for the item's duo class, or None."""
        ask_pattern = 'ASK { wd:%s wdt:P31/wdt:P279* wd:%%s }' % item.id
        for key, rel in self.class_to_relation:
            if self.sparql.ask(ask_pattern % key):
                return rel
        return None

    def get_labels(self, item, relation):
        """Split the duo's labels into two per-language label dicts.

        Returns [labels_first, labels_second]; a language is skipped
        when the label cannot be split unambiguously.
        """
        labels = [{}, {}]
        for lang in item.labels.keys() & self.conj.keys():
            for conj in (self.conj[lang], ' & '):
                # drop a trailing " (...)" disambiguator
                label = item.labels[lang].partition(' (')[0]
                if ', ' in label:
                    continue
                split = label.split(conj)
                if len(split) != 2:
                    continue
                split0 = split[0].split()
                split1 = split[1].split()
                # second part starting lowercase is not a person name
                if split1[0].islower():
                    continue
                # TODO: if len(split1) > 1 and split1[0][-1] == '.':
                if len(split1) > len(split0):
                    # merge e.g. a nobiliary particle into the surname
                    if len(split1) > 2 and split1[-2].islower():
                        split1[-2:] = [' '.join(split1[-2:])]
                    if len(split1) - len(split0) == 1:
                        # if items are in a relation, then
                        # they probably share their surname
                        if relation:
                            split[0] += ' %s' % split1[-1]
                            split0.append(split1[-1])
                if len(split0) > 1 or len(split1) == 1:
                    labels[0][lang] = split[0]
                    labels[1][lang] = split[1]
                    break
        return labels

    def treat_page_and_item(self, page, item):
        """Create the two person items and move shared claims over."""
        relation = self.get_relation(item)
        labels = self.get_labels(item, relation)
        count = max(map(len, labels))
        if count == 0:
            pywikibot.output('No labels, skipping...')
            return
        if count < self.opt['min_labels']:
            pywikibot.output('Too few labels (%i), skipping...' % count)
            return
        to_add = []
        to_remove = []
        for prop in self.distribute_properties:
            for claim in item.claims.get(prop, []):
                if claim.getTarget():
                    to_remove.append(claim)
                    json = claim.toJSON()
                    # drop the claim id so it can be re-added elsewhere
                    json.pop('id')
                    to_add.append(json)
        items = [self.create_item(item, data, relation, to_add)
                 for data in labels]
        # link the two new items to each other when the relation has a
        # dedicated property
        if self.relation_map.get(relation):
            for it, target in zip(items, reversed(items)):
                claim = pywikibot.Claim(self.repo,
                                        self.relation_map[relation])
                claim.setTarget(target)
                self.user_add_claim(it, claim)
        # the duo "has part" each person
        for it in items:
            claim = pywikibot.Claim(self.repo, 'P527')
            claim.setTarget(it)
            self.user_add_claim(item, claim)
        for claim in to_remove:
            pywikibot.output('Removing %s --> %s' % (
                claim.id, claim.getTarget()))
            json = claim.toJSON()
            json['remove'] = ''
            summary = 'moved [[Property:{}]] to {} & {}'.format(
                claim.id,
                items[0].title(as_link=True, insite=self.repo),
                items[1].title(as_link=True, insite=self.repo)
            )
            self.user_edit_entity(item, {'claims':[json]}, summary=summary)

    def create_item(self, item, labels, relation, to_add):
        """Create one person item carrying ``labels`` and the moved claims."""
        pywikibot.output('Creating item (relation "%s")...' % relation)
        new_item = pywikibot.ItemPage(self.repo)
        self.user_edit_entity(
            new_item, {'labels': labels},
            asynchronous=False,
            summary='based on data in %s' % item.title(
                as_link=True, insite=self.repo))
        claim = pywikibot.Claim(self.repo, 'P31')
        claim.setTarget(pywikibot.ItemPage(self.repo, 'Q5'))
        self.user_add_claim(new_item, claim)
        claim = pywikibot.Claim(self.repo, 'P361')
        claim.setTarget(item)
        self.user_add_claim(new_item, claim)
        for json in to_add:
            temp_claim = pywikibot.Claim.fromJSON(self.repo, json)
            pywikibot.output('Adding %s --> %s' % (
                temp_claim.id, temp_claim.getTarget()))
            self.user_edit_entity(
                new_item, {'claims':[json]},
                summary='moving [[Property:%s]] from %s' % (
                    temp_claim.id, item.title(as_link=True,
                                              insite=self.repo)))
        return new_item
#!/usr/bin/python3 import pywikibot from pywikibot.data.sparql import SparqlQuery import sys """ Check "no data" properties in case there's data now """ sparql_query = SparqlQuery() START_END_QUERY = """ PREFIX q: <http://www.wikidata.org/prop/qualifier/> SELECT DISTINCT ?s WHERE { BIND (p:%s as ?prop) ?s ?prop ?st . # One claim with start time ?st q:P580 ?t . # and no end time OPTIONAL { ?st q:P582 ?t2 } FILTER(!bound(?t2)) ?st wikibase:rank wikibase:NormalRank. # it's best rank, i.e. no preferred ?st a wikibase:BestRank . # Another claim ?s ?prop ?st2 . FILTER(?st2 != ?st) # with an end time ?st2 q:P582 ?t3 . # and it's not a dead person OPTIONAL { ?s wdt:P570 ?d } FILTER(!bound(?d)) ?st2 wikibase:rank wikibase:NormalRank.
# Items (P31 wd:Q848944) with a mass (P2067) statement whose unit is
# wd:Q199, excluding a few known-good items.
# NOTE(review): Q199 is presumably the dimensionless unit "1" — confirm.
QUERY = """ SELECT ?id ?idLabel WHERE { ?id p:P2067/psv:P2067 [ wikibase:quantityUnit wd:Q199 ] . ?id wdt:P31 wd:Q848944 . FILTER(?id != wd:Q4115189 && ?id != wd:Q13406268 && ?id != wd:Q15397819) SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . } } """

TON = 'http://www.wikidata.org/entity/Q752079'  # entity URI for tonne
VOLUME = 'P2234'

site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()
site.throttle.setDelays(writedelay=1)  # speed up consecutive writes
sparql_query = SparqlQuery()

items = sparql_query.get_items(QUERY, item_name="id")
print("%d items found" % len(items))
for item in items:
    qid = item.strip()
    if qid[0:5] == 'http:':
        # strip http://www.wikidata.org/entity/
        qid = qid[31:]
    item = pywikibot.ItemPage(repo, qid)
    item.get()
    # NOTE(review): ``PROP`` is not defined in this chunk — presumably
    # set earlier in the file; confirm.
    if PROP not in item.claims:
        print("No %s for %s, skip!" % (PROP, qid))
        continue
    badclaims = []
    # NOTE(review): this loop continues beyond this chunk.
    for claim in item.claims[PROP]:
# Another claim ?s ?prop ?st2 . FILTER(?st2 != ?st) # with an end time ?st2 q:P582 ?t3 . } LIMIT 10 """ LABELS = """ SELECT ?p ?pLabel { VALUES ?p { %s } ?p rdfs:label ?pLabel FILTER(lang(?pLabel) = 'en') }""" sparql_query = SparqlQuery() fromID = int(sys.argv[1]) toID = int(sys.argv[2]) def batch(iterable, n=1): l = len(iterable) for ndx in range(0, l, n): yield iterable[ndx:min(ndx + n, l)] for chunk in batch(range(fromID, toID), BATCH_SIZE): candidates = set() props = ' '.join(["p:P" + str(x) for x in chunk]) sparql = QUERY % props