Пример #1
0
    def _same_tree(cls, prop, data1, data2):
        """Return True if the targets of data1/data2 lie on one wdt:<prop> path.

        Two ASK queries are built (one per direction) testing whether any
        target item of one claim group reaches any target of the other via
        a ``wdt:<prop>*`` property path.  Connection errors are retried
        with a 1s pause; a shared budget of three errors applies across
        both queries, and the third error is re-raised.
        """
        sparql = SparqlQuery() # fixme: dependencies
        pattern = ('ASK { VALUES ?x1 { wd:%s } . VALUES ?x2 { wd:%s } . '
                   '?x1 wdt:%s* ?x2 }')
        # "wd:"-prefixed, space-joined id lists for the VALUES clauses
        item1 = ' wd:'.join(map(attrgetter('target.id'), data1))
        item2 = ' wd:'.join(map(attrgetter('target.id'), data2))
        tries = 3  # shared retry budget for both directions
        for ask in (pattern % (item1, item2, prop),
                    pattern % (item2, item1, prop)):
            res = False
            while True:
                try:
                    res = sparql.ask(ask)
                except requests.exceptions.ConnectionError:
                    tries -= 1
                    if tries == 0:
                        raise
                    time.sleep(1)
                    continue
                else:
                    break
            if res:
                return True

        return False
Пример #2
0
    def _same_tree(cls, prop, data1, data2):
        """Check whether the claim groups are connected by a wdt:<prop>* path.

        Asks the query service in both directions; connection errors share
        a budget of three attempts, after which the error propagates.
        """
        sparql = SparqlQuery()  # fixme: dependencies
        pattern = ('ASK { VALUES ?x1 { wd:%s } . VALUES ?x2 { wd:%s } . '
                   '?x1 wdt:%s* ?x2 }')
        ids1 = ' wd:'.join(map(attrgetter('target.id'), data1))
        ids2 = ' wd:'.join(map(attrgetter('target.id'), data2))
        queries = (pattern % (ids1, ids2, prop),
                   pattern % (ids2, ids1, prop))
        error_budget = 3
        for query in queries:
            while True:
                try:
                    answer = sparql.ask(query)
                except requests.exceptions.ConnectionError:
                    error_budget -= 1
                    if error_budget == 0:
                        raise
                    time.sleep(1)
                else:
                    break
            if answer:
                return True

        return False
 def __init__(self, generator, **kwargs):
     """Set option defaults and wire up the SPARQL helpers.

     :param generator: source of items to treat; when falsy, a custom
         SPARQL-based generator over the 'class' option is used instead
     """
     self.available_options.update({
         'always': True,
         'class': 'Q10648343',
         'min_labels': 1,
     })
     super().__init__(**kwargs)
     self.store = QueryStore()
     self.sparql = SparqlQuery(repo=self.repo)
     self._generator = generator or self.custom_generator()
 def __init__(self, **options):
     """Set batch-size defaults ('step', 'offset') and init helpers."""
     self.available_options.update({
         'step': 10,
         'offset': 0,
     })
     super().__init__(**options)
     # per-property cache, filled lazily while processing
     self.cache = {}
     # records entities that could not be processed, keyed by property
     self.failed = {}
     self.sparql = SparqlQuery(repo=self.repo)
     self.store = QueryStore()
Пример #5
0
 def __init__(self, generator, **kwargs):
     """Set option defaults and wire up the SPARQL helpers.

     :param generator: source of duo items; when falsy, a custom
         SPARQL-based generator over the 'class' option is used instead

     NOTE(review): uses the deprecated ``availableOptions`` attribute and
     old-style ``super(DuosManagingBot, self)`` spelling, unlike sibling
     snippets that use ``available_options`` -- confirm the targeted
     pywikibot version before unifying.
     """
     self.availableOptions.update({
         'always': True,
         'class': 'Q15618652',
         'min_labels': 1,
     })
     super(DuosManagingBot, self).__init__(**kwargs)
     self.store = QueryStore()
     self.sparql = SparqlQuery(repo=self.repo)
     self._generator = generator or self.custom_generator()
Пример #6
0
def get_existing_items_with_instanceof_and_rfcnum():
    """Return a dict mapping RFC numbers to the QIDs of existing RFC items.

    Items are matched by being an instance (or transitive subclass
    instance) of RFC (Q212971) and carrying an RFC number (P892).
    """
    response = SparqlQuery().query('SELECT ?rfcid ?item WHERE { ?item wdt:P31/wdt:P279* wd:Q212971 . ?item wdt:P892 ?rfcid }')
    items_by_rfc = {}
    for row in response['results']['bindings']:
        qid_match = re.search(r'(Q\d+)', row['item']['value'])
        if qid_match is None:
            print('Error: could not find Wikidata item identifier in SPARQL results obtained by get_existing_items_with_instanceof_and_rfcnum()')
            continue
        items_by_rfc[row['rfcid']['value']] = qid_match.group(1)
    return items_by_rfc
Пример #7
0
def books_with_missing_labels_with_title():
    """Find English books with missing labels, but with a title.

    Missing labels are identified by checking if the label is equal to
    the QID (the label service substitutes the entity id when no label
    exists).

    Returns an iterable of (book label, title) pairs; the label equals
    the book's QID here, since only unlabelled books pass the filter.
    """
    # NOTE(review): SUBSTR(STR(?book), 32) skips the leading
    # "http://www.wikidata.org/entity/" (31 characters), leaving the bare
    # QID for the label comparison.  The trailing ';' after ?title relies
    # on the endpoint's lenient parsing -- confirm against WDQS.
    query = f"""
  SELECT ?book ?bookLabel ?title WHERE {{
    ?book wdt:{wp.INSTANCE_OF.pid} wd:{wp.BOOK};
      wdt:{wp.LANGUAGE_OF_WORK_OR_NAME.pid} wd:{wp.ENGLISH};
      wdt:{wp.TITLE.pid} ?title;
    FILTER(REGEX(?bookLabel, SUBSTR(STR(?book), 32 )))
    FILTER((LANG(?title)) = "en")
    SERVICE wikibase:label {{
      bd:serviceParam wikibase:language "en".
      ?book rdfs:label ?bookLabel.
    }}
  }}
  ORDER BY (?title)
  """
    print(query)  # echo the query for debugging
    results = SparqlQuery(repo=Site().data_repository()).select(query)
    for result in results:
        book_label = result["bookLabel"]
        title = result["title"]
        yield book_label, title
Пример #8
0
def episodes_with_titles_and_missing_labels():
    """Yield English TV episodes that have a title but no English label.

    An episode counts as unlabelled when its label equals its QID, which
    is how the label service renders a missing label.

    Yields tuples of (episode QID, title, series label).
    """
    query = f"""
    SELECT ?episode ?episodeLabel ?seriesLabel ?title WHERE {{
        ?episode wdt:{wp.INSTANCE_OF.pid} wd:{wp.TELEVISION_SERIES_EPISODE};
            wdt:{wp.ORIGNAL_LANGUAGE_OF_FILM_OR_TV_SHOW.pid} wd:{wp.ENGLISH}.
        OPTIONAL {{ ?episode wdt:{wp.TITLE.pid} ?title. }}
        OPTIONAL {{ ?episode wdt:{wp.PART_OF_THE_SERIES.pid} ?series. }}
        # Skip "http://www.wikidata.org/entity/" (31 characters)
        FILTER(REGEX(?episodeLabel, SUBSTR(STR(?episode), 32)))
        FILTER((LANG(?title)) = "en")
        SERVICE wikibase:label {{
            bd:serviceParam wikibase:language "en".
            ?episode rdfs:label ?episodeLabel.
            ?series rdfs:label ?seriesLabel.
        }}
    }}
    ORDER BY (?seriesLabel) (?title)
    """
    print(query)
    repo = Site().data_repository()
    for row in SparqlQuery(repo=repo).select(query):
        qid = row["episode"].split("/")[-1]
        yield qid, row["title"], row["seriesLabel"]
Пример #9
0
def movies_with_missing_labels_with_title():
    """Find English movies with missing labels, but with a title.

    Missing labels are identified by checking if the label is equal to
    the QID.

    Returns an iterable of (movie label, title) pairs; the label equals
    the movie's QID here, since only unlabelled movies pass the filter.
    """
    # ?imdbId is matched (though never yielded) so that only movies which
    # do have an IMDb id are returned.
    query = f"""SELECT ?movieLabel ?title ?imdbId WHERE {{
      ?movie wdt:{wp.INSTANCE_OF.pid} wd:Q11424;
        wdt:{wp.ORIGNAL_LANGUAGE_OF_FILM_OR_TV_SHOW.pid} wd:{wp.ENGLISH};
        wdt:{wp.TITLE.pid} ?title;
        wdt:{wp.IMDB_ID.pid}  ?imdbId.
      # Skip "http://www.wikidata.org/entity/" (31 characters)
      FILTER((REGEX(?movieLabel, SUBSTR(STR(?movie), 32 ))))
      FILTER((LANG(?title)) = "en")
      SERVICE wikibase:label {{
        bd:serviceParam wikibase:language "en".
        ?movie rdfs:label ?movieLabel.
      }}
    }}
    ORDER BY (?title)
    """
    print(query)  # echo the query for debugging
    results = SparqlQuery(repo=Site().data_repository()).select(query)
    for result in results:
        movie_label = result["movieLabel"]
        title = result["title"]
        yield movie_label, title
Пример #10
0
def movies_with_missing_titles():
    """Yield English movies that have a label and an IMDb id but no title.

    Yields tuples of (movie QID, movie label).
    """
    query = f"""
    SELECT ?movie ?movieLabel WHERE {{
      ?movie wdt:{wp.INSTANCE_OF.pid} wd:Q11424;
        wdt:{wp.ORIGNAL_LANGUAGE_OF_FILM_OR_TV_SHOW.pid} wd:{wp.ENGLISH}.
      OPTIONAL {{ ?movie wdt:{wp.TITLE.pid} ?title. }}
      OPTIONAL {{ ?movie wdt:P345  ?imdbId. }}
      FILTER(!(BOUND(?title)))
      FILTER((BOUND(?imdbId)))
      # Skip "http://www.wikidata.org/entity/" (31 characters)
      FILTER(!(REGEX(?movieLabel, SUBSTR(STR(?movie), 32 ))))
      SERVICE wikibase:label {{
        bd:serviceParam wikibase:language "en".
        ?movie rdfs:label ?movieLabel.
      }}
    }}
    ORDER BY (?movieLabel)
    """
    print(query)
    repo = Site().data_repository()
    for row in SparqlQuery(repo=repo).select(query):
        yield row["movie"].split("/")[-1], row["movieLabel"]
Пример #11
0
 def __init__(self, generator, **kwargs):
     """Store the generator and option defaults and set up SPARQL helpers.

     :param generator: source of duo items; when falsy, a custom
         SPARQL-based generator over the 'class' option is used instead

     NOTE(review): uses the deprecated ``availableOptions`` attribute and
     old-style ``super(...)`` call; other snippets use
     ``available_options`` -- confirm the targeted pywikibot version.
     """
     self.availableOptions.update({
         'always': True,
         'class': 'Q15618652',
         'min_labels': 1,
     })
     super(DuosManagingBot, self).__init__(**kwargs)
     self.store = QueryStore()
     self.sparql = SparqlQuery(repo=self.repo)
     self._generator = generator or self.custom_generator()
Пример #12
0
def get_existing_items_with_rfc_dois():
    """Return a dict mapping RFC numbers to QIDs of items with an RFC DOI.

    Items are matched by a DOI (P356) of the form 10.17487/RFCnnnn.
    """
    response = SparqlQuery().query('SELECT ?doi ?item WHERE { ?item wdt:P356 ?doi . FILTER regex(?doi, \'^10.17487/RFC\\\\d{4}\') }')
    items_by_rfc = {}
    for row in response['results']['bindings']:
        qid_match = re.search(r'(Q\d+)', row['item']['value'])
        if qid_match is None:
            print('Error: could not find Wikidata item identifier in SPARQL results obtained by get_existing_items_with_rfc_dois()')
            continue
        rfc_match = re.search(r'RFC(\d+)', row['doi']['value'])
        if rfc_match is None:
            print('Error: could not find RFC identifier in SPARQL results obtained by get_existing_items_with_rfc_dois()')
            continue
        items_by_rfc[rfc_match.group(1)] = qid_match.group(1)
    return items_by_rfc
Пример #13
0
def items_with_missing_labels_with_title():
    """Find items with missing labels, but with a title.

    Missing labels are identified by checking if the label is equal to
    the QID.

    Returns an iterable of (item URI, item QID, title).
    """
    # The .ljust(10, " ") padding presumably keeps the trailing SPARQL
    # comments aligned regardless of QID length -- cosmetic only.
    query = f"""
  SELECT DISTINCT ?item ?itemId ?title WHERE {{
    ?item wdt:{wp.INSTANCE_OF.pid} ?itemType;
      wdt:{wp.TITLE.pid} ?title.
    VALUES ?itemType {{
      wd:{wp.TELEVISION_SERIES.ljust(10, " ")} # television series
      wd:{wp.TELEVISION_SERIES_EPISODE.ljust(10, " ")} # television series episode
      wd:{wp.BOOK.ljust(10, " ")} # book
      wd:{wp.FILM.ljust(10, " ")} # film
      wd:{wp.SILENT_FILM.ljust(10, " ")} # silent film
      wd:{wp.LITERARY_WORK.ljust(10, " ")} # literary work
      wd:{wp.WRITTEN_WORK.ljust(10, " ")} # written work
      wd:{wp.PERIODICAL.ljust(10, " ")} # periodical
    }}
    # Skip "http://www.wikidata.org/entity/" (31 characters)
    BIND(SUBSTR(STR(?item), 32 ) AS ?itemId)

    # Only look for titles that are in English, since we add the English label
    FILTER((LANG(?title)) = "en")

    # The label will be the same as the QID if the label is missing
    FILTER(REGEX(?itemLabel, ?itemId))

    SERVICE wikibase:label {{
      bd:serviceParam wikibase:language "en".
      ?item rdfs:label ?itemLabel.
    }}
  }}
  """
    print(query)  # echo the query for debugging
    results = SparqlQuery(repo=Site().data_repository()).select(query)
    for result in results:
        item_link = result["item"]
        item_id = result["itemId"]
        title = result["title"]
        yield item_link, item_id, title
Пример #14
0
def episodes(season_id):
    """Yield the episodes of one season, specified by its QID.

    Yields tuples of (season ordinal, episode QID, episode title),
    ordered by the ordinal.
    """
    query = f"""
    SELECT ?seasonOrdinal ?episode ?episodeTitle WHERE {{
      ?episode wdt:{wp.INSTANCE_OF.pid} wd:{wp.TELEVISION_SERIES_EPISODE};
               wdt:{wp.SEASON.pid} wd:{season_id};
               wdt:{wp.TITLE.pid} ?episodeTitle;
               (p:{wp.SEASON.pid}/pq:{wp.SERIES_ORDINAL.pid}) ?seasonOrdinal .
    }}
    ORDER BY (?seasonOrdinal)
    """
    rows = SparqlQuery(repo=Site().data_repository()).select(query)
    for row in rows:
        qid = row["episode"].split("/")[-1]
        yield int(row["seasonOrdinal"]), qid, row["episodeTitle"]
Пример #15
0
    def get_all(self):
        """Return the QIDs of all humans whose occupation is physician.

        First collects Q39631 (physician) plus all of its direct
        subclasses, then queries the humans (Q5) with each of those
        occupations and unions the results.

        :return: set of QID strings
        """
        physician = 'Q39631'

        query = """SELECT ?subclass_of WHERE {
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
          ?subclass_of wdt:P279 wd:Q39631.
        }
        """
        sparql = SparqlQuery()
        results = sparql.query(query)

        physician_types = {
            q['subclass_of']['value'].split("/")[-1]
            for q in results['results']['bindings']
        }
        physician_types.add(physician)

        physicians = set()

        for t in physician_types:
            query = sub(
                'physician_type', t, """SELECT ?physician WHERE {
          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
          ?physician wdt:P106 wd:physician_type.
          ?physician wdt:P31 wd:Q5.
        }
        """)
            sparql = SparqlQuery()
            try:
                results = sparql.query(query)
            except Exception as e:
                # Bug fix: previously a failed query only logged the error and
                # then unioned the *stale* `results` from an earlier iteration
                # (on the first iteration, the raw response dict), polluting
                # the result set with dict keys.  Skip this type instead.
                pprint(e)
                continue
            physicians |= {
                q['physician']['value'].split("/")[-1]
                for q in results['results']['bindings']
            }
        return physicians
Пример #16
0
 def do_call():
     # Runs the closed-over SPARQL `query` and hands the results to the
     # closed-over `callback` via `idle_add` (presumably GLib's, i.e. on
     # the UI loop's next idle cycle -- confirm against the enclosing
     # module).  The import is local to keep pywikibot off the hot path
     # until the call actually happens.
     from pywikibot.data.sparql import SparqlQuery
     sparql = SparqlQuery()
     results = sparql.query(query)
     idle_add(lambda: callback(results, *cb_args, **kwargs))
     return None
Пример #17
0
from pywikibot import pagegenerators
from pywikibot.data.sparql import SparqlQuery
import re

import sys  # NOTE(review): `sys` was used below but never imported in this fragment

# %s placeholder: the English sitelink title of the page.
QUERY = """
SELECT ?id WHERE {
    ?site schema:name "%s"@en;  schema:about ?id .
}
"""
# %s placeholder: the QID whose English sitelink title is wanted.
QUERY_LINK = """
SELECT ?name WHERE {
    ?page schema:about wd:%s;  schema:inLanguage "en"; schema:name ?name .
}
"""
QID = sys.argv[1]
sparql_query = SparqlQuery()
# Raw strings: the previous plain literals contained invalid escape
# sequences (e.g. "\[") which raise SyntaxWarning/DeprecationWarning on
# modern Python.  The compiled patterns are byte-identical.
linkre = re.compile(r'\[\[([^|]+)?(\|.+)?]]')
titlere = re.compile(r'\{\{lang-(\w+)\|(.+)}}')
yearre = re.compile(r'\[\[(\d+) ')


def find_by_label(label):
    match = linkre.match(label)
    if match:
        label = match.group(1)
    page = pywikibot.Page(site, label)
    if page.isRedirectPage():
        redir = page.getRedirectTarget()
        label = redir.title()
    sparql = QUERY % (label)
    #    print(sparql)
Пример #18
0
import pywikibot
import sys
import json
import uuid
from pywikibot.data.sparql import SparqlQuery

HELP = """
Fix language in title from labels
"""

# Connect to Wikidata; write throttle lowered to 1 second between edits.
site = pywikibot.Site("wikidata", "wikidata")
site.throttle.setDelays(writedelay=1)
repo = site.data_repository()
sparql_query = SparqlQuery()
# Paintings (Q3305213) whose name (P1705) or title (P1476) carries the
# "undetermined" (und) language code; processed in batches of 100.
TITLES_QUERY = """
SELECT * WHERE {
  ?p wdt:P31 wd:Q3305213 .
  ?p (wdt:P1705|wdt:P1476) ?title .
  FILTER(lang(?title) = "und")
} LIMIT 100
"""

def fix_lang(claims, labels):
    for claim in claims:
        if claim.getSnakType() != 'value':
            continue
        target = claim.getTarget()
        print(target)
        if not isinstance(target, pywikibot.WbMonolingualText):
            continue
        if target.language != 'und':
Пример #19
0
import pywikibot
import sys
import json
import uuid
from pywikibot.data.sparql import SparqlQuery

HELP = """
This script applies painting title to label in the same language
"""

# Connect to Wikidata; write throttle lowered to 1 second between edits.
site = pywikibot.Site("wikidata", "wikidata")
site.throttle.setDelays(writedelay=1)
repo = site.data_repository()
sparql_query = SparqlQuery()
# Paintings (Q3305213) with a name (P1705) or title (P1476) in a
# determined language but no label in that same language; batches of 100.
TITLES_QUERY = """
SELECT ?p ?title WHERE {
  ?p wdt:P31 wd:Q3305213 .
  ?p (wdt:P1705|wdt:P1476) ?title .
  BIND(lang(?title) as ?lt)
  FILTER(?lt != "und")
  FILTER NOT EXISTS {
    ?p rdfs:label ?pl .
    FILTER(lang(?pl) = ?lt)
  }
} LIMIT 100
"""

# full_data=True: cells come back as typed objects (the code below reads
# result['title'].language) rather than plain strings.
results = sparql_query.select(TITLES_QUERY, full_data=True)
#print(results)
for result in results:
    lang = result['title'].language
class ExternalIdSlicingBot(WikidataEntityBot):
    """Replace full URLs stored in external-id claims with the bare identifier.

    Items are fetched in SPARQL batches; for every external-id claim whose
    value still starts with "http", the identifier is extracted using the
    property's formatter URL (P1630) and, when available, validated against
    the property's format regex (P1793).  Failures are collected and
    published to a user subpage on exit.
    """

    # Properties this bot must never touch.
    blacklist = {'P2013'}
    use_from_page = False

    def __init__(self, **options):
        """Set batch defaults ('step', 'offset') and init caches/helpers."""
        self.available_options.update({
            'step': 10,
            'offset': 0,
        })
        super().__init__(**options)
        # per-property cache of (formatter, regex) pairs
        self.cache = {}
        # property -> set of items whose value could not be sliced
        self.failed = {}
        self.sparql = SparqlQuery(repo=self.repo)
        self.store = QueryStore()

    @property
    def generator(self):
        """Yield preloaded entities with sliceable external ids, in batches.

        Pages through SPARQL results 'step' items at a time, until an ASK
        query reports that no further results exist at the current offset.
        """
        step = self.opt['step']
        opts = {
            # fixme: don't use this word
            'blacklist': ' wd:'.join(self.blacklist),
            'limit': step,
        }
        offset = self.opt['offset']
        while True:
            pywikibot.output('\nLoading items (offset %i)...' % offset)
            opts['offset'] = offset
            ask = self.store.build_query('ask_externalid_props', **opts)
            if not self.sparql.ask(ask):
                break
            query = self.store.build_query('external-ids', **opts)
            gen = PreloadingEntityGenerator(
                WikidataSPARQLPageGenerator(query, site=self.repo))
            yield from gen
            offset += step

    def treat_page_and_item(self, page, item):
        """Fix every external-id claim on *item* that still holds a URL."""
        for prop, claims in item.claims.items():
            if prop in self.blacklist:
                continue
            # all claims of one property share a datatype, so checking the
            # first claim is sufficient
            if claims[0].type != 'external-id':
                continue
            for cl in claims:
                if not cl.target or not cl.target.startswith('http'):
                    continue
                formatter, regex = self.get_formatter_and_regex(prop)
                if not formatter:
                    pywikibot.output("%s doesn't have a formatter" % prop)
                    break
                value = self.find_value(cl.target, formatter)
                if not value:
                    pywikibot.output(
                        'Value not found in "%s" for property %s' %
                        (cl.target, prop))
                    self.failed.setdefault(prop, set()).add(item)
                    continue
                if regex:
                    try:
                        match = re.match('(%s)' % regex, value)
                    except re.error:
                        pywikibot.output('Couldn\'t apply regex "%s"' % regex)
                        break
                    if not match:
                        pywikibot.output('Value "%s" not matched by regex '
                                         '"%s"' % (value, regex))
                        self.failed.setdefault(prop, set()).add(item)
                        continue
                    value = match.group()
                summary = 'harvested the identifier based on [[Property:P1630]]'
                if regex:
                    summary += ' and [[Property:P1793]]'
                cl.changeTarget(value, summary=summary)

    def get_formatter_and_regex(self, prop):
        """Return the cached (formatter URL, format regex) pair for *prop*.

        When a property carries several P1630/P1793 statements, the value
        is only used if exactly one of them has preferred rank.
        """
        if prop not in self.cache:
            formatter = regex = None
            ppage = pywikibot.PropertyPage(self.repo, prop)
            if 'P1630' in ppage.claims:
                if len(ppage.claims['P1630']) > 1:
                    preferred = [
                        cl for cl in ppage.claims['P1630']
                        if cl.rank == 'preferred'
                    ]
                    if len(preferred) == 1:
                        formatter = preferred[0].target
                else:
                    formatter = ppage.claims['P1630'][0].target

            if 'P1793' in ppage.claims:
                if len(ppage.claims['P1793']) > 1:
                    preferred = [
                        cl for cl in ppage.claims['P1793']
                        if cl.rank == 'preferred'
                    ]
                    if len(preferred) == 1:
                        regex = preferred[0].target
                else:
                    regex = ppage.claims['P1793'][0].target

            self.cache[prop] = (formatter, regex)

        return self.cache[prop]

    def strip_init_stuff(self, string):
        """Drop a leading http(s):// scheme and "www." prefix from *string*."""
        if string.startswith(('http://', 'https://')):
            string = string.partition('//')[2]
        if string.startswith('www.'):
            string = string[4:]
        return string

    def find_value(self, url, formatter):
        """Extract the identifier from *url* using *formatter* (with "$1").

        Returns None when the url does not fit the formatter pattern.
        NOTE(review): assumes the formatter contains "$1"; one without it
        would make split[1] raise IndexError -- confirm upstream data.
        """
        url = self.strip_init_stuff(url)
        formatter = self.strip_init_stuff(formatter)
        value = pywikibot.page.url2unicode(url)
        split = formatter.split('$1')
        if not value.startswith(split[0]):
            return None
        if not split[1]:
            # formatter ends with $1: everything after the prefix is the id
            return value[len(split[0]):].rstrip('/')

        value = value[:-len(split[-1])]

        try:
            index = value.index(split[1], len(split[0]))
        except ValueError:
            return None
        else:
            return value[len(split[0]):index].rstrip('/')

    def exit(self):
        """On shutdown, publish collected failures to a user subpage."""
        if self.failed:
            text = ''
            for prop in sorted(self.failed):
                text += '* [[Property:%s]]:\n' % prop
                for item in sorted(self.failed[prop]):
                    text += '** [[%s]]\n' % item.title()
            page = pywikibot.Page(
                self.repo, 'User:%s/Wrong external ids' % self.repo.username())
            page.put(text, summary='update')
        super().exit()
Пример #21
0
def sync_edition_olids_by_isbns(dry_run=False, limit=None):
    """
    Find editions on Wikidata and Open Library with the same ISBNs and add the
    Open Library ID to Wikidata and the Wikidata ID to Open Library.

    :param dry_run: when True, log what would change but save nothing
    :param limit: stop once at least this many modifications were made on
        either side (checked after each edition)
    """
    wd = pywikibot.Site("wikidata", "wikidata")
    wd_repo = wd.data_repository()
    wdqs = SparqlQuery()  # Wikidata Query Service

    ol = OpenLibrary()

    # append date to query avoid getting cached results
    query = QUERY + f"\n # {datetime.datetime.now()}"
    sparql_results = wdqs.select(query)

    # Group by key (sparql hits timeouts when we do the grouping there)
    qid_to_isbns = {}
    for row in sparql_results:
        qid = row['item'].split('/')[-1]
        # setdefault replaces the former membership-test-then-append dance
        qid_to_isbns.setdefault(qid, []).append(normalize_isbn(row['isbn']))

    logger.info("Found %d editions to update", len(qid_to_isbns))
    ol_books_modified = 0
    wd_items_modified = 0
    for qid, isbns in qid_to_isbns.items():
        logger.debug("Processing %s", qid)

        # An edition should have at most one ISBN-10 and one ISBN-13.
        for isbn_len in [10, 13]:
            count = len([isbn for isbn in isbns if len(isbn) == isbn_len])
            if count > 1:
                logger.warning("%s has multiple isbn%ss (%d)", qid, isbn_len,
                               count)

        ol_books = [ol.Edition.get(isbn=isbn) for isbn in isbns]
        ol_books = [book for book in ol_books if book and book.olid != 'None']
        ol_books = remove_dupes(ol_books, lambda ed: ed.olid)

        logger.info("Found %d Open Library book(s) for %s (isbns %s)",
                    len(ol_books), qid, ', '.join(isbns))
        if len(ol_books) > 1:
            logger.warning(
                "Multiple (%d) Open Library books for %s (isbns %s)",
                len(ol_books), qid, ', '.join(isbns))

        # update open library data
        for book in ol_books:
            if 'wikidata' not in book.identifiers:
                book.identifiers['wikidata'] = []

            book_qids = book.identifiers['wikidata']

            if qid in book_qids:
                logger.warning("%s already has qid %s", book.olid, qid)
                continue

            book_qids.append(qid)
            if len(book_qids) > 1:
                logger.warning("%s now has multiple (%d) qids (%s)", book.olid,
                               len(book_qids), ', '.join(book_qids))
            if not dry_run:
                book.save("[sync_edition_olids] add wikidata identifier")
            logger.debug("Added %s to %s", qid, book.olid)
            ol_books_modified += 1

        # update wikidata data
        for book in ol_books:
            item = pywikibot.ItemPage(wd_repo, qid)
            claim = make_str_claim(wd_repo, 'P648', book.olid)
            if not dry_run:
                item.addClaim(claim, bot=True)
            logger.debug("Added %s to %s", book.olid, qid)
            wd_items_modified += 1

        if limit:
            ol_books_limit = ol_books_modified >= limit
            wd_items_limit = wd_items_modified >= limit
            if ol_books_limit and wd_items_limit:
                logger.info(
                    "Hit limit of %s on both Open Library and Wikidata; Stopping.",
                    limit)
            elif ol_books_limit:
                logger.info("Hit limit of %s on Open Library; Stopping.",
                            limit)
            elif wd_items_limit:
                logger.info("Hit limit of %s on Wikidata; Stopping.", limit)
            if ol_books_limit or wd_items_limit:
                break
    logger.info("Updated %d Open Library books and %d Wikidata items",
                ol_books_modified, wd_items_modified)
Пример #22
0
class DuosManagingBot(WikidataEntityBot):
    """Split "duo" items (two people described by one item) into two items.

    For each duo item, the bot derives the two individual labels from the
    duo's labels, creates one item per person, copies the shared
    person-properties over, and links everything together with
    "has part" (P527), "part of" (P361) and, where applicable, the
    relation property (sibling/spouse/twin).
    """

    # Per-language conjunction used to split a duo label into two names.
    conj = {
        'af': ' en ',
        'az': ' və ',
        'bg': ' и ',
        'br': ' ha ',
        'ca': ' i ',
        'cs': ' a ',
        'cy': ' a ',
        'da': ' og ',
        'de': ' und ',
        'el': ' και ',
        'en': ' and ',
        'en-gb': ' and ',
        'eo': ' kaj ',
        'es': ' y ',
        'et': ' ja ',
        'eu': ' eta ',
        'fi': ' ja ',
        'fr': ' et ',
        'fy': ' en ',
        'gl': ' e ',
        'hr': ' i ',
        'hu': ' és ',
        'id': ' dan ',
        'it': ' e ',
        'ka': ' და ',
        'la': ' et ',
        'lt': ' ir ',
        'lv': ' un ',
        'ms': ' dan ',
        'nb': ' og ',
        'nl': ' en ',
        'nn': ' og ',
        'oc': ' e ',
        'pl': ' i ',
        'pt': ' e ',
        'ro': ' și ',
        'ru': ' и ',
        'sk': ' a ',
        'sl': ' in ',
        'sr': ' и ',
        'sv': ' och ',
        'tr': ' ve ',
        'uk': ' і ',
        'vi': ' và ',
        'war': ' ngan ',
    }
    # Claims copied from the duo item onto each newly created person item.
    distribute_properties = {
        'P21', 'P22', 'P25', 'P27', 'P40', 'P53', 'P106', 'P1412',
    }
    # Duo classes (tested via wdt:P31/wdt:P279*) mapped to a relation kind.
    class_to_relation = [
        ('Q14756018', 'twin'),
        ('Q14073567', 'sibling'),
        ('Q3046146', 'spouse'),
    ]
    # Relation kind -> property linking the two new items to each other.
    relation_map = {
        #'partner': 'P451', todo
        'sibling': 'P3373',
        'spouse': 'P26',
        'twin': 'P3373',
    }
    use_from_page = False

    def __init__(self, generator, **kwargs):
        """Store the generator and option defaults.

        :param generator: source of duo items; when falsy, a SPARQL-based
            generator over the configured 'class' option is used instead
        """
        self.availableOptions.update({
            'always': True,
            'class': 'Q15618652',
            'min_labels': 1,
        })
        super(DuosManagingBot, self).__init__(**kwargs)
        self.store = QueryStore()
        self.sparql = SparqlQuery(repo=self.repo)
        self._generator = generator or self.custom_generator()

    def skip_page(self, item):
        """Skip items without P31 or that were already split (have P527)."""
        if super(DuosManagingBot, self).skip_page(item):
            return True
        if 'P31' not in item.claims:
            pywikibot.output('%s is missing P31 property' % item)
            return True
        if 'P527' in item.claims:
            pywikibot.output('%s already has P527 property' % item)
            return True
        return False

    def custom_generator(self):
        """Yield duo items of the configured class from a stored query."""
        kwargs = {'class': self.getOption('class')}
        query = self.store.build_query('duos', **kwargs)
        return pagegenerators.WikidataSPARQLPageGenerator(query, site=self.repo)

    @property
    def generator(self):
        # preload entity data in batches for efficiency
        return pagegenerators.PreloadingEntityGenerator(self._generator)

    def get_relation(self, item):
        """Return the relation kind of *item* (per class_to_relation) or None."""
        ask_pattern = 'ASK { wd:%s wdt:P31/wdt:P279* wd:%%s }' % item.id
        for key, rel in self.class_to_relation:
            if self.sparql.ask(ask_pattern % key):
                return rel
        return None

    def get_labels(self, item, relation):
        """Derive the two individual label sets from the duo's labels.

        Returns a list of two dicts mapping language -> label.  A label is
        split on the language's conjunction (or " & "); labels containing
        ", " or whose second part starts lowercase are left out.
        """
        labels = [{}, {}]
        for lang in set(item.labels.keys()) & set(self.conj.keys()):
            for conj in (self.conj[lang], ' & '):
                # drop a parenthesized qualifier, e.g. "A and B (duo)"
                label = item.labels[lang].partition(' (')[0]
                if ', ' in label:
                    continue
                split = label.split(conj)
                if len(split) != 2:
                    continue
                split0 = split[0].split()
                split1 = split[1].split()
                if split1[0].islower():
                    continue
                if len(split1) > len(split0):
                    # join a trailing lowercase particle onto the surname
                    if len(split1) > 2 and split1[-2].islower():
                        split1[-2:] = [' '.join(split1[-2:])]
                    if len(split1) - len(split0) == 1:
                        # if items are in a relation, then they probably share
                        # their surname
                        if relation:
                            split[0] += ' %s' % split1[-1]
                            split0.append(split1[-1])
                if len(split0) > 1 or len(split1) == 1:
                    for i in [0, 1]:
                        labels[i][lang] = split[i]
                    break

        return labels

    def treat_page_and_item(self, page, item):
        """Split one duo item into two new linked person items."""
        relation = self.get_relation(item)
        labels = self.get_labels(item, relation)
        count = max(map(len, labels))
        if count == 0:
            pywikibot.output('No labels, skipping...')
            return

        if count < self.getOption('min_labels'):
            pywikibot.output('Too few labels (%i), skipping...' % count)
            return

        # claims to copy onto the new items and then remove from the duo
        to_add = []
        to_remove = []
        for prop in self.distribute_properties:
            for claim in item.claims.get(prop, []):
                if claim.getTarget():
                    to_remove.append(claim)
                    json = claim.toJSON()
                    json.pop('id')
                    to_add.append(json)

        items = [self.create_item(item, data, relation, to_add)
                 for data in labels]
        if self.relation_map.get(relation):
            # link the two new items to each other (sibling/spouse/twin)
            for it, target in zip(items, reversed(items)):
                claim = pywikibot.Claim(self.repo, self.relation_map[relation])
                claim.setTarget(target)
                self.user_add_claim(it, claim)

        for it in items:
            claim = pywikibot.Claim(self.repo, 'P527')
            claim.setTarget(it)
            self.user_add_claim(item, claim)

        for claim in to_remove:
            pywikibot.output('Removing %s --> %s' % (
                claim.id, claim.getTarget()))
            json = claim.toJSON()
            json['remove'] = ''
            self.user_edit_entity(
                item, {'claims':[json]},
                summary='moved [[Property:%s]] to %s' % (
                    claim.id, ' & '.join(map(methodcaller(
                        'title', as_link=True, insite=self.repo), items))))

    def create_item(self, item, labels, relation, to_add):
        """Create one person item with *labels* and the distributed claims."""
        pywikibot.output('Creating item (relation "%s")...' % relation)
        new_item = pywikibot.ItemPage(self.repo)
        data = {'labels': labels}
        self.user_edit_entity(
            new_item, data, summary='based on data in %s' % item.title(
                as_link=True, insite=self.repo), asynchronous=False)

        claim = pywikibot.Claim(self.repo, 'P31')
        claim.setTarget(pywikibot.ItemPage(self.repo, 'Q5'))
        self.user_add_claim(new_item, claim)
##        if relation == 'twin':
##            claim = pywikibot.Claim(self.repo, 'P31')
##            claim.setTarget(pywikibot.ItemPage(self.repo, 'Q159979'))
##            self.user_add_claim(new_item, claim)

        claim = pywikibot.Claim(self.repo, 'P361')
        claim.setTarget(item)
        self.user_add_claim(new_item, claim)
        for json in to_add:
            temp_claim = pywikibot.Claim.fromJSON(self.repo, json)
            pywikibot.output('Adding %s --> %s' % (
                temp_claim.id, temp_claim.getTarget()))
            self.user_edit_entity(
                new_item, {'claims':[json]},
                summary='moving [[Property:%s]] from %s' % (
                    temp_claim.id,
                    item.title(as_link=True, insite=self.repo)))
        return new_item
Пример #23
0
def _query(q):
    """Run *q* as a SPARQL SELECT query and return the result rows."""
    service = SparqlQuery()
    return service.select(q)
Пример #24
0
"""
file_name_ssb = '07459_20220224-190529.json'
year_adding = 2022
test_edit = '[[Wikidata:Requests for permissions/Bot/IngeniousBot 2|Test edit]]: '

query_municipality = """
SELECT DISTINCT ?item ?value WHERE {
  ?item wdt:P2504 ?value;
    wdt:P31 wd:Q755707, wd:Q755707;
    p:P1082 _:b30.
  _:b30 pq:P585 ?pointintime;
    rdf:type wikibase:BestRank.
  FILTER(?pointintime != "2022-01-01T00:00:00Z"^^xsd:dateTime)
}
"""
# Run the municipality query against the Wikidata Query Service.
wikiquery = SparqlQuery()
# NOTE(review): despite the variable name, SparqlQuery.query() returns
# parsed results rather than raw XML — confirm before relying on the name.
xml = wikiquery.query(query_municipality)

# Opening JSON file from SSB
# NOTE(review): the handle is never closed in the visible code;
# consider using a `with` block.
f = open(file_name_ssb)

# returns JSON object as a dictionary
data = json.load(f)

# the dict for data (filled later, presumably keyed by municipality)
data_mun = {}

# Iterating through the json
# Region codes to skip when iterating the SSB dataset.
blacklist = ['21-22', '23', 'Rest']
# Mapping of region code -> positional index in the SSB dataset.
k_num = data['dataset']['dimension']['Region']['category']['index']
for i in k_num:
Пример #25
0
def query(q):
    """Run *q* as a SPARQL SELECT query and return the result rows.

    Fix: ``select`` is an instance method of ``SparqlQuery``; calling it
    on the class itself would raise a TypeError (missing ``self``), so an
    instance is created first, matching the other examples in this file.
    """
    return SparqlQuery().select(q)
Пример #26
0
class DuosManagingBot(WikidataEntityBot):

    """Bot that splits a Wikidata item about a duo into two person items.

    For each duo item, the bot derives the two members' names from the
    item's labels, creates one new item per member, moves "personal"
    claims (sex, citizenship, occupation, ...) from the duo onto each
    member, and links everything together: P527 ("has part") on the duo,
    P361 ("part of") on each member, plus a mutual relation property
    (e.g. P3373/P26) when the kind of relation is known.
    """

    # Per-language conjunction separating the two names in a label,
    # e.g. 'en': ' and ' splits "Tom and Jerry".
    conj = {
        'af': ' en ',
        'az': ' və ',
        'bg': ' и ',
        'br': ' ha ',
        'ca': ' i ',
        'cs': ' a ',
        'cy': ' a ',
        'da': ' og ',
        'de': ' und ',
        'el': ' και ',
        'en': ' and ',
        'en-gb': ' and ',
        'eo': ' kaj ',
        'es': ' y ',
        'et': ' ja ',
        'eu': ' eta ',
        'fi': ' ja ',
        'fr': ' et ',
        'fy': ' en ',
        'gl': ' e ',
        'hr': ' i ',
        'hu': ' és ',
        'id': ' dan ',
        'it': ' e ',
        'ka': ' და ',
        'la': ' et ',
        'lt': ' ir ',
        'lv': ' un ',
        'ms': ' dan ',
        'nb': ' og ',
        'nl': ' en ',
        'nn': ' og ',
        'oc': ' e ',
        'pl': ' i ',
        'pt': ' e ',
        'ro': ' și ',
        'ru': ' и ',
        'sk': ' a ',
        'sl': ' in ',
        'sr': ' и ',
        'sv': ' och ',
        'tr': ' ve ',
        'uk': ' і ',
        'vi': ' và ',
        'war': ' ngan ',
    }
    # Properties describing the individual members rather than the duo;
    # their claims are moved from the duo onto both new person items.
    distribute_properties = [
        'P21', 'P22', 'P25', 'P27', 'P40', 'P53', 'P106', 'P1412',
    ]
    # (class QID, relation keyword) pairs, tested in order via P31/P279*.
    class_to_relation = [
        ('Q106925878', 'father-son'),
        ('Q14756018', 'twin'),
        ('Q14073567', 'sibling'),
        ('Q3046146', 'spouse'),
        # TODO: ('Q1141470', 'comedians'), not a "relation by blood"
    ]
    # Relation keyword -> property that links the two members to each other.
    relation_map = {
        'sibling': 'P3373',
        'spouse': 'P26',
        'twin': 'P3373',
        # TODO: 'partner': 'P451',
        #'father-son': '', we don't know who is who
        #'comedians': 'P1327',
    }
    use_from_page = False

    def __init__(self, generator, **kwargs):
        """Initialize the bot and its SPARQL helpers.

        @param generator: optional generator of duo items; falls back to
            a stored-query-based SPARQL generator when not given
        """
        self.available_options.update({
            'always': True,
            'class': 'Q10648343',  # root class whose instances are treated as duos
            'min_labels': 1,  # minimum number of languages with a usable split
        })
        super().__init__(**kwargs)
        self.store = QueryStore()
        self.sparql = SparqlQuery(repo=self.repo)
        self._generator = generator or self.custom_generator()

    def skip_page(self, item):
        """Skip items without P31 or that already have parts (P527)."""
        if super().skip_page(item):
            return True
        if 'P31' not in item.claims:
            pywikibot.output('%s is missing P31 property' % item)
            return True
        if 'P527' in item.claims:
            pywikibot.output('%s already has P527 property' % item)
            return True
        return False

    def custom_generator(self):
        """Yield duo items found by the stored 'duos' SPARQL query."""
        kwargs = {'class': self.opt['class']}
        query = self.store.build_query('duos', **kwargs)
        return pagegenerators.WikidataSPARQLPageGenerator(query, site=self.repo)

    @property
    def generator(self):
        """Return a preloading wrapper around the item generator."""
        return pagegenerators.PreloadingEntityGenerator(self._generator)

    def get_relation(self, item):
        """Return the relation keyword for *item*, or None if unknown.

        Asks the query service whether the item is an instance of (a
        subclass of) any class listed in ``class_to_relation``.
        """
        ask_pattern = 'ASK { wd:%s wdt:P31/wdt:P279* wd:%%s }' % item.id
        for key, rel in self.class_to_relation:
            if self.sparql.ask(ask_pattern % key):
                return rel
        return None

    def get_labels(self, item, relation):
        """Split the duo's labels into labels for the two members.

        @param relation: relation keyword; when set, the members are
            assumed to share a surname, which is copied to the first name
        @return: list of two dicts mapping language code to member label
        """
        labels = [{}, {}]
        for lang in item.labels.keys() & self.conj.keys():
            for conj in (self.conj[lang], ' & '):
                # Drop a parenthesized disambiguator, e.g. "X and Y (duo)".
                label = item.labels[lang].partition(' (')[0]
                if ', ' in label:
                    continue
                split = label.split(conj)
                if len(split) != 2:
                    continue
                split0 = split[0].split()
                split1 = split[1].split()
                if not split0 or not split1:
                    # The label starts or ends with the conjunction;
                    # guards the split1[0] access below against IndexError.
                    continue
                if split1[0].islower():
                    continue
                # TODO: if len(split1) > 1 and split1[0][-1] == '.':
                if len(split1) > len(split0):
                    # Join a lowercase surname particle, e.g. "van Dyck".
                    if len(split1) > 2 and split1[-2].islower():
                        split1[-2:] = [' '.join(split1[-2:])]
                    if len(split1) - len(split0) == 1:
                        # if items are in a relation, then
                        # they probably share their surname
                        if relation:
                            split[0] += ' %s' % split1[-1]
                            split0.append(split1[-1])
                if len(split0) > 1 or len(split1) == 1:
                    labels[0][lang] = split[0]
                    labels[1][lang] = split[1]
                    break

        return labels

    def treat_page_and_item(self, page, item):
        """Split the duo *item* into two member items and link them."""
        relation = self.get_relation(item)
        labels = self.get_labels(item, relation)
        count = max(map(len, labels))
        if count == 0:
            pywikibot.output('No labels, skipping...')
            return

        if count < self.opt['min_labels']:
            pywikibot.output('Too few labels (%i), skipping...' % count)
            return

        # Collect claims to copy onto the members (to_add) and later
        # remove from the duo (to_remove).
        to_add = []
        to_remove = []
        for prop in self.distribute_properties:
            for claim in item.claims.get(prop, []):
                if claim.getTarget():
                    to_remove.append(claim)
                    # Renamed from `json` to avoid shadowing the module name.
                    claim_json = claim.toJSON()
                    claim_json.pop('id')  # a fresh id is assigned on insert
                    to_add.append(claim_json)

        items = [self.create_item(item, data, relation, to_add)
                 for data in labels]
        if self.relation_map.get(relation):
            # Link each member to the other one (zip with the reversed
            # pair swaps the two items).
            for it, target in zip(items, reversed(items)):
                claim = pywikibot.Claim(self.repo, self.relation_map[relation])
                claim.setTarget(target)
                self.user_add_claim(it, claim)

        # The duo "has part" each member.
        for it in items:
            claim = pywikibot.Claim(self.repo, 'P527')
            claim.setTarget(it)
            self.user_add_claim(item, claim)

        for claim in to_remove:
            pywikibot.output('Removing %s --> %s' % (
                claim.id, claim.getTarget()))
            claim_json = claim.toJSON()
            claim_json['remove'] = ''  # marks the claim for removal
            summary = 'moved [[Property:{}]] to {} & {}'.format(
                claim.id,
                items[0].title(as_link=True, insite=self.repo),
                items[1].title(as_link=True, insite=self.repo)
            )
            self.user_edit_entity(item, {'claims': [claim_json]}, summary=summary)

    def create_item(self, item, labels, relation, to_add):
        """Create one member item with *labels* and the claims in *to_add*.

        @return: the newly created ItemPage
        """
        pywikibot.output('Creating item (relation "%s")...' % relation)
        new_item = pywikibot.ItemPage(self.repo)
        # Create synchronously so the item has an ID before claims are added.
        self.user_edit_entity(
            new_item,
            {'labels': labels},
            asynchronous=False,
            summary='based on data in %s' % item.title(
                as_link=True, insite=self.repo))

        # Mark the new item as a human (Q5).
        claim = pywikibot.Claim(self.repo, 'P31')
        claim.setTarget(pywikibot.ItemPage(self.repo, 'Q5'))
        self.user_add_claim(new_item, claim)
        # Link the member back to the duo ("part of").
        claim = pywikibot.Claim(self.repo, 'P361')
        claim.setTarget(item)
        self.user_add_claim(new_item, claim)
        for claim_json in to_add:
            temp_claim = pywikibot.Claim.fromJSON(self.repo, claim_json)
            pywikibot.output('Adding %s --> %s' % (
                temp_claim.id, temp_claim.getTarget()))
            self.user_edit_entity(
                new_item, {'claims': [claim_json]},
                summary='moving [[Property:%s]] from %s' % (
                    temp_claim.id,
                    item.title(as_link=True, insite=self.repo)))
        return new_item
Пример #27
0
#!/usr/bin/python3
import pywikibot
from pywikibot.data.sparql import SparqlQuery
import sys
"""
Check "no data" properties in case there's data now
"""
sparql_query = SparqlQuery()

START_END_QUERY = """
PREFIX q: <http://www.wikidata.org/prop/qualifier/>
SELECT DISTINCT ?s WHERE {
  BIND (p:%s as ?prop)
  ?s ?prop ?st .
# One claim with start time
  ?st q:P580 ?t .
# and no end time
  OPTIONAL { ?st q:P582 ?t2 }
  FILTER(!bound(?t2))
  ?st wikibase:rank wikibase:NormalRank.
# it's best rank, i.e. no preferred
  ?st a wikibase:BestRank .
# Another claim
  ?s ?prop ?st2 .
  FILTER(?st2 != ?st)
# with an end time
  ?st2 q:P582 ?t3 .
# and it's not a dead person
  OPTIONAL { ?s wdt:P570 ?d }
  FILTER(!bound(?d))
  ?st2 wikibase:rank wikibase:NormalRank.
Пример #28
0
# Items with a P2067 (mass) value whose unit is wd:Q199, excluding a few
# known sandbox/test items; labels fetched in English.
QUERY = """
SELECT ?id ?idLabel WHERE {
    ?id p:P2067/psv:P2067 [ wikibase:quantityUnit wd:Q199 ] .
    ?id wdt:P31 wd:Q848944 .
    FILTER(?id != wd:Q4115189 && ?id != wd:Q13406268 && ?id != wd:Q15397819)
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
"""
# Entity URI used elsewhere in the script (presumably as a unit target).
TON = 'http://www.wikidata.org/entity/Q752079'
# Property edited by the loop below.
VOLUME = 'P2234'

site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()
# Speed up consecutive writes for this bot run.
site.throttle.setDelays(writedelay=1)

sparql_query = SparqlQuery()
# Entity URIs (or bare QIDs) from the ?id column of the query.
items = sparql_query.get_items(QUERY, item_name="id")

print("%d items found" % len(items))
for item in items:
    qid = item.strip()
    if qid[0:5] == 'http:':
        # strip http://www.wikidata.org/entity/
        qid = qid[31:]
    item = pywikibot.ItemPage(repo, qid)
    item.get()
    if PROP not in item.claims:
        print("No %s for %s, skip!" % (PROP, qid))
        continue
    badclaims = []
    for claim in item.claims[PROP]:
Пример #29
0
# Another claim
  ?s ?prop ?st2 .
  FILTER(?st2 != ?st)
# with an end time
  ?st2 q:P582 ?t3 .
} LIMIT 10
"""
# Template query fetching English labels for a batch of properties;
# %s is filled with space-separated "p:P<n>" values.
LABELS = """
SELECT ?p ?pLabel {
  VALUES ?p {
%s
  }
  ?p rdfs:label ?pLabel
  FILTER(lang(?pLabel) = 'en')
}"""
sparql_query = SparqlQuery()

# Numeric property-ID range to scan, taken from the command line:
# <script> <fromID> <toID>
fromID = int(sys.argv[1])
toID = int(sys.argv[2])

def batch(iterable, n=1):
    """Yield successive chunks of *iterable* with at most *n* elements.

    The final chunk may be shorter when ``len(iterable)`` is not a
    multiple of *n*.  *iterable* must support ``len()`` and slicing
    (list, tuple, str, range, ...).
    """
    length = len(iterable)
    for start in range(0, length, n):
        # Slicing already clamps at the sequence end, so no min() is needed.
        yield iterable[start:start + n]


for chunk in batch(range(fromID, toID), BATCH_SIZE):
    candidates = set()
    props = ' '.join(["p:P" + str(x) for x in chunk])
    sparql = QUERY % props