Exemplo n.º 1
0
    def __init__(self,
                 botid,
                 datapath,
                 key="id",
                 required_fields=None,
                 empty_check=lambda x: x is None):
        """Set up the bot: read CLI flags, open the skiplist and load the data.

        Args:
            botid: Identifier of the bot; also used for the skiplist file
                name (``projects/skiplists/<botid>.txt``).
            datapath: Path handed to ``Knead`` to load the records.
            key: Stored as ``self.key``.
            required_fields: Stored as ``self.required_fields``. ``None``
                (the default) becomes a new empty list.
            empty_check: Stored as ``self.empty_check``. The default
                callable returns True only for ``None``.
        """
        print(f"Setting up new bot '{botid}'")
        print(f"Data path: {datapath}")

        # Parse command line arguments and play it safe: run_once and
        # dry_run stay enabled unless "-run-all" / "-run-live" are passed.
        args = pywikibot.handle_args()
        run_once = "-run-all" not in args
        dry_run = "-run-live" not in args
        print(f"Running once? {run_once}")
        print(f"Dry run? {dry_run}")

        self.id = botid
        self.run_once = run_once
        self.dry_run = dry_run
        self.skiplist = Skiplist(f"projects/skiplists/{self.id}.txt")
        self.key = key
        self.current_job = None
        self.data = Knead(datapath).data()
        # A `[]` default in the signature would be one list object shared by
        # every instance; create a fresh list per instance instead.
        self.required_fields = [] if required_fields is None else required_fields
        self.empty_check = empty_check
Exemplo n.º 2
0
    def __init__(self,
                 botid,
                 datapath=None,
                 sparql=None,
                 run_once=False,
                 qid_key="qid",
                 empty_check=lambda x: x is None or x == "",
                 precheck_data=lambda x: True):
        """Set up the bot: read CLI flags, open the skiplist and load the data.

        Args:
            botid: Identifier of the bot; also used for the skiplist file
                name (``projects/skiplists/<botid>.txt``).
            datapath: Path handed to ``Knead`` (with ``has_header=True``).
                Takes precedence over ``sparql`` when both are given.
            sparql: Query text handed to ``Query``; its results are
                materialised into a list.
            run_once: Pass True to force run-once mode regardless of the
                command line. With the default False the mode is decided by
                the command line alone (run-once unless "-run-all" is given).
            qid_key: Stored as ``self.qid_key``.
            empty_check: Stored as ``self.empty_check``. The default callable
                returns True for ``None`` and for the empty string.
            precheck_data: Stored as ``self.precheck_data``. The default
                callable accepts everything.
        """
        print(f"Setting up new bot '{botid}'")

        if (not datapath) and (not sparql):
            # NOTE(review): `Error` is not defined in the visible code; if it
            # is not imported elsewhere this line raises NameError instead.
            raise Error("No datapath and no sparql")

        # Parse command line arguments and play it safe: run once unless
        # "-run-all" is passed. An explicit run_once=True is honoured too;
        # previously the argument was overwritten and had no effect.
        args = pywikibot.handle_args()
        run_once = bool(run_once) or "-run-all" not in args
        print(f"Running once? {run_once}")

        self.id = botid
        self.run_once = run_once
        self.qid_key = qid_key
        self.empty_check = empty_check
        self.precheck_data = precheck_data
        self.skiplist = Skiplist(f"projects/skiplists/{self.id}.txt")

        if datapath:
            self.data = Knead(datapath, has_header=True).data()
        elif sparql:
            query = Query(sparql)
            self.data = list(query.iter_results())
Exemplo n.º 3
0
def main():
    """Add a BAG building ID claim to every record that does not have one.

    Records come from ``data/uds/monuments-with-qids.csv`` under ``PATH``.
    A record is left alone when its QID is in the skiplist or when the item
    already carries a ``Props.BAG_BUILDING`` claim; only records that get a
    new claim are added to the skiplist.
    """
    records = Knead(PATH + "/data/uds/monuments-with-qids.csv").data()
    done = Skiplist("projects/skiplists/uds.txt")

    for position, record in enumerate(records):
        print(record)
        qid, bag, url = record["qid"], record["bag_ok"], record["url"]
        print()
        print(f"#{position} / #{len(records)}")
        print(f"Handling {qid} / {bag} / {url}")

        if done.has(qid):
            print(f"{qid} in skiplist, skipping")
            continue

        wd_item = WikidataItem(qid)
        existing = wd_item.get_claims()

        if Props.BAG_BUILDING in existing:
            print("This item already has a BAG building ID, skipping")
        else:
            # Reference: stated in / reference URL / language of the work.
            sources = [
                wd_item.get_item_claim(Props.STATED_IN, Items.UDS_DOC),
                wd_item.get_url_claim(Props.REF_URL, url),
                wd_item.get_item_claim(Props.LANGUAGE_WORK, Items.DUTCH),
            ]
            wd_item.add_string_claim(Props.BAG_BUILDING, bag,
                                     references=sources)
            done.add(qid)
Exemplo n.º 4
0
def main():
    """Add inception / dissolved / significant-event claims for churches.

    Records come from ``projects/data/churches/import.json``. A record is
    skipped when "inception", "demolished" and "restored" are all falsy
    (note that "expanded" is not part of that test) or when its QID is in
    the skiplist. Each claim is only added when the item has no claim for
    that property yet, judged against the claims fetched once per item.
    """
    records = Knead("projects/data/churches/import.json").data()
    done = Skiplist("projects/skiplists/churches.txt")

    for position, record in enumerate(records):
        qid = record["qid"]
        title = record["title"]
        print()
        print(f"#{position} / #{len(records)}")
        print(f"Handling {qid} / {title}")

        inception = record["inception"]
        demolished = record["demolished"]
        restored = record["restored"]

        if not (inception or demolished or restored):
            print("No inception, demolished, restored, skipping")
            continue

        print(record)

        if done.has(qid):
            print(f"{qid} in skiplist, skipping")
            continue

        wd_item = WikidataItem(qid)
        claims = wd_item.get_claims()
        permalink = get_permalink("nl", title)

        # Plain year claims: one property per source field.
        for year, prop in ((inception, Props.INCEPTION),
                           (demolished, Props.DISSOLVED)):
            if year and prop not in claims:
                set_year_claim(wd_item, prop, year, permalink)

        # Both of these are stored under the same SIG_EVENT property.
        if restored and Props.SIG_EVENT not in claims:
            set_sig_claim(wd_item, Items.RECONSTRUCTION, restored, permalink)

        expanded = record["expanded"]
        if expanded and Props.SIG_EVENT not in claims:
            set_sig_claim(wd_item, Items.BUILDING_EXPANSION, expanded,
                          permalink)

        done.add(qid)
Exemplo n.º 5
0
def main():
    """Add a capacity (number of seats) claim to every listed church.

    Records come from ``projects/data/churchseats/seats-qids.csv``. A record
    is left alone when its QID is in the skiplist or when the item already
    has a ``Props.CAPACITY`` claim; only records that get a new claim are
    added to the skiplist.
    """
    records = Knead("projects/data/churchseats/seats-qids.csv").data()
    done = Skiplist("projects/skiplists/churchseats.txt")
    # Fixed revision of the Dutch Wikipedia list used as the import source.
    permalink = (
        "https://nl.wikipedia.org/w/index.php"
        "?title=Lijst_van_grootste_Nederlandse_kerkgebouwen_naar_zitplaatsen"
        "&oldid=56777124"
    )

    for position, record in enumerate(records):
        qid, title, seats = record["qid"], record["name"], record["seats"]
        print()
        print(f"#{position} / #{len(records)}")
        print(f"Handling {qid} / {title} / {seats} seats")

        print(record)

        if done.has(qid):
            print(f"{qid} in skiplist, skipping")
            continue

        wd_item = WikidataItem(qid)
        existing = wd_item.get_claims()

        if Props.CAPACITY in existing:
            print("This item already has capacity, skipping")
        else:
            sources = [
                wd_item.get_item_claim(Props.IMPORTED_FROM,
                                       Items.WIKIPEDIA_NL),
                wd_item.get_url_claim(Props.WM_IMPORT_URL, permalink),
            ]
            wd_item.add_quantity_claim(Props.CAPACITY, seats,
                                       references=sources)
            done.add(qid)
Exemplo n.º 6
0
def match_seasons():
    """Attach season, ordinal and neighbour claims to every episode row.

    Reads ``data/zomergasten/seasons.csv`` and ``data/zomergasten/episodes.csv``
    next to this file. For each episode whose QID is not in the skiplist it
    adds:

    * a ``Props.SEASON`` claim pointing at the season whose "year" equals the
      episode's "year", qualified with ``Props.SERIES_ORDINAL``;
    * ``Props.FOLLOWS`` for the previous row in the episodes file (if any);
    * ``Props.FOLLOWED_BY`` for the next row in the episodes file (if any).

    The ordinal restarts at 1 whenever the season year differs from the
    previous row's, and rows that are in the skiplist still advance both the
    ordinal and the "previous episode" pointer.

    Raises:
        ValueError: If an episode's year has no matching season row.
    """
    PATH = str(Path(__file__).parent)
    seasons = Knead(PATH + "/data/zomergasten/seasons.csv").data()
    episodes = Knead(PATH + "/data/zomergasten/episodes.csv").data()
    skiplist = Skiplist(PATH + "/skiplists/zomergasten-seasons.txt")

    def get_season_by_year(year):
        """Return the first season row whose "year" equals ``year``, or None."""
        return next((s for s in seasons if s["year"] == year), None)

    prev_ep = None
    cur_year = "1988"
    ep_index = 1

    for index, episode in enumerate(episodes):
        ep_qid = episode["item"]
        ep_year = episode["year"]
        ep_title = episode["itemLabel"]

        season = get_season_by_year(ep_year)
        if season is None:
            # Fail with a readable message instead of the TypeError that
            # subscripting None would produce on the next line.
            raise ValueError(
                f"No season found for year {ep_year!r} (episode {ep_qid})")

        season_qid = season["item"]
        season_title = season["itemLabel"]

        if skiplist.has(ep_qid):
            print(f"{ep_qid} ({ep_title}) in skiplist, skipping")
            if season["year"] != cur_year:
                print("reset")
                ep_index = 1
                cur_year = season["year"]

            # Skipped rows still count for numbering and FOLLOWS links.
            prev_ep = episode
            ep_index += 1

            continue

        if season["year"] != cur_year:
            ep_index = 1
            cur_year = season["year"]

        # Only the "no next row" case should yield None; a bare `except`
        # here would also hide unrelated errors and Ctrl-C.
        try:
            next_ep = episodes[index + 1]
        except IndexError:
            next_ep = None

        print("---" * 20)
        print(
            f"{ep_qid} - {ep_title} / #{ep_index} {season_qid} {season_title}")
        print(f"{prev_ep} / {next_ep}")
        print("---" * 20)
        print()

        item = WikidataItem(ep_qid)

        item.add_item_claim(Props.SEASON,
                            season_qid,
                            qualifiers=[
                                item.get_string_claim(Props.SERIES_ORDINAL,
                                                      str(ep_index))
                            ])

        if prev_ep:
            item.add_item_claim(Props.FOLLOWS, prev_ep["item"])

        if next_ep:
            item.add_item_claim(Props.FOLLOWED_BY, next_ep["item"])

        skiplist.add(ep_qid)

        prev_ep = episode
        ep_index += 1
Exemplo n.º 7
0
class CreateBot:
    """Drive a "create new items" job over a list of data records.

    ``iterate()`` is a generator: it yields one ``BotJob`` per record that
    should be processed and, once the caller resumes it, records the outcome
    (skiplist entry, optional early exit).
    """

    def __init__(self,
                 botid,
                 datapath,
                 key="id",
                 required_fields=None,
                 empty_check=lambda x: x is None):
        """Read CLI flags, open the skiplist and load the data.

        Args:
            botid: Identifier of the bot; also used for the skiplist file
                name (``projects/skiplists/<botid>.txt``).
            datapath: Path handed to ``Knead`` to load the records.
            key: Name of the record field that identifies a record; used for
                the skiplist lookups in ``iterate()``.
            required_fields: Field names every record must have a non-empty
                value for. ``None`` (the default) means no required fields.
            empty_check: Callable deciding whether a field value counts as
                empty. The default returns True only for ``None``.
        """
        print(f"Setting up new bot '{botid}'")
        print(f"Data path: {datapath}")

        # Parse command line arguments and play it safe: run_once and
        # dry_run stay enabled unless "-run-all" / "-run-live" are passed.
        args = pywikibot.handle_args()
        run_once = "-run-all" not in args
        dry_run = "-run-live" not in args
        print(f"Running once? {run_once}")
        print(f"Dry run? {dry_run}")

        self.id = botid
        self.run_once = run_once
        self.dry_run = dry_run
        self.skiplist = Skiplist(f"projects/skiplists/{self.id}.txt")
        self.key = key
        self.current_job = None
        self.data = Knead(datapath).data()
        # A `[]` default in the signature would be one list object shared by
        # every instance; create a fresh list per instance instead.
        self.required_fields = [] if required_fields is None else required_fields
        self.empty_check = empty_check

    def has_required_fields(self, item):
        """Return True when every required field of ``item`` is filled in.

        A field that is absent from ``item`` is treated like an empty one
        (reported and rejected) rather than raising ``KeyError``.
        """
        for field in self.required_fields:
            if field not in item or self.empty_check(item[field]):
                print(f"'{field}' is empty, aborting")
                return False

        return True

    def iterate(self):
        """Yield a ``BotJob`` for every record that should be processed.

        Records are skipped (nothing is yielded) when they have no key, are
        in the skiplist, miss a required field, or when the bot is in dry-run
        mode. After the caller has handled a yielded job, the record's key is
        added to the skiplist unless the job was aborted.

        Raises:
            Exception: If a job that was not aborted has no ``item`` set once
                the caller resumes the generator.
        """
        for index, item in enumerate(self.data):
            if self.key not in item or item[self.key] == "":
                print(f"This item has no key, skipping, {item}")
                continue

            item_id = item[self.key]

            print()
            print(f"#{index + 1} / {len(self.data)} / id:{item_id}")

            if self.skiplist.has(item_id):
                print(f"{item_id} in skiplist, skipping")
                continue

            dd(item)
            print()

            if not self.has_required_fields(item):
                continue

            if self.dry_run:
                print("Dry run, skip the actual creating")
                continue

            job = BotJob(data=item)
            self.current_job = job
            yield job

            # Execution resumes here once the caller asks for the next job.
            if job.is_aborted:
                continue

            if not job.item:
                raise Exception("Still no item for this job, aborting")

            self.skiplist.add(item_id)

            if self.run_once:
                print("Only running once...")
                sys.exit()

        print("Bot is done")
        send_im_message(f"CreateBot finished running: {self.id}")
Exemplo n.º 8
0
class Bot:
    """Drive an "edit existing items" job over a list of data records.

    ``iterate()`` is a generator: it yields one ``BotJob`` (record plus its
    ``WikidataItem``) per record that should be processed and adds the
    record's QID to the skiplist once the caller resumes it.
    """

    def __init__(self,
                 botid,
                 datapath=None,
                 sparql=None,
                 run_once=False,
                 qid_key="qid",
                 empty_check=lambda x: x is None or x == "",
                 precheck_data=lambda x: True):
        """Read CLI flags, open the skiplist and load the data.

        Args:
            botid: Identifier of the bot; also used for the skiplist file
                name (``projects/skiplists/<botid>.txt``).
            datapath: Path handed to ``Knead`` (with ``has_header=True``).
                Takes precedence over ``sparql`` when both are given.
            sparql: Query text handed to ``Query``; its results are
                materialised into a list.
            run_once: Pass True to force run-once mode regardless of the
                command line. With the default False the mode is decided by
                the command line alone (run-once unless "-run-all" is given).
            qid_key: Name of the record field holding the QID.
            empty_check: Callable deciding whether a QID value counts as
                empty. The default returns True for ``None`` and "".
            precheck_data: Callable run on each record before its item is
                fetched; a falsy result skips the record. The default
                accepts everything.
        """
        print(f"Setting up new bot '{botid}'")

        if (not datapath) and (not sparql):
            # NOTE(review): `Error` is not defined in the visible code; if it
            # is not imported elsewhere this line raises NameError instead.
            raise Error("No datapath and no sparql")

        # Parse command line arguments and play it safe: run once unless
        # "-run-all" is passed. An explicit run_once=True is honoured too;
        # previously the argument was overwritten and had no effect.
        args = pywikibot.handle_args()
        run_once = bool(run_once) or "-run-all" not in args
        print(f"Running once? {run_once}")

        self.id = botid
        self.run_once = run_once
        self.qid_key = qid_key
        self.empty_check = empty_check
        self.precheck_data = precheck_data
        self.skiplist = Skiplist(f"projects/skiplists/{self.id}.txt")

        if datapath:
            self.data = Knead(datapath, has_header=True).data()
        elif sparql:
            query = Query(sparql)
            self.data = list(query.iter_results())

    def iterate(self):
        """Yield a ``BotJob`` for every record that should be processed.

        Records are skipped (nothing is yielded) when they have no QID, are
        in the skiplist, fail ``precheck_data``, or when constructing the
        ``WikidataItem`` raises. After the caller has handled a yielded job
        the QID is added to the skiplist.
        """
        for index, item in enumerate(self.data):
            if self.qid_key not in item or self.empty_check(
                    item[self.qid_key]):
                print(f"This item has no QID, skipping, {item}")
                continue

            qid = item[self.qid_key]
            print()
            print(f"#{index + 1}/{len(self.data)} / {qid}")
            print(f"Data: {item}")
            print()

            if self.skiplist.has(qid):
                print(f"{qid} in skiplist, skipping")
                continue

            # This is just a hook for doing a sanity check before fetching
            # the data
            if not self.precheck_data(item):
                print("This item did not pass precheck, skipping")
                continue

            # Best effort: a record whose item cannot be loaded is reported
            # and skipped so the remaining records are still processed.
            try:
                wd_item = WikidataItem(qid)
            except Exception as e:
                print(f"Exception, not yielding this job: {e}")
                continue

            job = BotJob(data=item, item=wd_item)
            yield job

            # Execution resumes here once the caller asks for the next job.
            self.skiplist.add(qid)

            if self.run_once:
                print("Only running once...")
                sys.exit()

        send_im_message(f"Bot finished running: {self.id}")
Exemplo n.º 9
0
from dataknead import Knead
from pathlib import Path
from pywikibot import WbTime
from util.skiplist import Skiplist
from util.wikidata import WikidataItem, Props, Items
from util.dates import wbtime_now
import pywikibot
import sys

# Resolved directory that contains this script.
PATH = str(Path(__file__).parent.resolve())
# Skiplist backed by skiplists/kos-nijmegen.txt inside that directory.
skiplist = Skiplist(f"{PATH}/skiplists/kos-nijmegen.txt")
# Link to one fixed revision (oldid) of the Dutch Wikipedia list page.
WP_PERMALINK = (
    "https://nl.wikipedia.org/w/index.php"
    "?title=Lijst_van_beelden_in_Nijmegen&oldid=58187301"
)

def get_refs(item, url):
    """Build the list of reference claims for a statement sourced from ``url``.

    The list holds, in order: stated-in, reference URL, retrieved date and
    language-of-work claims, all created through ``item``.
    """
    stated_in = item.get_item_claim(Props.STATED_IN,
                                    Items.PUBLIC_ART_IN_NIJMEGEN)
    reference_url = item.get_url_claim(Props.REF_URL, url)
    # The retrieval date is fixed at 31 January 2021, not the current date.
    retrieved = item.get_claim(Props.RETRIEVED,
                               WbTime(year=2021, month=1, day=31))
    language = item.get_item_claim(Props.LANGUAGE_WORK, Items.DUTCH)

    return [stated_in, reference_url, retrieved, language]

def add_image_alias(item):
    """Load the Wikidata item, its claims and its Dutch aliases for a data row.

    ``item`` is a row with the keys "item" (the QID), "Afbeelding" and
    "alias".

    NOTE(review): the visible body only gathers values - ``img``, ``alias``,
    ``claims`` and ``aliases`` are never used and nothing is returned. This
    looks like the opening of a longer function; confirm against the full
    source before relying on it.
    """
    # Read everything needed from the row first: `item` is rebound below.
    qid = item["item"]
    img = item["Afbeelding"]
    alias = item["alias"]

    # From here on `item` is the WikidataItem for `qid`, not the input row.
    item = WikidataItem(qid)
    claims = item.get_claims()
    aliases = item.get_aliases("nl")
Exemplo n.º 10
0
from dataknead import Knead
from pathlib import Path
from pywikibot import WbTime
from util.skiplist import Skiplist
from util.wikidata import WikidataItem, Props, Items
from util.dates import wbtime_now
import pywikibot
import sys

# Resolved directory that contains this script.
PATH = str(Path(__file__).parent.resolve())
# Skiplist backed by skiplists/pp.txt inside that directory.
skiplist = Skiplist(f"{PATH}/skiplists/pp.txt")
# Link to one fixed revision (oldid) of the Dutch Wikipedia page.
WP_PERMALINK = (
    "https://nl.wikipedia.org/w/index.php"
    "?title=Historische_zetelverdeling_Tweede_Kamer&oldid=58661500"
)


def add_party_data(row):
    print("----" * 20)
    print()
    print(row)

    title = row["title"]
    qid = row["qid"]

    if skiplist.has(qid):
        print(f"In skiplist, skipping")
        return

    item = WikidataItem(qid)

    if Props.NR_OF_SEATS in item.get_claims():
        print("Got seats already, skipping party")
        return