def test_reference_get_all_sources(self):
    """Check get_all_sources() for test-only, notest-only and mixed input."""
    # (positional args, keyword args, expected get_all_sources() result)
    scenarios = (
        ((), {'source_test': self.ref_1}, [self.ref_1]),
        ((), {'source_notest': self.ref_1}, [self.ref_1]),
        ((self.ref_1, self.ref_2), {}, [self.ref_1, self.ref_2]),
    )
    for args, kwargs, expected in scenarios:
        reference = WD.Reference(*args, **kwargs)
        self.assertEqual(reference.get_all_sources(), expected)
def test_reference_init_single_claim_gives_list(self):
    """A source passed on its own is stored as a one-element list."""
    # (positional args, keyword args, expected source_test,
    #  expected source_notest)
    scenarios = (
        ((), {'source_test': self.ref_1}, [self.ref_1], []),
        ((), {'source_notest': self.ref_1}, [], [self.ref_1]),
        ((self.ref_1, self.ref_2), {}, [self.ref_1], [self.ref_2]),
    )
    for args, kwargs, expected_test, expected_notest in scenarios:
        reference = WD.Reference(*args, **kwargs)
        self.assertEqual(reference.source_test, expected_test)
        self.assertEqual(reference.source_notest, expected_notest)
def test_reference_init_non_claim_error(self):
    """A plain string given as either kind of source raises pwbError."""
    expected_message = (
        'You tried to create a reference with a non-Claim source')
    for keyword in ('source_test', 'source_notest'):
        with self.assertRaises(pwbError) as caught:
            WD.Reference(**{keyword: 'foo'})
        self.assertEqual(str(caught.exception), expected_message)
def test_reference_init_with_list(self):
    """Lists given as sources are stored unchanged and in the same order."""
    forward = [self.ref_1, self.ref_2]
    backward = [self.ref_2, self.ref_1]

    only_tested = WD.Reference(source_test=[self.ref_1, self.ref_2])
    self.assertEqual(only_tested.source_test, forward)
    self.assertEqual(only_tested.source_notest, [])

    only_untested = WD.Reference(source_notest=[self.ref_1, self.ref_2])
    self.assertEqual(only_untested.source_test, [])
    self.assertEqual(only_untested.source_notest, forward)

    combined = WD.Reference(
        [self.ref_1, self.ref_2],
        [self.ref_2, self.ref_1])
    self.assertEqual(combined.source_test, forward)
    self.assertEqual(combined.source_notest, backward)
def make_ref(self, date):
    """Make a correctly formatted ref object for claims.

    Contains 4 parts:
    * P248: Stated in <the kulturnav dataset>
    * P577: Publication date <from the document>
    * P854: Reference url <using the current uuid>
    * P813: Retrieval date <current date>

    P854 should be in source_test (after retroactively fixing older
    references) but by being in source_notest we ensure that duplicate
    uuids don't source the statement twice.

    @param date: The "last modified" time of the document
    @type date: pywikibot.WbTime
    @return: the formatted reference
    @rtype WD.Reference
    """
    reference_url = 'http://kulturnav.org/%s' % self.current_uuid

    # Only the "stated in" claim goes in source_test.
    dataset_page = self.wd.QtoItemPage(self.DATASET_Q)
    stated_in = self.wd.make_simple_claim('P248', dataset_page)

    published = self.wd.make_simple_claim('P577', date)
    url_claim = self.wd.make_simple_claim('P854', reference_url)
    retrieved = self.wd.make_simple_claim(
        'P813', helpers.today_as_WbTime())

    return WD.Reference(
        source_test=stated_in,
        source_notest=[published, url_claim, retrieved])
def make_url_ref(self, url, fetch_date, publish_date=None):
    """Make a Reference object for a url.

    Contains up to 3 parts:
    * P854: Reference url <using the input url> (source_test)
    * P577: Publication date, only when publish_date is given
      (source_notest)
    * P813: Retrieval date (source_notest)

    :param url: the source url
    :param fetch_date: the retrieval date (iso)
    :param publish_date: the publication date (iso), optional
    :return: WdS.Reference
    """
    untested = []
    if publish_date:
        published = helpers.iso_to_WbTime(publish_date)
        untested.append(self.wd.make_simple_claim('P577', published))

    retrieved = helpers.iso_to_WbTime(fetch_date)
    untested.append(self.wd.make_simple_claim('P813', retrieved))

    url_claim = self.wd.make_simple_claim('P854', url)
    return WdS.Reference(source_test=[url_claim], source_notest=untested)
def test_reference_repr(self):
    """Check repr() of a Reference; also ensures Claim has a repr."""
    expected = (
        'WD.Reference('
        'test: [WD.Claim(P55: foo)], '
        'no_test: [WD.Claim(P55: bar)])'
    )
    reference = WD.Reference(self.ref_1, self.ref_2)
    self.assertEqual(repr(reference), expected)
def make_lido_ref(self, lido_data):
    """
    Make a Reference object for the dataset (not implemented yet).

    The intended reference contains 4 parts:
    * P248: Stated in <the Nationalmuseum dataset>
    * P577: Publication date <from creation date of the document>
    * P854: Reference url <using the input url>
    * P813: Retrieval date <current date>

    The draft below is unfinished: it uses the placeholder property
    'P?' for the source file and has no P248 claim. The method therefore
    always raises instead of calling the interactive ``exit()`` helper,
    which would end the whole process silently with status 0.

    :param lido_data: mapping from which the draft reads 'source_file'
    :raises NotImplementedError: always, until the draft is completed
    """
    raise NotImplementedError(
        'make_lido_ref is unfinished: the source file claim still uses '
        'the placeholder property "P?"')

    # Draft implementation, unreachable until the raise above is removed.
    # P248: Nationalmuseum dataset
    xml_file = lido_data.get('source_file')
    date = helpers.today_as_WbTime()
    pub_date = helpers.iso_to_WbTime(u'2016-09-30')
    zip_url = u'https://github.com/NationalmuseumSWE/WikidataCollection/' \
              u'blob/master/valid_items_transform_1677.tgz'
    ref = WD.Reference(
        source_test=[
            self.wd.make_simple_claim(u'P854', zip_url),
            self.wd.make_simple_claim(u'P577', pub_date),
            self.wd.make_simple_claim(u'P?', xml_file),
        ],
        source_notest=self.wd.make_simple_claim(u'P813', date))
    return ref
def setUp(self):
    """Create two P123 qualifiers and a one-claim reference as fixtures."""
    test_site = Site('test', 'wikidata')

    self.q_1 = WD.Qualifier('P123', 'foo')
    self.q_2 = WD.Qualifier('P123', 'bar')

    source_claim = Claim(test_site, 'P55')
    source_claim.setTarget('foo')
    self.ref = WD.Reference(source_test=[source_claim])
def make_url_reference(self, uri):
    """Make a Reference object with a retrieval url and today's date.

    The url claim (P854) is passed as source_test and the retrieval date
    claim (P813) as source_notest.

    @param uri: retrieval uri/url
    @type uri: str
    @rtype: WD.Reference
    """
    today = helpers.today_as_WbTime()
    url_claim = self.wd.make_simple_claim(u'P854', uri)
    retrieved_claim = self.wd.make_simple_claim(u'P813', today)
    return WD.Reference(
        source_test=url_claim,
        source_notest=retrieved_claim)
class WikidataItem(object):
    """Data object gathering the content to upload for one Wikidata item.

    The content is kept in ``self.wd_item`` (see construct_wd_item) and
    problems met while processing the raw data are collected in
    ``self.problem_report``.
    """

    def __init__(self, db_row_dict, repository, data_files, existing,
                 caches):
        """
        Store the inputs and create the empty data structure.

        :param db_row_dict: raw data of the item, stored as ``raw_data``
        :param repository: handed to WDS and used as the site of
            quantities
        :param data_files: mapping whose "properties" entry is stored as
            ``props``
        :param existing: stored as ``existing``
        :param caches: stored as-is and returned by get_caches()
        """
        self.repo = repository
        self.existing = existing
        self.wdstuff = WDS(self.repo)
        self.raw_data = db_row_dict
        self.caches = caches
        self.props = data_files["properties"]
        self.construct_wd_item()
        self.problem_report = {}

    def get_caches(self):
        """Return the caches object given at construction."""
        return self.caches

    def make_q_item(self, qnumber):
        """
        Turn a Q-id into whatever WDS.QtoItemPage returns for it.

        :param qnumber: the Q-id to convert
        """
        return self.wdstuff.QtoItemPage(qnumber)

    def make_pywikibot_item(self, value):
        """
        Convert a raw value into the object used as a statement target.

        A one-element list is unwrapped first. After that:

        * a value accepted by utils.string_is_q_item goes through
          make_q_item
        * "novalue" is returned unchanged
        * a dict with 'monolingual_value' (and 'lang') becomes a
          pywikibot.WbMonolingualText
        * a dict with 'quantity_value' (and optionally 'unit', a Q-id)
          becomes a pywikibot.WbQuantity
        * a dict with 'date_value' (a dict that may hold 'year', 'month'
          and 'day') becomes a pywikibot.WbTime
        * anything else is returned unchanged

        :param value: the raw value
        :return: the converted value
        """
        if isinstance(value, list) and len(value) == 1:
            value = value[0]

        if utils.string_is_q_item(value):
            val_item = self.make_q_item(value)
        elif value == "novalue":
            val_item = value
        elif isinstance(value, dict) and 'monolingual_value' in value:
            val_item = pywikibot.WbMonolingualText(
                text=value['monolingual_value'],
                language=value['lang'])
        elif isinstance(value, dict) and 'quantity_value' in value:
            if 'unit' in value:
                unit = self.wdstuff.QtoItemPage(value["unit"])
            else:
                unit = None
            val_item = pywikibot.WbQuantity(
                amount=value['quantity_value'], unit=unit, site=self.repo)
        elif isinstance(value, dict) and 'date_value' in value:
            date_dict = value["date_value"]
            val_item = pywikibot.WbTime(year=date_dict.get("year"),
                                        month=date_dict.get("month"),
                                        day=date_dict.get("day"))
        else:
            val_item = value
        return val_item

    def make_statement(self, value):
        """
        Wrap a value in a WDS Statement.

        :param value: the statement target; 'somevalue' and 'novalue' are
            flagged as special
        :return: the WDS Statement
        """
        special = value in ['somevalue', 'novalue']
        return self.wdstuff.Statement(value, special=special)

    def make_qualifier_applies_to(self, value):
        """
        Make a qualifier using the "applies_to_part" property.

        :param value: Q-id of the qualifier target
        :return: the WDS Qualifier
        """
        prop_item = self.props["applies_to_part"]
        target_item = self.wdstuff.QtoItemPage(value)
        return self.wdstuff.Qualifier(prop_item, target_item)

    def add_statement(self, prop_name, value, quals=None, ref=None):
        """
        Append a statement to the data object.

        :param prop_name: key of the property in ``self.props``
        :param value: raw value, converted with make_pywikibot_item
        :param quals: a qualifier or a list of qualifiers, optional
        :param ref: reference stored alongside the statement, optional
        """
        base = self.wd_item["statements"]
        prop = self.props[prop_name]
        if quals is None:
            quals = []
        wd_claim = self.make_pywikibot_item(value)
        statement = self.make_statement(wd_claim)
        for qual in helpers.listify(quals):
            statement.addQualifier(qual)
        base.append({"prop": prop, "value": statement, "ref": ref})

    def make_stated_in_ref(self, value, pub_date=None, ref_url=None,
                           retrieved_date=None):
        """
        Make a reference of type "stated in".

        The reference url and retrieval date are only included when both
        are given; a url without a retrieval date is left out.

        :param value: Q-id of the source
        :param pub_date: publication date in the format "%Y-%m-%d",
            optional
        :param ref_url: reference url, optional
        :param retrieved_date: retrieval date in the format "%Y-%m-%d",
            optional
        :return: the WDS Reference
        """
        item_prop = self.props["stated_in"]
        published_prop = self.props["publication_date"]
        published_claim = None
        if pub_date:
            pub_date = utils.date_to_dict(pub_date, "%Y-%m-%d")
            timestamp = self.make_pywikibot_item({"date_value": pub_date})
            published_claim = self.wdstuff.make_simple_claim(
                published_prop, timestamp)
        source_item = self.wdstuff.QtoItemPage(value)
        source_claim = self.wdstuff.make_simple_claim(item_prop, source_item)

        if not (ref_url and retrieved_date):
            # published_claim may be None here; it is passed on unchanged.
            return self.wdstuff.Reference(source_test=[source_claim],
                                          source_notest=published_claim)

        ref_url_prop = self.props["reference_url"]
        retrieved_date_prop = self.props["retrieved"]
        retrieved_date = utils.date_to_dict(retrieved_date, "%Y-%m-%d")
        retrieved_date = self.make_pywikibot_item(
            {"date_value": retrieved_date})
        ref_url_claim = self.wdstuff.make_simple_claim(
            ref_url_prop, ref_url)
        retrieved_on_claim = self.wdstuff.make_simple_claim(
            retrieved_date_prop, retrieved_date)

        # The publication date, when present, precedes the retrieval date.
        if published_claim:
            untested = [published_claim, retrieved_on_claim]
        else:
            untested = [retrieved_on_claim]
        return self.wdstuff.Reference(
            source_test=[source_claim, ref_url_claim],
            source_notest=untested)

    def associate_wd_item(self, wd_item):
        """
        Associate the data object with a Wikidata item.

        :param wd_item: the item to associate; None is ignored
        """
        if wd_item is not None:
            self.wd_item["wd-item"] = wd_item

    def set_upload(self, booln):
        """
        Set the "upload" flag of the data object.

        :param booln: the new value of the flag
        """
        self.wd_item["upload"] = booln

    def add_label(self, language, text):
        """
        Add a label in a specific language.

        :param language: language of the label
        :param text: content of the label
        """
        base = self.wd_item["labels"]
        base.append({"language": language, "value": text})

    def add_description(self, language, text):
        """
        Add a description in a specific language.

        :param language: language of the description
        :param text: content of the description
        """
        base = self.wd_item["descriptions"]
        base.append({"language": language, "value": text})

    def add_to_report(self, key_name, raw_data, id_no, prop_name=None):
        """
        Add data to problem report json.

        The associated Q-number (or an empty string when there is none)
        and the id of the item are stored in the report as well.

        Optionally, assign a Property ID that the data
        should have been used as a value for.

        :param key_name: name of the field containing
                         the problematic data, e.g. the header of the column
        :type key_name: string
        :param raw_data: the data that we failed to process
        :type raw_data: string
        :param id_no: unique id assigned to item, e.g. url
        :type id_no: string
        :param prop_name: name of the property,
                          as stated in the props library file;
                          names starting with '_' are stored verbatim
        :type prop_name: string
        """
        prop = None
        if prop_name:
            if prop_name.startswith('_'):
                prop = prop_name
            else:
                prop = self.props.get(prop_name)
        self.problem_report[key_name] = {"value": raw_data,
                                         "target": prop}
        # NOTE(review): this tests for "wd-item" although the value is
        # stored under "Q", so "Q" is rewritten on every call unless a
        # field named "wd-item" was reported. Confirm which is intended.
        if "wd-item" not in self.problem_report:
            if self.wd_item["wd-item"] is not None:
                self.problem_report["Q"] = self.wd_item["wd-item"]
            else:
                self.problem_report["Q"] = ""
        self.problem_report["url"] = id_no

    def print_report(self):
        """Print the problem report on screen."""
        print(
            json.dumps(self.problem_report,
                       sort_keys=True,
                       indent=4,
                       ensure_ascii=False,
                       default=utils.datetime_convert))

    def get_report(self):
        """Retrieve the problem report."""
        return self.problem_report

    def construct_wd_item(self):
        """
        Create the empty structure of the data object.

        This creates self.wd_item, a dict holding the "upload" flag, the
        lists of statements, labels and descriptions and the associated
        "wd-item" (initially None).
        """
        self.wd_item = {}
        self.wd_item["upload"] = True
        self.wd_item["statements"] = []
        self.wd_item["labels"] = []
        self.wd_item["descriptions"] = []
        self.wd_item["wd-item"] = None
def test_reference_init_empty_error(self):
    """Creating a Reference without any source raises pwbError."""
    expected_message = 'You tried to create a reference without any sources'
    with self.assertRaises(pwbError) as caught:
        WD.Reference()
    self.assertEqual(str(caught.exception), expected_message)
class WikidataItem(object):
    """Data object gathering the content to upload for one Wikidata item.

    The content is kept in ``self.wd_item`` (see construct_wd_item).
    """

    def __init__(self, db_row_dict, repository, data_files, existing):
        """
        Store the inputs and create the empty data structure.

        :param db_row_dict: raw data of the item, stored as ``raw_data``
        :param repository: handed to WDS and used as the site of
            quantities
        :param data_files: mapping whose "properties" entry is stored as
            ``props``
        :param existing: stored as ``existing``
        """
        self.repo = repository
        self.existing = existing
        self.wdstuff = WDS(self.repo)
        self.raw_data = db_row_dict
        self.props = data_files["properties"]
        self.construct_wd_item()
        self.problem_report = {}

    def make_q_item(self, qnumber):
        """
        Turn a Q-id into whatever WDS.QtoItemPage returns for it.

        :param qnumber: the Q-id to convert
        """
        return self.wdstuff.QtoItemPage(qnumber)

    def make_pywikibot_item(self, value):
        """
        Convert a raw value into the object used as a statement target.

        A one-element list is unwrapped first. After that:

        * a value accepted by utils.string_is_q_item goes through
          make_q_item
        * "novalue" is returned unchanged
        * a dict with 'quantity_value' (and optionally 'unit', a Q-id)
          becomes a pywikibot.WbQuantity
        * a dict with 'date_value' (a dict that may hold 'year', 'month'
          and 'day') becomes a pywikibot.WbTime
        * anything else is returned unchanged

        :param value: the raw value
        :return: the converted value
        """
        if isinstance(value, list) and len(value) == 1:
            value = value[0]

        if utils.string_is_q_item(value):
            val_item = self.make_q_item(value)
        elif value == "novalue":
            val_item = value
        elif isinstance(value, dict) and 'quantity_value' in value:
            if 'unit' in value:
                unit = self.wdstuff.QtoItemPage(value["unit"])
            else:
                unit = None
            val_item = pywikibot.WbQuantity(
                amount=value['quantity_value'], unit=unit, site=self.repo)
        elif isinstance(value, dict) and 'date_value' in value:
            date_dict = value["date_value"]
            val_item = pywikibot.WbTime(year=date_dict.get("year"),
                                        month=date_dict.get("month"),
                                        day=date_dict.get("day"))
        else:
            val_item = value
        return val_item

    def make_statement(self, value):
        """
        Wrap a value in a WDS Statement.

        :param value: the statement target; 'somevalue' and 'novalue' are
            flagged as special
        :return: the WDS Statement
        """
        special = value in ['somevalue', 'novalue']
        return self.wdstuff.Statement(value, special=special)

    def make_qualifier_applies_to(self, value):
        """
        Make a qualifier using the "applies_to_part" property.

        :param value: Q-id of the qualifier target
        :return: the WDS Qualifier
        """
        prop_item = self.props["applies_to_part"]
        target_item = self.wdstuff.QtoItemPage(value)
        return self.wdstuff.Qualifier(prop_item, target_item)

    def add_statement(self, prop_name, value, quals=None, ref=None):
        """
        Append a statement to the data object.

        :param prop_name: key of the property in ``self.props``
        :param value: raw value, converted with make_pywikibot_item
        :param quals: a qualifier or a list of qualifiers, optional
        :param ref: reference stored alongside the statement, optional
        """
        base = self.wd_item["statements"]
        prop = self.props[prop_name]
        if quals is None:
            quals = []
        wd_claim = self.make_pywikibot_item(value)
        statement = self.make_statement(wd_claim)
        for qual in helpers.listify(quals):
            statement.addQualifier(qual)
        base.append({"prop": prop, "value": statement, "ref": ref})

    def make_stated_in_ref(self, value, pub_date, ref_url=None,
                           retrieved_date=None):
        """
        Make a reference of type "stated in".

        The reference url and retrieval date are only included when both
        are given; a url without a retrieval date is left out.

        :param value: Q-id of the source
        :param pub_date: publication date in the format "%Y-%m-%d"
        :param ref_url: reference url, optional
        :param retrieved_date: retrieval date in the format "%Y-%m-%d",
            optional
        :return: the WDS Reference
        """
        item_prop = self.props["stated_in"]
        published_prop = self.props["publication_date"]
        pub_date = utils.date_to_dict(pub_date, "%Y-%m-%d")
        timestamp = self.make_pywikibot_item({"date_value": pub_date})
        published_claim = self.wdstuff.make_simple_claim(
            published_prop, timestamp)
        source_item = self.wdstuff.QtoItemPage(value)
        source_claim = self.wdstuff.make_simple_claim(item_prop, source_item)

        if not (ref_url and retrieved_date):
            return self.wdstuff.Reference(source_test=[source_claim],
                                          source_notest=published_claim)

        ref_url_prop = self.props["reference_url"]
        retrieved_date_prop = self.props["retrieved"]
        retrieved_date = utils.date_to_dict(retrieved_date, "%Y-%m-%d")
        retrieved_date = self.make_pywikibot_item(
            {"date_value": retrieved_date})
        ref_url_claim = self.wdstuff.make_simple_claim(
            ref_url_prop, ref_url)
        retrieved_on_claim = self.wdstuff.make_simple_claim(
            retrieved_date_prop, retrieved_date)
        return self.wdstuff.Reference(
            source_test=[source_claim, ref_url_claim],
            source_notest=[published_claim, retrieved_on_claim])

    def associate_wd_item(self, wd_item):
        """
        Associate the data object with a Wikidata item.

        :param wd_item: the item to associate; None is ignored
        """
        if wd_item is not None:
            self.wd_item["wd-item"] = wd_item

    def add_label(self, language, text):
        """
        Add a label in a specific language.

        :param language: language of the label
        :param text: content of the label
        """
        base = self.wd_item["labels"]
        base.append({"language": language, "value": text})

    def add_description(self, language, text):
        """
        Add a description in a specific language.

        :param language: language of the description
        :param text: content of the description
        """
        base = self.wd_item["descriptions"]
        base.append({"language": language, "value": text})

    def construct_wd_item(self):
        """
        Create the empty structure of the data object.

        This creates self.wd_item, a dict holding the "upload" flag, the
        lists of statements, labels and descriptions and the associated
        "wd-item" (initially None).
        """
        self.wd_item = {}
        self.wd_item["upload"] = True
        self.wd_item["statements"] = []
        self.wd_item["labels"] = []
        self.wd_item["descriptions"] = []
        self.wd_item["wd-item"] = None
class WikidataItem(object):
    """Basic data object for upload to Wikidata."""

    def __init__(self, db_row_dict, repository, data_files, existing):
        """
        Initialize the data object.

        :param db_row_dict: raw data from the data source
        :type db_row_dict: string
        :param repository: data repository (Wikidata site)
        :type repository: site instance
        :param data_files: dict of various mapping files; the
            "properties" and "items" entries are read
        :type data_files: dictionary
        :param existing: WD items that already have an unique id
        :type existing: dictionary
        """
        self.repo = repository
        self.existing = existing
        self.wdstuff = WDS(self.repo)
        self.raw_data = db_row_dict
        self.props = data_files["properties"]
        self.items = data_files["items"]
        self.construct_wd_item()
        self.problem_report = {}

    def make_q_item(self, qnumber):
        """
        Create a regular Wikidata ItemPage.

        :param qnumber: Q-item that we want to get an ItemPage of
        :type qnumber: string
        :return: an ItemPage for pywikibot
        """
        return self.wdstuff.QtoItemPage(qnumber)

    def make_pywikibot_item(self, value):
        """
        Create a statement target in pywikibot-ready format.

        A one-element list is unwrapped first. The value can then be:

        * an item (value is a Q-string)
        * the special value "novalue" (returned unchanged)
        * an amount with or without unit (value is a dict with
          'quantity_value' and optionally 'unit')
        * a date (value is a dict with 'date_value', itself a dict that
          must contain 'year', 'month' and 'day')
        * anything else, e.g. a string (returned unchanged)

        :param value: the content of the item
        :type value: it can be a string or a dictionary, see above.
        :return: a pywikibot item of the type determined by the input
            data: ItemPage, WbQuantity, WbTime or the unchanged value.
        """
        if isinstance(value, list) and len(value) == 1:
            value = value[0]

        if utils.string_is_q_item(value):
            val_item = self.make_q_item(value)
        elif value == "novalue":
            val_item = value
        elif isinstance(value, dict) and 'quantity_value' in value:
            if 'unit' in value:
                unit = self.wdstuff.QtoItemPage(value["unit"])
            else:
                unit = None
            val_item = pywikibot.WbQuantity(
                amount=value['quantity_value'], unit=unit, site=self.repo)
        elif isinstance(value, dict) and 'date_value' in value:
            date_dict = value["date_value"]
            val_item = pywikibot.WbTime(year=date_dict["year"],
                                        month=date_dict["month"],
                                        day=date_dict["day"])
        else:
            val_item = value
        return val_item

    def make_statement(self, value):
        """
        Create a Wikidatastuff statement.

        Supports the special data types 'somevalue' and 'novalue'.

        :param value: the content of the statement
        :type value: pywikibot item
        :return: a wikidatastuff statement
        """
        special = value in ['somevalue', 'novalue']
        return self.wdstuff.Statement(value, special=special)

    def make_qualifier_applies_to(self, value):
        """
        Create a qualifier to a statement with type 'applies to part'.

        :param value: Q-item that this applies to
        :type value: string
        :return: a wikidatastuff Qualifier
        """
        prop_item = self.props["applies_to_part"]
        target_item = self.wdstuff.QtoItemPage(value)
        return self.wdstuff.Qualifier(prop_item, target_item)

    def add_statement(self, prop_name, value, quals=None, ref=None):
        """
        Add a statement to the data object.

        :param prop_name: name of the property, looked up in the
            properties mapping
        :type prop_name: string
        :param value: content of the statement
        :type value: it can be a string representing
                     a Q-item or a dictionary of an amount
        :param quals: possibly qualifier items
        :type quals: a wikidatastuff Qualifier item,
                     or a list of them
        :param ref: reference item
        :type ref: a wikidatastuff Reference item
        """
        base = self.wd_item["statements"]
        prop = self.props[prop_name]
        if quals is None:
            quals = []
        wd_claim = self.make_pywikibot_item(value)
        statement = self.make_statement(wd_claim)
        for qual in helpers.listify(quals):
            statement.addQualifier(qual)
        base.append({"prop": prop, "value": statement, "ref": ref})

    def make_stated_in_ref(self, value, pub_date, ref_url=None,
                           retrieved_date=None):
        """
        Make a reference object of type 'stated in'.

        The reference url and retrieval date are only included when both
        are given; a url without a retrieval date is left out.

        :param value: Q-item where sth is stated
        :type value: string
        :param pub_date: timestamp in format "1999-09-30"
        :type pub_date: string
        :param ref_url: optionally a reference url
        :type ref_url: string
        :param retrieved_date: timestamp in format "1999-09-30"
        :type retrieved_date: string
        :return: a wikidatastuff Reference item
        """
        item_prop = self.props["stated_in"]
        published_prop = self.props["publication_date"]
        pub_date = utils.date_to_dict(pub_date, "%Y-%m-%d")
        timestamp = self.make_pywikibot_item({"date_value": pub_date})
        published_claim = self.wdstuff.make_simple_claim(
            published_prop, timestamp)
        source_item = self.wdstuff.QtoItemPage(value)
        source_claim = self.wdstuff.make_simple_claim(item_prop, source_item)

        if not (ref_url and retrieved_date):
            return self.wdstuff.Reference(
                source_test=[source_claim],
                source_notest=published_claim
            )

        ref_url_prop = self.props["reference_url"]
        retrieved_date_prop = self.props["retrieved"]
        retrieved_date = utils.date_to_dict(retrieved_date, "%Y-%m-%d")
        retrieved_date = self.make_pywikibot_item(
            {"date_value": retrieved_date})
        ref_url_claim = self.wdstuff.make_simple_claim(
            ref_url_prop, ref_url)
        retrieved_on_claim = self.wdstuff.make_simple_claim(
            retrieved_date_prop, retrieved_date)
        return self.wdstuff.Reference(
            source_test=[source_claim, ref_url_claim],
            source_notest=[published_claim, retrieved_on_claim])

    def associate_wd_item(self, wd_item):
        """
        Associate the data object with a Wikidata item.

        Nothing happens when wd_item is None; otherwise the association
        is also printed on screen.

        :param wd_item: Q-item that shall be assigned to the
                        data object.
        :type wd_item: string
        """
        if wd_item is not None:
            self.wd_item["wd-item"] = wd_item
            print("Associated WD item: ", wd_item)

    def add_label(self, language, text):
        """
        Add a label in a specific language.

        :param language: code of language, e.g. "fi"
        :type language: string
        :param text: content of the label
        :type text: string
        """
        base = self.wd_item["labels"]
        base.append({"language": language, "value": text})

    def add_description(self, language, text):
        """
        Add a description in a specific language.

        :param language: code of language, e.g. "fi"
        :type language: string
        :param text: content of the description
        :type text: string
        """
        base = self.wd_item["descriptions"]
        base.append({"language": language, "value": text})

    def construct_wd_item(self):
        """
        Create the empty structure of the data object.

        This creates self.wd_item -- a dict container of all
        the data content of the item.
        """
        self.wd_item = {}
        self.wd_item["upload"] = True
        self.wd_item["statements"] = []
        self.wd_item["labels"] = []
        self.wd_item["descriptions"] = []
        self.wd_item["wd-item"] = None
def make_commons_reference(self):
    """Make a Reference object saying imported from Wikimedia Commons."""
    commons_page = self.wd.QtoItemPage(COMMONS_Q)
    # P143: imported from
    imported_from = self.wd.make_simple_claim(u'P143', commons_page)
    return WD.Reference(source_test=imported_from)