def __init__(self, s: str, property_type, creator='http://www.isi.edu/datamart'):
    """
    Initialise the node and attach its wikibase property triples.

    :param s: identifier handed to both base initialisers and appended to
        every predicate prefix listed below.
    :param property_type: either a ``Datatype`` instance or any other object
        exposing a ``type`` attribute; that attribute becomes the object of
        ``wikibase:propertyType``.
    :param creator: forwarded unchanged to ``Entity.__init__``.
    """
    Entity.__init__(self, s, creator)
    Property.__init__(self, s)
    self.add_property(URI('rdf:type'), URI('wikibase:Property'))

    if isinstance(property_type, Datatype):
        type_uri = Datatype(property_type)
    else:
        type_uri = property_type
    self.add_property(URI('wikibase:propertyType'), type_uri.type)

    # (predicate, prefix) pairs, listed in the order the triples are added.
    derived_predicates = (
        ('wikibase:directClaim', 'wdt:'),
        ('wikibase:directClaimNormalized', 'wdtn:'),
        ('wikibase:claim', 'p:'),
        ('wikibase:statementProperty', 'ps:'),
        ('wikibase:statementValue', 'psv:'),
        ('wikibase:statementValueNormalized', 'psn:'),
        ('wikibase:qualifier', 'pq:'),
        ('wikibase:qualifierValue', 'pqv:'),
        ('wikibase:qualifierValueNormalized', 'pqn:'),
        ('wikibase:reference', 'pr:'),
        ('wikibase:referenceValue', 'prv:'),
        ('wikibase:referenceValueNormalized', 'prn:'),
        ('wikibase:novalue', 'wdno:'),
    )
    for predicate, prefix in derived_predicates:
        self.add_property(URI(predicate), URI(prefix + s))
def model_data() -> None:
    """
    Generate triples for user-defined properties and write them as Turtle.

    Reads ``Datasets/new-property-configuration.yaml`` from the parent of the
    current working directory, builds one ``WDProperty`` per top-level key
    (labels, descriptions and statements) and writes the serialized graph to
    ``new_properties/result.ttl`` under the same parent directory.

    :return: None
    :raises KeyError: if a property's ``type`` is not a key of ``type_map``.
    :raises ValueError: if a statement property resolves to a type that none
        of the branches below can turn into a value.
    """
    # The context manager closes the configuration file once it is parsed.
    with open(Path.cwd().parent / "Datasets/new-property-configuration.yaml", 'r', encoding='utf8') as stream:
        yaml_data = yaml.safe_load(stream)

    # initialize
    kg_schema = KGSchema()
    kg_schema.add_schema('@prefix : <http://isi.edu/> .', 'ttl')
    etk = ETK(kg_schema=kg_schema, modules=ETKModule)
    doc = etk.create_document({}, doc_id="http://isi.edu/default-ns/projects")

    # bind prefixes
    prefixes = (
        ('wikibase', 'http://wikiba.se/ontology#'),
        ('wd', 'http://www.wikidata.org/entity/'),
        ('wdt', 'http://www.wikidata.org/prop/direct/'),
        ('wdtn', 'http://www.wikidata.org/prop/direct-normalized/'),
        ('wdno', 'http://www.wikidata.org/prop/novalue/'),
        ('wds', 'http://www.wikidata.org/entity/statement/'),
        ('wdv', 'http://www.wikidata.org/value/'),
        ('wdref', 'http://www.wikidata.org/reference/'),
        ('p', 'http://www.wikidata.org/prop/'),
        ('pr', 'http://www.wikidata.org/prop/reference/'),
        ('prv', 'http://www.wikidata.org/prop/reference/value/'),
        ('prn', 'http://www.wikidata.org/prop/reference/value-normalized/'),
        ('ps', 'http://www.wikidata.org/prop/statement/'),
        ('psv', 'http://www.wikidata.org/prop/statement/value/'),
        ('psn', 'http://www.wikidata.org/prop/statement/value-normalized/'),
        ('pq', 'http://www.wikidata.org/prop/qualifier/'),
        ('pqv', 'http://www.wikidata.org/prop/qualifier/value/'),
        ('pqn', 'http://www.wikidata.org/prop/qualifier/value-normalized/'),
        ('skos', 'http://www.w3.org/2004/02/skos/core#'),
        ('prov', 'http://www.w3.org/ns/prov#'),
        ('schema', 'http://schema.org/'),
    )
    for prefix, namespace in prefixes:
        doc.kg.bind(prefix, namespace)

    sparql_endpoint = "https://query.wikidata.org/sparql"
    type_map = {'quantity': Datatype.QuantityValue, 'url': URLValue}
    # Remembers the looked-up type of each statement property for this run.
    property_type_cache = {}

    for k, v in yaml_data.items():
        p = WDProperty(k, type_map[v['type']], creator='http://www.isi.edu/t2wml')

        for lang, labels in v['label'].items():
            for label in labels:
                p.add_label(label, lang=lang)

        for lang, descriptions in v['description'].items():
            for description in descriptions:
                p.add_description(description, lang=lang)

        for pnode, items in v['statements'].items():
            for item in items:
                try:
                    property_type = property_type_cache[pnode]
                except KeyError:
                    property_type = get_property_type(pnode, sparql_endpoint)
                    property_type_cache[pnode] = property_type

                if property_type == "WikibaseItem":
                    value = Item(str(item['value']))
                elif property_type == "WikibaseProperty":
                    value = Property(item['value'])
                elif property_type == "String":
                    value = StringValue(item['value'])
                elif property_type == "Quantity":
                    value = QuantityValue(item['value'])
                elif property_type == "Time":
                    value = TimeValue(
                        str(item['value']),
                        Item(item["calendar"]),
                        translate_precision_to_integer(item["precision"]),
                        item["time_zone"])
                elif property_type == "Url":
                    value = URLValue(item['value'])
                elif property_type == "Monolingualtext":
                    value = MonolingualText(item['value'], item["lang"])
                elif property_type == "ExternalId":
                    value = ExternalIdentifier(item['value'])
                elif property_type == "GlobeCoordinate":
                    value = GlobeCoordinate(item["latitude"], item["longitude"], item["precision"])
                else:
                    # Without this branch a leftover object from an earlier
                    # iteration would be attached as the statement value.
                    raise ValueError(
                        "Unsupported property type {!r} for property {!r}".format(property_type, pnode))
                p.add_statement(pnode, value)

        doc.kg.add_subject(p)

    # Serialize before opening the target so a failure cannot leave a
    # truncated file; write UTF-8 to match how the configuration is read.
    data = doc.kg.serialize('ttl')
    with open(Path.cwd().parent / "new_properties/result.ttl", "w", encoding='utf8') as f:
        f.write(data)
def model_schema(self):
    """
    Build an ETK document describing the entities of the schema file.

    The schema is loaded with ``self.read_data(self.data['schema'])``. Its
    ``'prefix'`` entry (a list of ``{prefix: uri}`` mappings) supplies the
    custom prefixes; every other key containing ``':'`` is treated as
    ``<namespace>:<id>`` and turned into a ``WDItem`` (id contains ``'Q'``)
    or a ``WDProperty`` (id contains ``'P'``) with its descriptions, labels
    and statements.

    :return: the populated document.
    :raises Exception: if an entity id contains neither ``'Q'`` nor ``'P'``.
    :raises ValueError: if a statement property resolves to a type that none
        of the branches below can turn into a value.
    """
    # read data
    data = self.read_data(self.data['schema'])

    # initialize KGSchema
    custom_dict, ns_dict = {}, {'wd': 'http://www.wikidata.org/entity/'}
    for each in data['prefix']:
        for prefix, uri in each.items():
            custom_dict[prefix] = uri
            if prefix != 'wd':
                ns_dict[prefix] = uri + '/entity'

    kg_schema = KGSchema()
    kg_schema.add_schema('@prefix : <http://isi.edu/> .', 'ttl')
    etk = ETK(kg_schema=kg_schema, modules=ETKModule)
    doc = etk.create_document({}, doc_id='http://isi.edu/default-ns/projects')

    # bind prefix
    doc = create_custom_prefix(doc, custom_dict)

    type_map = {
        'quantity': Datatype.QuantityValue,
        'url': URLValue,
        'item': Datatype.Item,
        'time': Datatype.TimeValue,
        'string': Datatype.StringValue,
        'text': Datatype.MonolingualText
    }

    # model schema
    for key, spec in data.items():
        # Keys without a namespace separator (such as 'prefix') are not entities.
        if ':' not in key:
            continue
        parts = key.split(':')
        namespace, node_id = parts[0], parts[1]

        if 'Q' in node_id:
            p = WDItem(node_id, namespace=namespace, creator=':datamart')
        elif 'P' in node_id:
            p = WDProperty(node_id, type_map[spec['type']], namespace=namespace, creator=':datamart')
        else:
            raise Exception('There is no P/Q information.')

        for lang, descriptions in spec['description'].items():
            for description in descriptions:
                p.add_description(description, lang=lang)

        for lang, labels in spec['label'].items():
            for label in labels:
                p.add_label(label, lang=lang)

        for node, values in spec['statements'].items():
            # A statement property without an explicit namespace defaults to 'wd'.
            ns = node.split(':')[0] if ':' in node else 'wd'
            for val in values:
                prop_type = self.get_property_type(node, ns_dict[ns])
                if prop_type == 'WikibaseItem':
                    statement_value = Item(str(val['value']))
                elif prop_type == 'WikibaseProperty':
                    statement_value = Property(val['value'])
                elif prop_type == 'String':
                    statement_value = StringValue(val['value'])
                elif prop_type == 'Quantity':
                    statement_value = QuantityValue(val['value'])
                elif prop_type == 'Url':
                    statement_value = URLValue(val['value'])
                elif prop_type == 'Monolingualtext':
                    statement_value = MonolingualText(val['value'], val['lang'])
                else:
                    # Without this branch the entity dict or a previous
                    # statement's value would be attached instead.
                    raise ValueError(
                        'Unsupported property type {!r} for property {!r}'.format(prop_type, node))
                p.add_statement(node, statement_value)

        doc.kg.add_subject(p)

    return doc
def generate_triples(user_id: str, resolved_excel: list, sparql_endpoint: str, filetype: str = 'ttl',
                     created_by: str = 't2wml') -> str:
    """
    This function uses ETK to generate the RDF triples

    :param user_id: not read by this function.
    :param resolved_excel: sequence of dicts, each holding a ``"statement"``
        dict with ``"item"``, ``"property"``, the value fields required by the
        property's type and, optionally, a ``"qualifier"`` list of dicts with
        ``"property"`` plus value fields. Entries whose item is ``None`` are
        skipped.
    :param sparql_endpoint: passed to ``get_property_type`` for properties
        whose type is not yet in ``property_type_dict``.
    :param filetype: format name handed to ``doc.kg.serialize``.
    :param created_by: appended to ``'http://www.isi.edu/'`` to form the
        creator of every ``WDItem``.
    :return: the serialized graph.
    :raises ValueError: if a statement property has a type that cannot be
        turned into a value.
    :raises Exception: if any property lookup yields "Property Not Found".
    """
    # initialize
    kg_schema = KGSchema()
    kg_schema.add_schema('@prefix : <http://isi.edu/> .', 'ttl')
    etk = ETK(kg_schema=kg_schema, modules=ETKModule)
    doc = etk.create_document({}, doc_id="http://isi.edu/default-ns/projects")
    # Same dict object as property_type_dict: types found here are stored in it.
    property_type_map = property_type_dict

    # bind prefixes
    prefixes = (
        ('wikibase', 'http://wikiba.se/ontology#'),
        ('wd', 'http://www.wikidata.org/entity/'),
        ('wdt', 'http://www.wikidata.org/prop/direct/'),
        ('wdtn', 'http://www.wikidata.org/prop/direct-normalized/'),
        ('wdno', 'http://www.wikidata.org/prop/novalue/'),
        ('wds', 'http://www.wikidata.org/entity/statement/'),
        ('wdv', 'http://www.wikidata.org/value/'),
        ('wdref', 'http://www.wikidata.org/reference/'),
        ('p', 'http://www.wikidata.org/prop/'),
        ('pr', 'http://www.wikidata.org/prop/reference/'),
        ('prv', 'http://www.wikidata.org/prop/reference/value/'),
        ('prn', 'http://www.wikidata.org/prop/reference/value-normalized/'),
        ('ps', 'http://www.wikidata.org/prop/statement/'),
        ('psv', 'http://www.wikidata.org/prop/statement/value/'),
        ('psn', 'http://www.wikidata.org/prop/statement/value-normalized/'),
        ('pq', 'http://www.wikidata.org/prop/qualifier/'),
        ('pqv', 'http://www.wikidata.org/prop/qualifier/value/'),
        ('pqn', 'http://www.wikidata.org/prop/qualifier/value-normalized/'),
        ('skos', 'http://www.w3.org/2004/02/skos/core#'),
        ('prov', 'http://www.w3.org/ns/prov#'),
        ('schema', 'http://schema.org/'),
    )
    for prefix, namespace in prefixes:
        doc.kg.bind(prefix, namespace)

    is_error = False
    for i in resolved_excel:
        statement = i["statement"]
        _item = statement["item"]
        if _item is None:
            continue

        item = WDItem(_item, creator='http://www.isi.edu/{}'.format(created_by))
        property_type = _cached_property_type(statement["property"], property_type_map, sparql_endpoint)
        if property_type == "Property Not Found":
            is_error = True
            break

        value = _typed_value(property_type, statement, from_statement=True)
        if value is None:
            # Refuse to fall through with a value left over from a previous row.
            raise ValueError("Unsupported property type {!r} for property {!r}".format(
                property_type, statement["property"]))
        s = item.add_statement(statement["property"], value)
        doc.kg.add_subject(item)

        if "qualifier" in statement:
            for j in statement["qualifier"]:
                # The cache is keyed by the qualifier's own property.
                qualifier_type = _cached_property_type(j["property"], property_type_map, sparql_endpoint)
                if qualifier_type == "Property Not Found":
                    is_error = True
                    continue
                qualifier_value = _typed_value(qualifier_type, j, from_statement=False)
                if qualifier_value is None:
                    continue
                s.add_qualifier(j["property"], qualifier_value)
        doc.kg.add_subject(s)

    if is_error:
        raise Exception('data exception while generating triples')
    return doc.kg.serialize(filetype)


def _cached_property_type(pnode, type_map, sparql_endpoint):
    """Return the type of *pnode* from *type_map*, querying and caching it on a miss."""
    try:
        return type_map[pnode]
    except KeyError:
        property_type = get_property_type(pnode, sparql_endpoint)
        # Failed lookups are not stored, so a later call queries again.
        if property_type != "Property Not Found":
            type_map[pnode] = property_type
        return property_type


def _typed_value(property_type, record, from_statement):
    """Build the value object for *record* according to *property_type*, or None if unhandled."""
    if property_type == "WikibaseItem":
        return Item(str(record["value"]))
    if property_type == "WikibaseProperty":
        return Property(record["value"])
    if property_type == "String":
        return StringValue(record["value"])
    if property_type == "Quantity":
        quantity = record["value"]
        if from_statement:
            # Statement quantities have their commas removed before parsing.
            quantity = str(quantity).replace(',', '')
        return QuantityValue(quantity)
    if property_type == "Time":
        precision = record["precision"]
        if from_statement:
            precision = translate_precision_to_integer(precision)
        # NOTE(review): qualifier precision is passed through untranslated,
        # unlike statement precision - confirm this asymmetry is intended.
        return TimeValue(str(record["value"]), Item(record["calendar"]), precision, record["time_zone"])
    if property_type == "Url":
        return URLValue(record["value"])
    if property_type == "Monolingualtext":
        return MonolingualText(record["value"], record["lang"])
    if property_type == "ExternalId":
        return ExternalIdentifier(record["value"])
    if property_type == "GlobeCoordinate":
        return GlobeCoordinate(record["latitude"], record["longitude"], record["precision"])
    return None
def generate_triples(user_id: str, resolved_excel: list, sparql_endpoint: str, filetype: str = 'ttl') -> str:
    """
    This function uses ETK to generate the RDF triples

    :param user_id: not read by this function.
    :param resolved_excel: sequence of dicts, each holding a ``"statement"``
        dict with ``"item"``, ``"property"``, the value fields required by the
        property's type and, optionally, a ``"qualifier"`` list of dicts with
        ``"property"`` plus value fields.
    :param sparql_endpoint: passed to ``get_property_type`` for properties
        whose type is not yet in ``property_type_map``.
    :param filetype: format name handed to ``doc.kg.serialize``.
    :return: the serialized graph, or the string "Property Not Found" when a
        property lookup failed.
    :raises ValueError: if a statement property has a type that cannot be
        turned into a value.
    """
    # initialize
    kg_schema = KGSchema()
    kg_schema.add_schema('@prefix : <http://isi.edu/> .', 'ttl')
    etk = ETK(kg_schema=kg_schema, modules=ETKModule)
    doc = etk.create_document({}, doc_id="http://isi.edu/default-ns/projects")

    # bind prefixes
    prefixes = (
        ('wikibase', 'http://wikiba.se/ontology#'),
        ('wd', 'http://www.wikidata.org/entity/'),
        ('wdt', 'http://www.wikidata.org/prop/direct/'),
        ('wdtn', 'http://www.wikidata.org/prop/direct-normalized/'),
        ('wdno', 'http://www.wikidata.org/prop/novalue/'),
        ('wds', 'http://www.wikidata.org/entity/statement/'),
        ('wdv', 'http://www.wikidata.org/value/'),
        ('wdref', 'http://www.wikidata.org/reference/'),
        ('p', 'http://www.wikidata.org/prop/'),
        ('pr', 'http://www.wikidata.org/prop/reference/'),
        ('prv', 'http://www.wikidata.org/prop/reference/value/'),
        ('prn', 'http://www.wikidata.org/prop/reference/value-normalized/'),
        ('ps', 'http://www.wikidata.org/prop/statement/'),
        ('psv', 'http://www.wikidata.org/prop/statement/value/'),
        ('psn', 'http://www.wikidata.org/prop/statement/value-normalized/'),
        ('pq', 'http://www.wikidata.org/prop/qualifier/'),
        ('pqv', 'http://www.wikidata.org/prop/qualifier/value/'),
        ('pqn', 'http://www.wikidata.org/prop/qualifier/value-normalized/'),
        ('skos', 'http://www.w3.org/2004/02/skos/core#'),
        ('prov', 'http://www.w3.org/ns/prov#'),
        ('schema', 'http://schema.org/'),
    )
    for prefix, namespace in prefixes:
        doc.kg.bind(prefix, namespace)

    is_error = False
    for i in resolved_excel:
        statement = i["statement"]
        item = WDItem(statement["item"], creator='http://www.isi.edu/t2wml')

        property_type = _resolve_property_type(statement["property"], sparql_endpoint)
        if property_type == "Property Not Found":
            is_error = True
            break

        value = _value_from_record(property_type, statement, convert_precision=True)
        if value is None:
            # Refuse to fall through with a value left over from a previous row.
            raise ValueError("Unsupported property type {!r} for property {!r}".format(
                property_type, statement["property"]))
        s = item.add_statement(statement["property"], value)
        doc.kg.add_subject(item)

        if "qualifier" in statement:
            for j in statement["qualifier"]:
                qualifier_type = _resolve_property_type(j["property"], sparql_endpoint)
                if qualifier_type == "Property Not Found":
                    is_error = True
                    continue
                qualifier_value = _value_from_record(qualifier_type, j, convert_precision=False)
                if qualifier_value is None:
                    # No value could be built; do not attach a stale one.
                    continue
                s.add_qualifier(j["property"], qualifier_value)
        doc.kg.add_subject(s)

    if is_error:
        return "Property Not Found"
    return doc.kg.serialize(filetype)


def _resolve_property_type(pnode, sparql_endpoint):
    """Return the type of *pnode* from ``property_type_map``, querying and caching it on a miss."""
    try:
        return property_type_map[pnode]
    except KeyError:
        property_type = get_property_type(pnode, sparql_endpoint)
        # Failed lookups are not stored, so a later call queries again
        # instead of being answered from the cache forever.
        if property_type != "Property Not Found":
            property_type_map[pnode] = property_type
        return property_type


def _value_from_record(property_type, record, convert_precision):
    """Build the value object for *record* according to *property_type*, or None if unhandled."""
    if property_type == "WikibaseItem":
        return Item(str(record["value"]))
    if property_type == "WikibaseProperty":
        return Property(record["value"])
    if property_type == "String":
        return StringValue(record["value"])
    if property_type == "Quantity":
        return QuantityValue(record["value"])
    if property_type == "Time":
        precision = record["precision"]
        if convert_precision:
            precision = translate_precision_to_integer(precision)
        # NOTE(review): qualifier precision is passed through untranslated,
        # unlike statement precision - confirm this asymmetry is intended.
        return TimeValue(str(record["value"]), Item(record["calendar"]), precision, record["time_zone"])
    if property_type == "Url":
        return URLValue(record["value"])
    if property_type == "Monolingualtext":
        return MonolingualText(record["value"], record["lang"])
    if property_type == "ExternalId":
        return ExternalIdentifier(record["value"])
    if property_type == "GlobeCoordinate":
        return GlobeCoordinate(record["latitude"], record["longitude"], record["precision"])
    return None