示例#1
0
    "word sense": "Q22504",
    "word-form": "Q22505",
    "word-formation": "Q22506",
    "wordform": "Q22507",
    "wordnet": "Q22508"
}

# Import one batch of SkE terms: each CSV row is resolved to an LWB term item,
# which then receives the scheme statement (P74) and the SkE score statement
# (P82) together with two qualifiers on that score.
ske_terms_csv = (config.datafolder +
                 "/terms/SkE terms for SKOS vocab - batch 1.csv")
with open(ske_terms_csv, 'r', encoding="utf-8") as csvfile:
    csvdict = csv.DictReader(csvfile)

    for item in csvdict:
        time.sleep(1)  # one-second pause before every row
        print(item)

        # Rows with a concept URI are resolved through lwb.getqid; rows whose
        # URI cell is empty are looked up by keyword in the `map` table.
        lwbqid = (lwb.getqid("Q7", item['SKOS Concept URI'])
                  if item['SKOS Concept URI'] != ""
                  else map[item['Keyword4newScheme']])

        # skos:inScheme SkE #1
        schemeStatement = lwb.updateclaim(lwbqid, "P74", "Q22279", "item")

        # The score is stored as a string claim; its statement id is needed
        # to hang the two qualifiers on it.
        scoreStatement = lwb.updateclaim(
            lwbqid, "P82", item['SkE score'], "string")
        lwb.setqualifier(
            lwbqid, "P82", scoreStatement, "P83", "Q22279", "item")
        lwb.setqualifier(
            lwbqid, "P82", scoreStatement, "P88",
            item['Keyword4newScheme'], "string")
示例#2
0
    "http://www.w3.org/2004/02/skos/core#exactMatch": "P78",
    "http://www.w3.org/2004/02/skos/core#relatedMatch": "P79",
    "http://www.w3.org/2004/02/skos/core#definition": "P80"
}

# Load the SPARQL JSON export and replay every triple against LWB:
# predicates listed in `propmap` become claims, skos:prefLabel / skos:altLabel
# become labels / aliases, and any other predicate is ignored.
with open('D:/LexBib/terms/SKOS4lwb.json', encoding="utf-8") as f:
    data = json.load(f)['results']['bindings']

total_triples = len(data)
count = 1
for row in data:
    print('\nTriple [{}], {} triples left.'.format(
        count, total_triples - count))
    lwbs = lwb.getqid("Q7", row['s']['value'])
    predicate = row['p']['value']

    if predicate in propmap:
        if row['o']['type'] == "literal":
            # Literal objects are written as string values.
            statement = lwb.updateclaim(
                lwbs, propmap[predicate], row['o']['value'].rstrip(),
                "string")
        else:
            # Non-literal objects are resolved to an LWB item first.
            lwbo = lwb.getqid("Q7", row['o']['value'].rstrip())
            statement = lwb.updateclaim(
                lwbs, propmap[predicate], lwbo, "item")
    elif predicate == "http://www.w3.org/2004/02/skos/core#prefLabel":
        lwb.setlabel(lwbs, row['o']['xml:lang'], row['o']['value'].rstrip())
    elif predicate == "http://www.w3.org/2004/02/skos/core#altLabel":
        lwb.setlabel(lwbs, row['o']['xml:lang'], row['o']['value'].rstrip(),
                     type="alias")
    count += 1
示例#3
0
import json
import lwb

# Attach every article to its journal item (P46) by joining the two JSON
# exports on the ISSN.
entity_prefix = "http://data.lexbib.org/entity/"

with open('D:/LexBib/journals/article_issn.json', encoding="utf-8") as f:
    itemdict = json.load(f)
with open('D:/LexBib/journals/issn_journals.json', encoding="utf-8") as f:
    journaldict = json.load(f)

# ISSN -> journal Qid (entity URI with the namespace prefix removed).
issndict = {
    journal['issn']: journal['journal'].replace(entity_prefix, "")
    for journal in journaldict
}

count = 0
for item in itemdict:
    count += 1
    lwbqid = item['item'].replace(entity_prefix, "")
    lwb.updateclaim(lwbqid, "P46", issndict[item['issn']], "item")

    print('OK. {} items left.'.format(len(itemdict) - count))
	# Replace creator "literal" statements (P39 / P42) by item statements
	# (P12 / P13) for every row that already names a creator item in 'Qid'.
	# The P33 qualifier value of the old statement is carried over, and the
	# creator's full name is stored in a P67 qualifier.
	count = 1
	for item in data:
		print('\nItem ['+str(count)+'].')
		bibItem = item['bibItem'].replace("http://data.lexbib.org/entity/","")
		print('BibItem is '+bibItem+'.')
		# guidfix is defined outside this excerpt; its first group is used
		# as the Qid of the item that owns the statement.
		oldStatement = re.sub(guidfix, r'\1$', item['statement_id'])
		if 'Qid' in item and item['Qid'].startswith("Q"):
			lwbqid = re.search(guidfix, item['statement_id']).group(1)
			creatorqid = item['Qid']
			claim = lwb.getclaimfromstatement(oldStatement)
			if "P39" in claim:
				oldprop, newprop = "P39", "P12"
			elif "P42" in claim:
				oldprop, newprop = "P42", "P13"
			else:
				# Neither expected property was found. Mark the row as
				# unusable so nothing below writes with undefined values or
				# values left over from the previous row.
				oldprop = newprop = None
				print('*** Something is wrong with this supposed creator literal statement')
				time.sleep(10)

			if newprop is not None:
				listpos = claim[oldprop][0]['qualifiers']["P33"][0]['datavalue']['value']

				newStatement = lwb.updateclaim(lwbqid,newprop,creatorqid,"item")
				lwb.setqualifier(lwbqid,newprop,newStatement,"P33",listpos,"string")
				lwb.setqualifier(lwbqid,newprop,newStatement,"P67",item["firstName"]+" "+item["lastName"],"string")

				# The literal statement is only removed once its
				# replacement has been written.
				lwb.removeclaim(oldStatement)
		else:
			print('We have no item for author '+item['creatorName'])
			time.sleep(1)
		count +=1
示例#5
0
        # Ask Wikidata for the wdprop claims of wdqid until a usable answer
        # arrives, then copy the first claim's value to the LWB item as a
        # "url" value.
        while (not done):
            try:
                request = wikidata.get('wbgetclaims',
                                       entity=wdqid,
                                       property=wdprop)
            except Exception as ex:
                print('Getclaims operation failed, will try again...\n' +
                      str(ex))
                time.sleep(4)
                # `request` is unbound here, or still holds the answer of an
                # earlier call, so it must not be inspected: retry instead.
                continue
            if "claims" not in request:
                # Answer without a claims section: wait and ask again rather
                # than failing on the lookup below.
                print('Getclaims operation failed, will try again...\n' +
                      str(request))
                time.sleep(4)
                continue
            done = True
            if request['claims']:
                value = request['claims'][wdprop][0]['mainsnak']['datavalue'][
                    'value']
                statement = lwb.updateclaim(lwbqid, prop, value, "url")

# lwbsparql = SPARQLWrapper("https://data.lexbib.org/query/sparql", agent='LexBib (lexbib.org)')
# lwbsparql.setQuery("""PREFIX ldp: <http://data.lexbib.org/prop/direct/>
# 					SELECT ?item ?class WHERE {
# 						?item ldp:P5 ?class .
# 					}""")
# lwbsparql.setReturnFormat(JSON)
# while True:
# 	try:
# 		time.sleep(1.5)
# 		result = lwbsparql.query().convert()
# 		datalist = result['results']['bindings']
# 		print(str(result))
# 		break
# 	except Exception as ex:
示例#6
0
import json
import lwb

# Write the Wikidata link (P4), the language identifier (P32) and all known
# labels for every publication language listed in publangs.txt.
with open('D:/LexBib/terms/langdict.json', encoding="utf-8") as f:
    langdict = json.load(f)

# One language identifier per line. Blank lines (typically the one produced
# by a trailing newline at the end of the file) are dropped; they would
# otherwise be sent to lwb.getqid and then fail the langdict lookup.
with open('D:/LexBib/languages/publangs.txt', encoding="utf-8") as f:
    publangs = [line.strip() for line in f if line.strip()]

count = 0
for lang in publangs:
    count += 1
    print('\nLine [' + str(count) + '] of ' + str(len(publangs)) + ': ' + lang)

    # NOTE(review): the class is passed as a list here, while other scripts
    # pass a plain string to lwb.getqid - confirm which form lwb expects.
    qid = lwb.getqid(["Q8"], lang)  # class Language
    langinfo = langdict[lang]
    statement = lwb.updateclaim(qid, "P4", langinfo['wdqid'], "url")
    statement = lwb.updateclaim(qid, "P32", lang, "url")
    # The 'labels' mapping is iterated as label key -> label text.
    for label, text in langinfo['labels'].items():
        statement = lwb.setlabel(qid, label, text)
    print('OK. ' + str(len(publangs) - count) + ' languages left.')

print('\nFinished.\n')
示例#7
0
        wdqid = datalist[0]['journal']['value']
        label = datalist[0]['journalLabel']['value']
        success = 1
        regexp = re.compile(r'Q\d+')
        if regexp.search(label):
            label = ""

    except Exception as ex:
        print("ISSN " + issn +
              " not found on wikidata, skipping, will add to orphaned list.")
        orphaned += issn + '\tnot found on wikidata.\n'
        continue

    # create lwb serial for this orphaned issn
    lwbqid = lwb.getqid("Q20", wdqid)  # for serials, wdqid is also lexbib uri
    statement = lwb.updateclaim(lwbqid, "P3", wdqid, "url")
    statement = lwb.updateclaim(lwbqid, "P20", issn, "string")
    statement = lwb.updateclaim(lwbqid, "P4", wdqid, "url")
    statement = lwb.setlabel(lwbqid, "en", label)

    # add P46 "contained in serial" to bibitems with that issn
    # get bibitems

    url = "https://data.lexbib.org/query/sparql?format=json&query=PREFIX%20lwb%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fentity%2F%3E%0APREFIX%20ldp%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fprop%2Fdirect%2F%3E%0A%0Aselect%20%3FbibItem%20%3Fissn%20%3Fjournal%20where%0A%7B%20%3FbibItem%20ldp%3AP5%20lwb%3AQ3%20.%0A%20%20%3FbibItem%20ldp%3AP20%20%3Fissn%20.%0A%20%3Fjournal%20ldp%3AP5%20lwb%3AQ20%20.%0A%20%3Fjournal%20ldp%3AP20%20%3Fissn%20.%0A%20FILTER%20%28%3Fissn%20%3D%20%22" + issn + "%22%29%7D"
    done = False
    while (not done):
        try:
            r2 = requests.get(url)
            bindings2 = r2.json()['results']['bindings']
        except Exception as ex:
            print('Error: SPARQL request failed: ' + str(ex))
示例#8
0
            if rep > 4:  # break 'while' loop after 5 failed attempts to process item
                print(
                    '\nbibimport.py has entered in an endless loop... abort.')
                break
            else:
                print('\n' + str(index) + ' items processed. ' +
                      str(totalrows - index) + ' list items left.\n')
                #time.sleep(1)
                rep += 1

                try:
                    item = data[index]
                    qid = lwb.getqid(
                        "Q3", item['lexbibUri'])  # Q3: LexBib BibItem class
                    classStatement = lwb.updateclaim(qid, "P5",
                                                     item['lexbibClass'],
                                                     "item")
                    for triple in item['creatorvals']:
                        #check if creator with that position is already there as item (not literal)
                        skip = False
                        if triple['property'] == "P39":
                            itemprop = "P12"
                        elif triple['property'] == "P42":
                            itemprop = "P13"
                        for Qualifier in triple['Qualifiers']:
                            if Qualifier['property'] == "P33":
                                listpos = Qualifier['value']
                                print(
                                    'Found ' + triple['property'] +
                                    ' creator listpos: ', listpos)
                        creator_item_claims = lwb.getclaims(qid, itemprop)
 #print(str(item))
 bibItem = item['bibItem'].replace("http://data.lexbib.org/entity/", "")
 print('BibItem is ' + bibItem + '.')
 oldStatement = re.sub(guidfix, r'\1$', item['statement_id'])
 bibitemqid = re.search(guidfix, item['statement_id']).group(1)
 if 'Qid' in item and item['Qid'].startswith("Q"):
     creatorqid = item['Qid']
     creatorPrefLabel = lwb.getlabel(creatorqid, "en")
     print('This is a known creator item: ' + creatorqid + ' ' +
           creatorPrefLabel)
 else:
     print('We have no item for author ' + item['creatorName'] +
           ', will set up a new item.')
     creatorqid = lwb.newitemwithlabel("Q5", "en", item['creatorName'])
     creatorPrefLabel = item['creatorName']
     lwb.updateclaim(creatorqid, "P40", item['firstName'], "string")
     lwb.updateclaim(creatorqid, "P41", item['lastName'], "string")
 claim = lwb.getclaimfromstatement(oldStatement)
 if "P39" in claim:
     newprop = "P12"
     listpos = claim["P39"][0]['qualifiers']["P33"][0]['datavalue'][
         'value']
 elif "P42" in claim:
     newprop = "P13"
     listpos = claim["P42"][0]['qualifiers']["P33"][0]['datavalue'][
         'value']
 else:
     print(
         '*** Something is wrong with this supposed creator literal statement'
     )
     time.sleep(10)
示例#10
0
for prop in props:
    if prop == "en.wiki":
        # get en.wikipedia url and write it to LWB using P66
        itemcount = 1
        for item in lwbitems:
            print('\nItem [' + str(itemcount) + '], ' +
                  str(len(lwbitems) - itemcount) + ' items left.')
            wdqid = item['wdqid']['value'].replace(
                "http://www.wikidata.org/entity/", "")
            lwbqid = item['item']['value'].replace(
                "http://data.lexbib.org/entity/", "")
            print('Will now get en.wikipedia page url for LWB item: ' +
                  lwbqid + ' from wdItem: ' + wdqid)
            enwikiurl = lwb.get_wikipedia_url_from_wikidata_id(
                wdqid, lang='en')  #, debug=True)
            lwb.updateclaim(lwbqid, "P66", enwikiurl, "url")
            itemcount += 1
    elif prop == "en.label":
        # get label (English), and write it to LWB
        itemcount = 1
        for item in lwbitems:
            print('\nItem [' + str(itemcount) + '], ' +
                  str(len(lwbitems) - itemcount) + ' items left.')
            wdqid = item['wdqid']['value'].replace(
                "http://www.wikidata.org/entity/", "")
            lwbqid = item['item']['value'].replace(
                "http://data.lexbib.org/entity/", "")
            print('Will now get label (English) for LWB item: ' + lwbqid +
                  ' from wdItem: ' + wdqid)
            done = False
            while (not done):
示例#11
0
url = "https://data.lexbib.org/query/sparql?format=json&query=PREFIX%20lwb%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fentity%2F%3E%0APREFIX%20ldp%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fprop%2Fdirect%2F%3E%0APREFIX%20lp%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fprop%2F%3E%0APREFIX%20lps%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fprop%2Fstatement%2F%3E%0APREFIX%20lpq%3A%20%3Chttp%3A%2F%2Fdata.lexbib.org%2Fprop%2Fqualifier%2F%3E%0A%23%20%28group_concat%28%28strafter%28%3Fbibitem%2C%22http%3A%2F%2Fdata.lexbib.org%2Fentity%2F%22%29%29%3B%20separator%20%3D%20%22%40%22%29%20as%20%3Fbibitems%29%0Aselect%20distinct%20%3Fissue%20%3Fissuelabel%20%3Fissn%20%3Fjournal%20%3Fjournallabel%20%28group_concat%28%28strafter%28str%28%3Fbibitem%29%2C%22http%3A%2F%2Fdata.lexbib.org%2Fentity%2F%22%29%29%3B%20separator%20%3D%20%22%40%22%29%20as%20%3Fbibitems%29%20where%0A%7B%3Fissue%20ldp%3AP5%20lwb%3AQ1907%20.%0A%20%3Fissue%20rdfs%3Alabel%20%3Fissuelabel%20.%0A%20%3Fbibitem%20ldp%3AP9%20%3Fissue%20.%0A%20%3Fbibitem%20ldp%3AP20%20%3Fissn%20.%0A%20%3Fjournal%20ldp%3AP5%20lwb%3AQ20%20.%0A%20%3Fjournal%20ldp%3AP20%20%3Fissn%20.%0A%20%3Fjournal%20rdfs%3Alabel%20%3Fjournallabel%20.%0A%0A%20%20%7D%0AGROUP%20BY%20%3Fissue%20%3Fissuelabel%20%3Fissn%20%3Fjournal%20%3Fjournallabel%20%3Fbibitems"

# Run the SPARQL query, retrying every two seconds until both the HTTP
# request and the JSON decoding succeed.
done = False
while not done:
    try:
        r = requests.get(url)
        bindings = r.json()['results']['bindings']
    except Exception as ex:
        print('Error: SPARQL request failed: {}'.format(ex))
        time.sleep(2)
    else:
        done = True

print('Found {} journal issues...\n'.format(len(bindings)))
time.sleep(3)

entity_prefix = "http://data.lexbib.org/entity/"
count = 0
for item in bindings:
    count += 1
    # The issue item receives the ISSN (P20) and the journal link (P46).
    issueqid = item['issue']['value'].replace(entity_prefix, "")
    issn = item['issn']['value']
    lwb.updateclaim(issueqid, "P20", issn, "string")
    journalqid = item['journal']['value'].replace(entity_prefix, "")
    lwb.updateclaim(issueqid, "P46", journalqid, "item")

    # 'bibitems' is a single '@'-separated string of ids; each of them is
    # linked to the same journal.
    for bibitem in item['bibitems']['value'].split('@'):
        lwb.updateclaim(bibitem, "P46", journalqid, "item")
    print('OK. {} items left.\n'.format(len(bindings) - count))
示例#12
0
# Get the csv (part of the google spreadsheet used for manual BabelID
# annotation) and write one P86 statement per annotated term, with the
# annotation status as P87 qualifier and the term as P3 reference.
# The file is opened explicitly as UTF-8 (instead of the platform default
# encoding) and with newline="" as the csv module requires.
with open('D:/LexBib/terms/term_bnid_status_labels.csv',
          encoding="utf-8",
          newline="") as csvfile:
    termdict = csv.DictReader(csvfile)
    termlist = list(termdict)
    print(str(termlist))
    totalrows = len(termlist)
    count = 1
    processed = []  # terms already seen; only the first row per term is used
    for row in termlist:

        print('\nNow processing term ' + str(count) + ' of ' + str(totalrows) +
              ': ' + row["term"])
        # NOTE(review): the item is looked up for every row, also for rows
        # that are skipped below - confirm this is intended.
        lwbqid = lwb.getqid("Q7", row['term'])
        if row['term'] not in processed and row["status"] != "":
            if row['bnid'].startswith("bn:"):
                # An id was annotated: store it as a string value.
                claimvalue = (row['bnid'], "string")
            elif row['bnid'] == "" and row['status'] == "0":
                # No id and status "0": record an explicit "novalue".
                claimvalue = ("novalue", "novalue")
            else:
                claimvalue = None  # nothing to write for this row

            if claimvalue is not None:
                statement = lwb.updateclaim(lwbqid, "P86", *claimvalue)
                qualifier = lwb.setqualifier(lwbqid, "P86", statement, "P87",
                                             row['status'], "string")
                reference = lwb.setref(statement, "P3", row['term'], "url")
        processed.append(row['term'])
        count += 1
示例#13
0
import json
import lwb
import csv

# Maps SKOS predicate URIs to LWB property ids. Only skos:definition is
# enabled; the entries for the other predicates are commented out.
# NOTE(review): the CSV import that follows writes "P80" directly and does
# not read this map - confirm whether it is still needed here.
propmap = {
    # "http://www.w3.org/2004/02/skos/core#broader": "P72",
    # "http://www.w3.org/2004/02/skos/core#inScheme": "P74",
    # "http://www.w3.org/2004/02/skos/core#narrower": "P73",
    # "http://www.w3.org/2004/02/skos/core#topConceptOf": "P75",
    # "http://www.w3.org/2004/02/skos/core#note": "P81",
    # "http://www.w3.org/2004/02/skos/core#related": "P76",
    # "http://www.w3.org/2004/02/skos/core#closeMatch": "P77",
    # "http://www.w3.org/2004/02/skos/core#exactMatch": "P78",
    # "http://www.w3.org/2004/02/skos/core#relatedMatch": "P79",
    "http://www.w3.org/2004/02/skos/core#definition": "P80"
}

# Write the corrected definitions: each CSV row gives a term ('subject') and
# its definition text ('def'). The definition is stored as a P80 string
# statement and the subject is attached to it as a P3 reference.
# newline="" lets the csv module handle line breaks inside quoted definition
# text itself, as its documentation requires.
with open('D:/LexBib/terms/SKOS_defs_fix.csv', encoding="utf-8",
          newline="") as f:
    data = csv.DictReader(f)

    count = 1
    for row in data:
        print('\nDef [' + str(count) + ']: ' + row['subject'])
        lwbs = lwb.getqid("Q7", row['subject'])

        statement = lwb.updateclaim(lwbs, "P80", row['def'], "string")
        reference = lwb.setref(statement, "P3", row['subject'], "url")
        count += 1