Example #1
import knowledgestore.ks as ks

# All labels of dbpedia:Angela_Merkel together with the named graphs they appear in.
get_graph_query = """SELECT ?label ?graph
WHERE {
GRAPH ?graph { dbpedia:Angela_Merkel rdfs:label ?label }
}"""
print(ks.run_sparql_query(get_graph_query))
print("")

# Birth places of Angela Merkel and Willy Brandt, combined with UNION.
birth_location_query = """SELECT ?p
WHERE {
{ dbpedia:Angela_Merkel dbo:birthPlace ?p } UNION { dbpedia:Willy_Brandt dbo:birthPlace ?p }
}"""
print(ks.run_sparql_query(birth_location_query))
print("")

# Part-of-speech tag stored for the mention spanning characters 62-67 of the article.
mention_result = ks.run_mention_query(
    "http://en.wikinews.org/wiki/SEALs_say_US_officer's_cover-up_was_reported_by_fake_SEAL#char=62,67",
    "nwr:pos")
print(mention_result)
print("")

# All mentions contained in the article, via the ks:hasMention property.
resource_result = ks.run_resource_query(
    "http://en.wikinews.org/wiki/SEALs_say_US_officer's_cover-up_was_reported_by_fake_SEAL",
    "ks:hasMention")
print("")
print(len(resource_result))

# Raw text of the article.
text_result = ks.run_files_query(
    "http://en.wikinews.org/wiki/SEALs_say_US_officer's_cover-up_was_reported_by_fake_SEAL"
)
print(text_result)
print(len(text_result))
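
The bindings returned by run_sparql_query can also be inspected one at a time; a minimal sketch, assuming the result is a list of dicts keyed by the SELECT variables (the binding['m'] access in Example #2 suggests this shape):

# Print each label together with the graph it comes from (assumed result shape).
for binding in ks.run_sparql_query(get_graph_query):
    print(binding['label'], binding['graph'])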
Example #2
print("Answer for question 1: {0}".format(counter))

# second question
# All mentions that denote Angela Merkel (gaf:denotedBy links an entity to its mentions).
sparql_query = "SELECT DISTINCT ?m WHERE { dbpedia:Angela_Merkel gaf:denotedBy ?m }"
sparql_result = ks.run_sparql_query(sparql_query)

# Map each mention back to the article it occurs in, deduplicating while preserving order.
resource_uris = []
for binding in sparql_result:
    mention_uri = binding['m']
    resource_uri = ks.mention_uri_to_resource_uri(mention_uri)
    if resource_uri not in resource_uris:
        resource_uris.append(resource_uri)
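# The same deduplication could be written as a set comprehension; order is
# lost, which is harmless here because the URIs are only counted below:
# resource_uris = {ks.mention_uri_to_resource_uri(b['m']) for b in sparql_result}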

# Count how many of those articles belong to the "Economy and business" category.
counter = 0
all_mappings = ks.get_all_resource_category_mappings(
    ks.top_level_category_names)
for resource_uri in resource_uris:
    if "Economy and business" in all_mappings[resource_uri]:
        counter += 1

print("Answer for question 2: {0}".format(counter))

# third question
resource_uri = "http://en.wikinews.org/wiki/Christian_Wulff_elected_Germany's_new_president"
text = ks.run_files_query(resource_uri)
# Classic NLTK pipeline on the article's first sentence:
# sentence splitting, tokenization, POS tagging, named-entity chunking.
sentences = nltk.sent_tokenize(text)
sentence = sentences[0]
tokenized = nltk.word_tokenize(sentence)
pos_tagged = nltk.pos_tag(tokenized)
chunked = nltk.ne_chunk(pos_tagged)
print(chunked)
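
nltk.ne_chunk returns an nltk.Tree in which named entities form labeled subtrees (PERSON, GPE, ORGANIZATION, ...); a minimal sketch for extracting them from the result above:

# Collect (entity text, entity type) pairs from the chunk tree.
for subtree in chunked.subtrees():
    if subtree.label() != 'S':  # skip the sentence root, keep only NE chunks
        entity = " ".join(word for word, tag in subtree.leaves())
        print(entity, subtree.label())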
Example #3
import pickle
import string

import knowledgestore.ks as ks
import nltk
from nltk.corpus import wordnet as wn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import gensim

print('\nSETUP')
# Load the pre-split data set of article URIs and take the 'Sports' training split.
with open("data_set_split.pickle", "rb") as f:
    data_set = pickle.load(f)

train = data_set['Sports']['train']

# Fetch the full text of the first ten training articles.
articles = []
for i in range(10):
    articles.append(ks.run_files_query(train[i][0]))

# # computing most frequent bigrams
# print('\nMOST FREQUENT BIGRAMS')
# text = ' '.join(articles)
# tokens = nltk.word_tokenize(text)
# tokens = [token for token in tokens if token not in string.punctuation]
# bigrams = nltk.bigrams(tokens)
# freq_dist = nltk.FreqDist(bigrams)
# frequency_list = []
# for bigram, freq in freq_dist.items():
#     frequency_list.append([bigram, freq])
# frequency_list.sort(key = lambda x: x[1], reverse=True)
# for i in range(10):
#     print(frequency_list[i])
#
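
TfidfVectorizer and cosine_similarity are imported but never used in this fragment; a minimal sketch of how they could be applied to the fetched articles (a hypothetical continuation, not part of the original):

# Pairwise TF-IDF cosine similarity of the ten fetched articles.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(articles)  # rows: articles, columns: vocabulary terms
similarities = cosine_similarity(tfidf_matrix)     # 10x10 symmetric similarity matrix
print(similarities[0])                             # similarity of article 0 to all others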