Пример #1
0
def getFBRelations(w):
	"""Return the words related to ``w`` according to Freebase.

	Results are memoised in the module-level ``freebaseRelations`` mapping; a
	hit is returned directly. On a miss, every mid returned by
	``searchQuery(w)`` is expanded with ``topicQuery`` and the words found at
	positions 0 and 2 of each triple are collected, dropping empty strings and
	words equal to ``w.lower()``. The de-duplicated list is stored under ``w``
	and the whole mapping is written to ``cache + 'FB_relations.p'``.

	Args:
		w: the word to look up (it must provide ``.lower()``).

	Returns:
		A list of unique related words, in no particular order.
	"""
	# Memoised answer: skip the remote queries entirely.
	if w in freebaseRelations:
		print('Accessed from local store!')
		return freebaseRelations[w]

	print('Getting FB relations for word: ', w)
	collected = []
	for mid in searchQuery(w):
		# Each item yielded by topicQuery is a pair; only its first element
		# (the triple) is used here.
		for fact, _ in topicQuery(mid):
			candidates = getWords(fact[0]) + getWords(fact[2])
			# Keep non-empty words that differ from the query word itself.
			collected.extend(
				token for token in candidates
				if token != w.lower() and len(token) > 0
			)

	# Collapse duplicates.
	neighbors = list(set(collected))

	# Remember the result and persist the whole store.
	freebaseRelations[w] = neighbors
	utils.saveData(freebaseRelations, cache + 'FB_relations.p')
	print('FB relations saved:', len(neighbors))

	return neighbors
Пример #2
0
def getWNRelations(w):
	"""Return the words related to ``w`` through WordNet hypernyms.

	Results are memoised in the module-level ``wnRelations`` mapping; a hit is
	returned directly. On a miss, the lemma names of every hypernym of every
	synset of ``w`` are split with ``getWords``; empty strings and words equal
	to ``w.lower()`` are dropped. The de-duplicated list is stored under ``w``
	and the whole mapping is written to ``cache + 'WN_relations.p'``.

	Args:
		w: the word to look up (it must provide ``.lower()``).

	Returns:
		A list of unique related words, in no particular order.
	"""
	# Memoised answer: nothing to compute.
	if w in wnRelations:
		return wnRelations[w]

	collected = []
	for synset in wn.synsets(w):
		for hypernym in synset.hypernyms():
			for lemma in hypernym.lemmas():
				# Keep non-empty words that differ from the query word itself.
				collected.extend(
					token for token in getWords(lemma.name())
					if token != w.lower() and len(token) > 0
				)

	# Collapse duplicates.
	neighbors = list(set(collected))

	# Remember the result and persist the whole store.
	wnRelations[w] = neighbors
	utils.saveData(wnRelations, cache + 'WN_relations.p')
	print('WN relations saved:', len(neighbors))

	return neighbors
Пример #3
0
	def getSecondOrderKeywords(self):
		"""Collect the unique second order keywords of all questions in ``self.test``.

		For every question the keywords are taken from the module-level
		``localKeywords`` store when the question text is already present;
		otherwise they are computed with
		``WordGraph(questionText, self.N).getSecondOrderKeywords()``, added to
		the store, and the store is written to ``cache + 'keywords.p'``.

		Questions are unpacked as ``(questionText, answers)`` when
		``self.dataType == 'val'`` and as
		``(questionText, answers, correctAnswer)`` otherwise; only the question
		text is used.

		Returns:
			A list of unique keywords, in no particular order. An empty list
			is returned when ``self.test`` contains no questions.
		"""
		keywords = []
		# Explicit 1-based counter: stays defined (0) when self.test is empty
		# and reports the true number of questions in the summary below.
		questionCount = 0
		for questionCount, question in enumerate(self.test, start=1):
			print('\nQuestion {} ---------------------------'.format(questionCount))
			# The unpacking doubles as a shape check of the question record.
			if self.dataType == 'val':
				questionText, _answers = question
			else:
				questionText, _answers, _correctAnswer = question

			# If available locally, reuse it
			if questionText in localKeywords:
				print('keywords accessed locally')
				keywords += localKeywords[questionText]
			else:
				wordGraph = WordGraph(questionText, self.N)
				newKeywords = wordGraph.getSecondOrderKeywords()
				keywords += newKeywords

				# Persist after every new question so partial progress is kept.
				localKeywords[questionText] = newKeywords
				utils.saveData(localKeywords, cache + 'keywords.p')
				print('keywords saved.')

		keywords = list(set(keywords))
		print('{} second order keywords found from {} questions'.format(len(keywords), questionCount))
		return keywords
Пример #4
0
# Reuse previously stored Freebase relations when the cache file exists,
# otherwise start with an empty store.
freebaseRelations = (
	utils.loadData(cache + 'FB_relations.p')
	if os.path.isfile(cache + 'FB_relations.p')
	else {}
)

# Worker pool settings handed to the scraper below
poolWorkerNum, poolIterations, poolRedundancies = 200, 2, False

# Collect the second order keywords of the exam questions and persist them
eightGradeExam = Test(start=0, end=8132, dataType='val', N=6)
keywords = eightGradeExam.getSecondOrderKeywords()
utils.saveData(keywords, cache + 'SecondOrderKeywords.p')
print('Keywords saved.')

# Keywords that already have locally stored relations need no download
keywords = [kw for kw in keywords if kw not in freebaseRelations]
print('Number of first order keywords left: {}'.format(len(keywords)))

# Time the Freebase download of the remaining keywords
start_download = time.time()
freebaseChunk2Mids, freebaseMid2Triples = scraper.getFreebaseCompendium(
	keywords,
	workerNum=poolWorkerNum,
	iterations=poolIterations,
	redundancies=poolRedundancies,
)
end_download = time.time()

start_unpack = time.time()
Пример #5
0
	def takeTest(self):
		"""Answer every question in ``self.test`` using density scores and tally the results.

		For each question a WordGraph is taken from ``self.mindmaps`` or, when
		missing, built with ``WordGraph(questionText, self.N)``, stored in
		``self.mindmaps`` and the whole mapping saved to
		``cache + 'mindmaps.p'``. Every answer is scored with
		``getAnswerScore`` on a deep copy of that graph; the letter
		(``self.LETTERS``) of the highest-scoring answer is compared with the
		question's correct answer.

		Side effects visible here: calls ``self.reset()``, updates
		``self.correct`` / ``self.incorrect``, and appends one entry per
		question to ``self.answerReport`` and ``self.timeReport``.

		Each question is unpacked as
		``(questionText, answers, correctAnswer)``.
		"""
		self.reset()  # presumably clears state left by an earlier run -- confirm
		densityCorrect = 0
		# searchCorrect / w2vCorrect are only referenced by the commented-out
		# search and word2vec marking further down.
		searchCorrect = 0
		w2vCorrect = 0
		# Take test
		for num, question in enumerate(self.test):
			print('\nQuestion {} ---------------------------'.format(num+1))
			# Think about question -> Generate scene
			# The timer covers mindmap lookup/creation as well as answer scoring.
			start = time.time()
			questionText, answers, correctAnswer = question

			print('Question: {}'.format(questionText))

			# Reuse the stored mindmap for this question, or build and save a new one
			if questionText in self.mindmaps:
				print('Mindmap accessed from local store!')
				wordGraph = self.mindmaps[questionText]
			else:
				wordGraph = WordGraph(questionText, self.N)
				self.mindmaps[questionText] = wordGraph
				utils.saveData(self.mindmaps, cache + 'mindmaps.p')
				print('Mindmap saved.')

			# Only used by the commented-out word2vec scoring below.
			keywords = wordGraph.questionKeywords

			# Get density & search scores
			densityScores = []
			# searchScores = []
			# word2vecScores = []
			for ans in answers:
				# Score on a private copy so the stored graph is not shared between
				# answers -- presumably getAnswerScore mutates the graph; confirm.
				questionGraph = copy.deepcopy(wordGraph)
				densityScore = questionGraph.getAnswerScore(ans)
				densityScores.append(densityScore)
				# searchScores.append(searchScore)
				# word2vecScores.append(util.averageSimilarity(keywords, ans))

			# Mark using density score
			# index(max(...)) picks the first answer on ties; max() raises
			# ValueError if a question has no answers.
			density_index = densityScores.index(max(densityScores))
			if self.LETTERS[density_index] == correctAnswer:
				self.correct += 1
				densityCorrect += 1
			else:
				self.incorrect += 1

			# Mark question using search scores
			# search_index = searchScores.index(min(searchScores))
			# if self.LETTERS[search_index] == correctAnswer:
			# 	searchCorrect += 1

			# Mark question using word2vec
			# NOTE(review): the disabled check below tests search_index rather than
			# w2v_index -- fix before re-enabling.
			# w2v_index = word2vecScores.index(max(word2vecScores))
			# if self.LETTERS[search_index] == correctAnswer:
			# 	w2vCorrect += 1

			end = time.time()

			# Per-question record: all scores, the chosen index, and the expected letter
			self.answerReport.append((densityScores, density_index, correctAnswer))
			self.timeReport.append(end - start)

		# Summary: number of questions and how many the density score got right
		print('Out of {} questions'.format(len(self.test)))
		print('Density: {}'.format(densityCorrect))