def getFBRelations(w):
    """Return words related to *w* via Freebase triples, with caching.

    Results are memoized in the module-level ``freebaseRelations`` dict and
    persisted to ``FB_relations.p`` through ``utils.saveData`` so later runs
    skip the network round-trip.
    """
    # Serve from the in-memory cache when possible.
    if w in freebaseRelations:
        print('Accessed from local store!')
        return freebaseRelations[w]

    print('Getting FB relations for word: ', w)
    target = w.lower()
    related = set()  # set from the start: dedup for free
    for mid in searchQuery(w):
        for t, _ in topicQuery(mid):
            # Harvest words from both the subject and object of the triple,
            # skipping empty strings and the query word itself.
            for candidate in getWords(t[0]) + getWords(t[2]):
                if candidate != target and len(candidate) > 0:
                    related.add(candidate)

    neighbors = list(related)
    # Persist the updated cache before returning.
    freebaseRelations[w] = neighbors
    utils.saveData(freebaseRelations, cache + 'FB_relations.p')
    print('FB relations saved:', len(neighbors))
    return neighbors
def getWNRelations(w):
    """Return hypernym-lemma words related to *w* from WordNet, with caching.

    Memoized in the module-level ``wnRelations`` dict and persisted to
    ``WN_relations.p`` via ``utils.saveData``.
    """
    # print('Getting WordNet relations for word: ', w)
    if w in wnRelations:
        # print('Accessed from local store!')
        return wnRelations[w]

    target = w.lower()
    related = set()  # set from the start: dedup for free
    for synset in wn.synsets(w):
        for hyper in synset.hypernyms():
            for lemma in hyper.lemmas():
                # Lemma names can be multi-word; split and keep each piece
                # that is non-empty and not the query word itself.
                for word in getWords(lemma.name()):
                    if word != target and len(word) > 0:
                        related.add(word)

    neighbors = list(related)
    # Persist the updated cache before returning.
    wnRelations[w] = neighbors
    utils.saveData(wnRelations, cache + 'WN_relations.p')
    print('WN relations saved:', len(neighbors))
    return neighbors
def getSecondOrderKeywords(self):
    """Collect deduplicated second-order keywords across every question in self.test.

    Per-question keyword lists are memoized by question text in the
    module-level ``localKeywords`` dict and persisted to ``keywords.p``.

    Returns:
        list: unique keywords pooled from all questions.
    """
    keywords = []
    for num, question in enumerate(self.test):
        print('\nQuestion {} ---------------------------'.format(num+1))
        # 'val' questions carry no answer key; other splits include it.
        if self.dataType == 'val':
            questionText, answers = question
        else:
            questionText, answers, correctAnswer = question
        # If available locally, return it
        if questionText in localKeywords:
            print('keywords accessed locally')
            keywords += localKeywords[questionText]
        else:
            wordGraph = WordGraph(questionText, self.N)
            newKeywords = wordGraph.getSecondOrderKeywords()
            keywords += newKeywords
            # Cache and persist so re-runs skip graph construction.
            localKeywords[questionText] = newKeywords
            utils.saveData(localKeywords, cache + 'keywords.p')
            print('keywords saved.')
    keywords = list(set(keywords))
    # BUG FIX: previously printed `num` (the last 0-based loop index), which
    # under-counts the questions by one and raises NameError when self.test
    # is empty; report len(self.test) instead.
    print('{} second order keywords found from {} questions'.format(len(keywords), len(self.test)))
    return keywords
# Load the persisted Freebase relation cache when present, else start empty.
freebaseRelations = (utils.loadData(cache + 'FB_relations.p')
                     if os.path.isfile(cache + 'FB_relations.p') else {})

# Worker-pool configuration for the Freebase scraper.
poolWorkerNum = 200
poolIterations = 2
poolRedundancies = False

# Harvest second-order keywords over the full exam.
eightGradeExam = Test(start=0, end=8132, dataType='val', N=6)
keywords = eightGradeExam.getSecondOrderKeywords()

# Persist the harvested keyword set.
utils.saveData(keywords, cache + 'SecondOrderKeywords.p')
print('Keywords saved.')

# Drop keywords whose relations are already cached locally.
keywords = [kw for kw in keywords if kw not in freebaseRelations]
print('Number of first order keywords left: {}'.format(len(keywords)))

start_download = time.time()
# Fetch relations for the remaining keywords from Freebase.
freebaseChunk2Mids, freebaseMid2Triples = scraper.getFreebaseCompendium(
    keywords,
    workerNum=poolWorkerNum,
    iterations=poolIterations,
    redundancies=poolRedundancies)
end_download = time.time()
start_unpack = time.time()
def takeTest(self):
    """Answer every question in self.test using graph-density scoring.

    For each question a WordGraph "mindmap" is built (or pulled from the
    persisted cache), every candidate answer is scored on a deep copy of
    the graph, and the answer with the highest density score is chosen.
    Per-question (scores, chosen index, correct answer) tuples go to
    self.answerReport; per-question wall-clock times go to self.timeReport.

    Cleanup: removed the commented-out search/word2vec scoring paths and
    their unused counters (the w2v block even indexed with the wrong
    variable, `search_index`), plus the unused `keywords` local.
    """
    self.reset()
    densityCorrect = 0
    # Take test
    for num, question in enumerate(self.test):
        print('\nQuestion {} ---------------------------'.format(num+1))
        start = time.time()
        questionText, answers, correctAnswer = question
        print('Question: {}'.format(questionText))
        # Reuse a cached mindmap when available; otherwise build and persist it.
        if questionText in self.mindmaps:
            print('Mindmap accessed from local store!')
            wordGraph = self.mindmaps[questionText]
        else:
            wordGraph = WordGraph(questionText, self.N)
            self.mindmaps[questionText] = wordGraph
            utils.saveData(self.mindmaps, cache + 'mindmaps.p')
            print('Mindmap saved.')
        # Score each answer on its own deep copy so one answer's graph
        # mutations cannot leak into the next answer's score.
        densityScores = []
        for ans in answers:
            questionGraph = copy.deepcopy(wordGraph)
            densityScores.append(questionGraph.getAnswerScore(ans))
        # Mark using density score: highest score wins.
        density_index = densityScores.index(max(densityScores))
        if self.LETTERS[density_index] == correctAnswer:
            self.correct += 1
            densityCorrect += 1
        else:
            self.incorrect += 1
        end = time.time()
        self.answerReport.append((densityScores, density_index, correctAnswer))
        self.timeReport.append(end - start)
    print('Out of {} questions'.format(len(self.test)))
    print('Density: {}'.format(densityCorrect))