# --- Script setup (collapsed region; originally multiple lines) ---
# Reads four positional CLI paths (argv[4..7]): the word-vector file, the
# directory of per-word SVM files, an expansion cache path, and an output path.
# Opens the `rel` shelve database (relFile is defined earlier, out of view),
# loads word vectors, and computes one average-vector center per agglomerative
# cluster read from clusterFile.
# `expansion` / `window` are fixed hyperparameters used to build the
# svmFileInfo / expansionCacheInfo filename suffixes.
# `wordsOfInterest` collects the word prefix (before the first "_") of every
# file in the SVM directory — presumably the words already processed.
# NOTE(review): `pathToOutput` is opened with mode 'r' despite its name — it is
# read, not written; confirm against the caller. The handle `f` is never
# closed in this region.
# NOTE(review): StartIndex = (token count of the file's first line) - 10; the
# meaning of the constant 10 is not evident from this chunk — verify upstream.
vecFile = sys.argv[4] pathToSVMFile = sys.argv[5] pathToExpansionCache = sys.argv[6] pathToOutput = sys.argv[7] # open the rel rel = shelve.open(relFile) # open the vectors print "Loading vectors" vecs = load_vectors(vecFile) # read clusters and get their cluster centers by taking the average... print "Reading agglomerative cluster centers" clusterCenters = [ getAverageWordRep(x, vecs) for x in read_sets(clusterFile) ] # IT MIGHT HAPPEN THAT SOME CLUSTER CENTERS ARE ()? HOW IS THIS POSSIBLE? # set some remaining parameters expansion = 5 window = 5 svmFileInfo = '_SVM_' + clusterFile.split( '/')[-1] + "_expansionParam" + str(expansion) + "_window" + str(window) expansionCacheInfo = "_expansionParam_" + str(expansion) wordsOfInterest = [x.split("_")[0] for x in os.listdir(pathToSVMFile)] #print wordsOfInterest f = open(pathToOutput, 'r') StartIndex = len(f.readline().split(" ")) - 10 print "Start index: ", StartIndex
# --- Loop-body fragment (collapsed; starts mid-iteration) ---
# Continuation of a per-question scoring loop: the matching `word1Dic =
# cache[word1]` assignment precedes this fragment, out of view.
# For each question: lemmatize both contexts word-by-word, pick the correct
# sense of each target word via getCorrectSense (fed the two components of the
# cached entry), average the sense's word vectors, and score the pair with
# cosine similarity (rc) against the human rating (rh).
# Spearman correlation is printed once more than two ratings have accumulated.
# NOTE(review): the `else` branch overwrites (rh, rc) with the sentinel
# (999, 999) before storing into resultsSelve, while the real rh/rc were
# already appended to the rating lists — so the shelve and the lists disagree
# for the first two iterations; confirm this is intended.
# NOTE(review): `resultsSelve` looks like a typo for "resultsShelve", but it is
# a live identifier defined elsewhere — do not rename here.
word2Dic = cache[word2] context1 = [ Word(x).lemma() for x in question['context1'].lower().split(' ') ] context2 = [ Word(x).lemma() for x in question['context2'].lower().split(' ') ] senseWord1 = getCorrectSense(context1, word1Dic[0], word1Dic[1]) senseWord2 = getCorrectSense(context2, word2Dic[0], word2Dic[1]) wordvec1 = getAverageWordRep(senseWord1, vectors) wordvec2 = getAverageWordRep(senseWord2, vectors) rh = question['rating'] rc = cosine_similarity(wordvec1, wordvec2) methodsRating.append(rc) humanRating.append(rh) if len(methodsRating) > 2: print "\t\tScore:", spearman(methodsRating, humanRating) else: (rh, rc) = (999, 999) resultsSelve[str(i)] = (rh, rc) stop = time() print "Done in", int(stop - start + 0.5), "seconds."
# --- Loop-body fragment (collapsed; near-duplicate of the previous region) ---
# Same per-question scoring as the fragment above, but this variant also
# populates the cache on a miss: makeNewCOCS(word, rel) builds the cached
# entry for any word not yet present.
# Flow: lemmatize both contexts, resolve each word to its correct sense,
# average the sense vectors, compute cosine similarity (rc) vs. the human
# rating (rh), accumulate both, and print the running Spearman score once
# more than two pairs exist.
# NOTE(review): as in the sibling fragment, the `else` branch replaces
# (rh, rc) with the (999, 999) sentinel before writing to resultsSelve —
# confirm the first two shelve entries are meant to be sentinels.
print "\tIteration:", i if word1 not in cache: cache[word1] = makeNewCOCS(word1, rel) if word2 not in cache: cache[word2] = makeNewCOCS(word2, rel) word1Dic = cache[word1] word2Dic = cache[word2] context1 = [ Word(x).lemma() for x in question['context1'].lower().split(' ') ] context2 = [ Word(x).lemma() for x in question['context2'].lower().split(' ') ] senseWord1 = getCorrectSense(context1, word1Dic[0], word1Dic[1]) senseWord2 = getCorrectSense(context2, word2Dic[0], word2Dic[1]) wordvec1 = getAverageWordRep(senseWord1, vectors) wordvec2 = getAverageWordRep(senseWord2, vectors) rh = question['rating'] rc = cosine_similarity(wordvec1, wordvec2) methodsRating.append(rc) humanRating.append(rh) if len(methodsRating) > 2: print "\t\tScore:", spearman(methodsRating, humanRating) else: (rh,rc) = (999,999) resultsSelve[str(i)] = (rh,rc) stop = time() print "Done in", int(stop - start + 0.5), "seconds."
# --- Script setup + loop header (collapsed; ends mid-loop) ---
# Evaluation-script preamble: takes the "normal" (non-disambiguated) vector
# file from argv[7]; relFile/taskFilename/vectorsFilename/clusterFile/
# pathToSVMFile are assigned earlier, out of view.
# Builds the svmFileInfo / expansionCacheInfo filename suffixes from the fixed
# expansion=5, window=5 hyperparameters, loads the task, both vector sets, the
# list of already-disambiguated words (file-name prefixes in the SVM dir), and
# the agglomerative cluster centers (average vector per cluster).
# Initializes the rating accumulators and a joint-vocabulary cache, then
# enters the per-question loop; the loop body continues beyond this region.
# NOTE(review): this region is a near-duplicate of the next one (which names
# argv[7] `pathToNormalVectors`) — they appear to come from sibling scripts.
normalVectorsFile = sys.argv[7] expansion = 5 window = 5 svmFileInfo = '_SVM_' + clusterFile.split('/')[-1] + "_expansionParam" + str(expansion) + "_window" + str(window) expansionCacheInfo = "_expansionParam_" + str(expansion) print "Loading rel, task, vector, words that have been disambiguated" rel = shelve.open(relFile) task, tralala = load_task(taskFilename) vectors = load_vectors(vectorsFilename) normalVectors = load_vectors(normalVectorsFile) disambiguatedWords = [x.split("_")[0] for x in os.listdir(pathToSVMFile)] print "Reading agglomerative cluster centers" clusterCenters = [getAverageWordRep(x, vectors) for x in read_sets(clusterFile)] print "Starting..." # initiate empty ratings methodsRating = [] humanRating = [] questions = task.values() jointVocCache = dict() partVoc = set(vectors.keys()) print len(disambiguatedWords), "disambiguated words" done = 0 for i in xrange(len(questions)): question = questions[i]
# --- Script setup + loop header (collapsed; ends mid-loop) ---
# Variant of the preceding preamble from a sibling script: identical logic,
# but argv[7] is bound to `pathToNormalVectors` instead of
# `normalVectorsFile`. Loads the shelve `rel` database, the task, the
# sense-vector and normal-vector files, the already-disambiguated word list
# (file-name prefixes in the SVM directory), and the per-cluster average
# vectors; sets up empty rating accumulators, the joint-vocabulary cache, and
# the partial vocabulary set, then enters the per-question loop whose body
# continues beyond this region.
# NOTE(review): `tralala` is an ignored second return value of load_task;
# `done = 0` suggests a progress counter updated later, out of view.
pathToNormalVectors = sys.argv[7] expansion = 5 window = 5 svmFileInfo = '_SVM_' + clusterFile.split('/')[-1] + "_expansionParam" + str(expansion) + "_window" + str(window) expansionCacheInfo = "_expansionParam_" + str(expansion) print "Loading rel, task, vector, words that have been disambiguated" rel = shelve.open(relFile) task, tralala = load_task(taskFilename) vectors = load_vectors(vectorsFilename) normalVectors = load_vectors(pathToNormalVectors) disambiguatedWords = [x.split("_")[0] for x in os.listdir(pathToSVMFile)] print "Reading agglomerative cluster centers" clusterCenters = [getAverageWordRep(x, vectors) for x in read_sets(clusterFile)] print "Starting..." # initiate empty ratings methodsRating = [] humanRating = [] questions = task.values() jointVocCache = dict() partVoc = set(vectors.keys()) print len(disambiguatedWords), "disambiguated words" done = 0 for i in xrange(len(questions)): question = questions[i]
# --- Script setup + loop header (collapsed; ends mid-loop) ---
# Preamble of the word-splitting (disambiguation) driver: argv[5..7] give the
# SVM-file directory, the expansion cache path, and the task file.
# Words already disambiguated are detected by the file-name prefix (before
# the first "_") of entries in the SVM directory, so completed words can be
# skipped on re-runs.
# Loads the `rel` shelve database and word vectors, computes the average
# vector of each agglomerative cluster, then pulls the word list to split
# from the task (the first return value of load_task is discarded here).
# The filter drops words whose SVM file already exists; the loop over the
# remaining words continues beyond this region (`total` is presumably used
# for progress reporting against `i`).
pathToSVMFile = sys.argv[5] pathToExpansionCache = sys.argv[6] pathToTask = sys.argv[7] alreadyDisambiguatedWords = set([x.split("_")[0] for x in os.listdir(pathToSVMFile)]) # open the rel rel = shelve.open(relFile) # open the vectors print "Loading vectors" vecs = load_vectors(vecFile) # read clusters and get their cluster centers by taking the average... print "Reading agglomerative cluster centers" agglomerativeClusterCenters = [getAverageWordRep(x, vecs) for x in read_sets(clusterFile)] # set some parameters expansion = 5 window = 5 # get the words that occur in the task and need to be compared _, wordsToSplit = load_task(pathToTask) indexCache = dict() wordsToSplit = filter(lambda x: x not in alreadyDisambiguatedWords, wordsToSplit) total = len(wordsToSplit) for i, word in enumerate(wordsToSplit): # progress