示例#1
0
    # Remaining command-line arguments (earlier argv slots are consumed
    # above, outside this excerpt).
    vecFile = sys.argv[4]
    pathToSVMFile = sys.argv[5]
    pathToExpansionCache = sys.argv[6]
    pathToOutput = sys.argv[7]

    # Open the "rel" shelve database (a persistent dict on disk).
    # NOTE(review): the shelve handle is never explicitly closed in this excerpt.
    rel = shelve.open(relFile)

    # Load the word-vector table from disk.
    print "Loading vectors"
    vecs = load_vectors(vecFile)

    # Read the agglomerative clusters and represent each cluster by the
    # average of its member word vectors.
    print "Reading agglomerative cluster centers"
    clusterCenters = [
        getAverageWordRep(x, vecs) for x in read_sets(clusterFile)
    ]
    # IT MIGHT HAPPEN THAT SOME CLUSTER CENTERS ARE ()? HOW IS THIS POSSIBLE?
    # (presumably getAverageWordRep yields an empty result when none of a
    # cluster's words have vectors -- TODO confirm)

    # Fixed expansion/window parameters; they are baked into the file-name
    # suffixes below so that cached artifacts are parameter-specific.
    expansion = 5
    window = 5
    svmFileInfo = '_SVM_' + clusterFile.split(
        '/')[-1] + "_expansionParam" + str(expansion) + "_window" + str(window)
    expansionCacheInfo = "_expansionParam_" + str(expansion)

    # Words that already have a trained SVM: file names look like "<word>_...".
    wordsOfInterest = [x.split("_")[0] for x in os.listdir(pathToSVMFile)]
    #print wordsOfInterest

    # NOTE(review): pathToOutput is opened for *reading* ('r') despite its
    # name -- verify this is intentional.  The handle is never closed.
    f = open(pathToOutput, 'r')
    # Derive a start index from the token count of the file's first line
    # minus 10; the meaning of the constant 10 is not evident from this
    # excerpt -- TODO confirm against the file format.
    StartIndex = len(f.readline().split(" ")) - 10
    print "Start index: ", StartIndex
示例#2
0
                # Cached per-word sense data; word1Dic is fetched just
                # above this excerpt, outside the visible lines.
                word2Dic = cache[word2]
                # Lemmatize every whitespace-separated token of both
                # question contexts (lower-cased first).
                context1 = [
                    Word(x).lemma()
                    for x in question['context1'].lower().split(' ')
                ]
                context2 = [
                    Word(x).lemma()
                    for x in question['context2'].lower().split(' ')
                ]

                # Pick the context-appropriate sense of each target word.
                senseWord1 = getCorrectSense(context1, word1Dic[0],
                                             word1Dic[1])
                senseWord2 = getCorrectSense(context2, word2Dic[0],
                                             word2Dic[1])

                # Average word vector of each selected sense.
                wordvec1 = getAverageWordRep(senseWord1, vectors)
                wordvec2 = getAverageWordRep(senseWord2, vectors)

                # rh: human similarity rating from the task; rc: cosine
                # similarity of the two sense vectors (the method's rating).
                rh = question['rating']
                rc = cosine_similarity(wordvec1, wordvec2)
                methodsRating.append(rc)
                humanRating.append(rh)

                # Spearman correlation needs at least 3 data points.
                if len(methodsRating) > 2:
                    print "\t\tScore:", spearman(methodsRating, humanRating)
            else:
                # Sentinel pair marking a question that could not be scored.
                (rh, rc) = (999, 999)
            resultsSelve[str(i)] = (rh, rc)

    stop = time()
    # `start` is set before the loop, outside this excerpt; +0.5 rounds
    # the elapsed seconds to the nearest integer.
    stop = stop if True else stop  # (no-op placeholder removed -- see below)
    print "Done in", int(stop - start + 0.5), "seconds."
示例#3
0
				# Progress trace for the current question index.
				print "\tIteration:", i

				# Lazily build and memoize the per-word sense data (COCS)
				# for both target words of this question.
				if word1 not in cache:
					cache[word1] = makeNewCOCS(word1, rel)
				if word2 not in cache:
					cache[word2] = makeNewCOCS(word2, rel)

				word1Dic = cache[word1]
				word2Dic = cache[word2]
				# Lemmatize every whitespace-separated token of both contexts.
				context1 = [ Word(x).lemma() for x in question['context1'].lower().split(' ') ]
				context2 = [ Word(x).lemma() for x in question['context2'].lower().split(' ') ]
			
				# Pick the context-appropriate sense of each target word.
				senseWord1 = getCorrectSense(context1, word1Dic[0], word1Dic[1])
				senseWord2 = getCorrectSense(context2, word2Dic[0], word2Dic[1])
				
				# Average word vector of each selected sense.
	 			wordvec1 = getAverageWordRep(senseWord1, vectors)
	 			wordvec2 = getAverageWordRep(senseWord2, vectors)
				
				# rh: human rating; rc: cosine similarity (method's rating).
				rh = question['rating']
				rc = cosine_similarity(wordvec1, wordvec2)
				methodsRating.append(rc)
				humanRating.append(rh)
				
				# Spearman correlation needs at least 3 data points.
		 		if len(methodsRating) > 2:
		 			print "\t\tScore:", spearman(methodsRating, humanRating)
		 	else:
				# Sentinel pair marking a question that could not be scored.
		 		(rh,rc) = (999,999)
		 	resultsSelve[str(i)] = (rh,rc)

	stop = time()
	# `start` is set before the loop, outside this excerpt; +0.5 rounds
	# the elapsed seconds to the nearest integer.
	print "Done in", int(stop - start + 0.5), "seconds."
示例#4
0
	# Last CLI argument: path to the non-disambiguated ("normal") vectors.
	normalVectorsFile = sys.argv[7]

	# Fixed expansion/window parameters, baked into the file-name suffixes
	# below so that cached artifacts are parameter-specific.
	expansion = 5
	window = 5
	svmFileInfo = '_SVM_' + clusterFile.split('/')[-1] + "_expansionParam" + str(expansion) + "_window" + str(window)
	expansionCacheInfo = "_expansionParam_"  + str(expansion)
	
	print "Loading rel, task, vector, words that have been disambiguated"
	# "rel" is a shelve database (persistent dict on disk).
	rel = shelve.open(relFile)
	# load_task returns a pair; the second value is deliberately ignored
	# here (its throwaway name suggests as much).
	task, tralala = load_task(taskFilename)
	vectors = load_vectors(vectorsFilename)
	normalVectors = load_vectors(normalVectorsFile)
	# Words that already have a trained SVM: file names look like "<word>_...".
	disambiguatedWords = [x.split("_")[0] for x in os.listdir(pathToSVMFile)]

	# Represent each agglomerative cluster by its average member vector.
	print "Reading agglomerative cluster centers"
	clusterCenters = [getAverageWordRep(x, vectors) for x in read_sets(clusterFile)]

	print "Starting..."
	# initiate empty ratings (method vs. human, filled in the loop below)
	methodsRating = []
	humanRating = []
	questions = task.values()

	# Cache keyed by word pair -- populated further below, outside this
	# excerpt; TODO confirm exact usage.
	jointVocCache = dict()
	# Vocabulary covered by the loaded vectors, as a set for O(1) lookups.
	partVoc = set(vectors.keys())

	print len(disambiguatedWords), "disambiguated words"
	
	done = 0
	# Iterate over all task questions by index (Python 2 xrange).
	for i in xrange(len(questions)):
		question = questions[i]
示例#5
0
	# Last CLI argument: path to the non-disambiguated ("normal") vectors.
	pathToNormalVectors = sys.argv[7]

	# Fixed expansion/window parameters, baked into the file-name suffixes
	# below so that cached artifacts are parameter-specific.
	expansion = 5
	window = 5
	svmFileInfo = '_SVM_' + clusterFile.split('/')[-1] + "_expansionParam" + str(expansion) + "_window" + str(window)
	expansionCacheInfo = "_expansionParam_"  + str(expansion)
	
	print "Loading rel, task, vector, words that have been disambiguated"
	# "rel" is a shelve database (persistent dict on disk).
	rel = shelve.open(relFile)
	# load_task returns a pair; the second value is deliberately ignored
	# here (its throwaway name suggests as much).
	task, tralala = load_task(taskFilename)
	vectors = load_vectors(vectorsFilename)
	normalVectors = load_vectors(pathToNormalVectors)
	# Words that already have a trained SVM: file names look like "<word>_...".
	disambiguatedWords = [x.split("_")[0] for x in os.listdir(pathToSVMFile)]

	# Represent each agglomerative cluster by its average member vector.
	print "Reading agglomerative cluster centers"
	clusterCenters = [getAverageWordRep(x, vectors) for x in read_sets(clusterFile)]

	print "Starting..."
	# initiate empty ratings (method vs. human, filled in the loop below)
	methodsRating = []
	humanRating = []
	questions = task.values()

	# Cache keyed by word pair -- populated further below, outside this
	# excerpt; TODO confirm exact usage.
	jointVocCache = dict()
	# Vocabulary covered by the loaded vectors, as a set for O(1) lookups.
	partVoc = set(vectors.keys())

	print len(disambiguatedWords), "disambiguated words"
	
	done = 0
	# Iterate over all task questions by index (Python 2 xrange).
	for i in xrange(len(questions)):
		question = questions[i]
示例#6
0
文件: palm.py 项目: anoukv/coconut
	# Remaining command-line arguments (earlier argv slots are consumed
	# above, outside this excerpt).
	pathToSVMFile = sys.argv[5]
	pathToExpansionCache = sys.argv[6]
	pathToTask = sys.argv[7]
	
	# Words that already have a trained SVM: file names look like "<word>_...".
	alreadyDisambiguatedWords = set([x.split("_")[0] for x in os.listdir(pathToSVMFile)])

	# Open the "rel" shelve database (a persistent dict on disk).
	rel = shelve.open(relFile)
	
	# Load the word-vector table from disk.
	print "Loading vectors"
	vecs = load_vectors(vecFile)
	
	# Read the agglomerative clusters and represent each cluster by the
	# average of its member word vectors.
	print "Reading agglomerative cluster centers"
	agglomerativeClusterCenters = [getAverageWordRep(x, vecs) for x in read_sets(clusterFile)]
	
	# set some parameters (expansion/window sizes used further below)
	expansion = 5
	window = 5
	
	# Words occurring in the task that need to be sense-split; the first
	# return value of load_task is not needed here.
	_, wordsToSplit = load_task(pathToTask)
	
	indexCache = dict()

	# Skip words that were already disambiguated in a previous run.
	# (Python 2 filter returns a list.)
	wordsToSplit = filter(lambda x: x not in alreadyDisambiguatedWords, wordsToSplit)
	total = len(wordsToSplit)

	for i, word in enumerate(wordsToSplit):
		# progress