Пример #1
0
    # NOTE(review): visible tail of a larger routine. relFile and clusterFile
    # are used below but are bound outside this excerpt -- confirm where.
    # Positional command-line arguments 4 through 7.
    vecFile = sys.argv[4]
    pathToSVMFile = sys.argv[5]
    pathToExpansionCache = sys.argv[6]
    pathToOutput = sys.argv[7]

    # Open the shelve database stored at relFile.
    rel = shelve.open(relFile)

    # Load the vectors from vecFile.
    print "Loading vectors"
    vecs = load_vectors(vecFile)

    # Read the clusters and derive one center per cluster: every item produced
    # by read_sets(clusterFile) is passed to getAverageWordRep together with
    # the loaded vectors.
    print "Reading agglomerative cluster centers"
    clusterCenters = [
        getAverageWordRep(x, vecs) for x in read_sets(clusterFile)
    ]
    # TODO: some cluster centers may come back as (); the cause is not yet
    # understood.

    # Set the remaining parameters.
    expansion = 5
    window = 5
    # String built from the last '/'-separated component of clusterFile plus
    # the expansion and window settings.
    svmFileInfo = '_SVM_' + clusterFile.split(
        '/')[-1] + "_expansionParam" + str(expansion) + "_window" + str(window)
    # String that encodes only the expansion setting.
    expansionCacheInfo = "_expansionParam_" + str(expansion)

    # pathToSVMFile is listed as a directory; keep the part of each entry name
    # that precedes the first underscore.
    wordsOfInterest = [x.split("_")[0] for x in os.listdir(pathToSVMFile)]
    #print wordsOfInterest
    # pathToOutput is opened for reading here. StartIndex is the number of
    # single-space-separated fields on its first line, minus 10.
    # NOTE(review): f is not closed within this excerpt -- confirm it is closed
    # later, or wrap the read in a `with` block.
    f = open(pathToOutput, 'r')
    StartIndex = len(f.readline().split(" ")) - 10
    print "Start index: ", StartIndex
Пример #2
0
	# NOTE(review): visible tail of a larger routine. clusterFile, relFile,
	# taskFilename, vectorsFilename and pathToSVMFile are used below but are
	# bound outside this excerpt -- confirm where.
	# Seventh positional command-line argument.
	pathToNormalVectors = sys.argv[7]

	# Fixed parameters, also embedded in the two strings built below.
	expansion = 5
	window = 5
	# String built from the last '/'-separated component of clusterFile plus
	# the expansion and window settings.
	svmFileInfo = '_SVM_' + clusterFile.split('/')[-1] + "_expansionParam" + str(expansion) + "_window" + str(window)
	# String that encodes only the expansion setting.
	expansionCacheInfo = "_expansionParam_"  + str(expansion)
	
	print "Loading rel, task, vector, words that have been disambiguated"
	# Open the shelve database stored at relFile.
	rel = shelve.open(relFile)
	# load_task returns a pair; the second element (tralala) is not used in
	# the visible part of this routine.
	task, tralala = load_task(taskFilename)
	# Two vector collections are loaded with the same helper from two paths.
	vectors = load_vectors(vectorsFilename)
	normalVectors = load_vectors(pathToNormalVectors)
	# pathToSVMFile is listed as a directory; keep the part of each entry name
	# that precedes the first underscore.
	disambiguatedWords = [x.split("_")[0] for x in os.listdir(pathToSVMFile)]

	# One center per item produced by read_sets(clusterFile), computed by
	# getAverageWordRep against `vectors`.
	print "Reading agglomerative cluster centers"
	clusterCenters = [getAverageWordRep(x, vectors) for x in read_sets(clusterFile)]

	print "Starting..."
	# Start with empty rating lists.
	methodsRating = []
	humanRating = []
	# Indexed by position in the loop below. The print statements and xrange
	# indicate Python 2; assumes task is a dict, whose values() is then a
	# list -- TODO confirm.
	questions = task.values()

	jointVocCache = dict()
	# Set of the keys of `vectors`.
	partVoc = set(vectors.keys())

	print len(disambiguatedWords), "disambiguated words"
	
	done = 0
	# Walk the questions by index; the loop body continues past the end of
	# this excerpt.
	for i in xrange(len(questions)):
		question = questions[i]
Пример #3
0
	# NOTE(review): visible tail of a larger routine. clusterFile, relFile,
	# taskFilename, vectorsFilename and pathToSVMFile are used below but are
	# bound outside this excerpt -- confirm where.
	# Seventh positional command-line argument.
	normalVectorsFile = sys.argv[7]

	# Fixed parameters, also embedded in the two strings built below.
	expansion = 5
	window = 5
	# String built from the last '/'-separated component of clusterFile plus
	# the expansion and window settings.
	svmFileInfo = '_SVM_' + clusterFile.split('/')[-1] + "_expansionParam" + str(expansion) + "_window" + str(window)
	# String that encodes only the expansion setting.
	expansionCacheInfo = "_expansionParam_"  + str(expansion)
	
	print "Loading rel, task, vector, words that have been disambiguated"
	# Open the shelve database stored at relFile.
	rel = shelve.open(relFile)
	# load_task returns a pair; the second element (tralala) is not used in
	# the visible part of this routine.
	task, tralala = load_task(taskFilename)
	# Two vector collections are loaded with the same helper from two paths.
	vectors = load_vectors(vectorsFilename)
	normalVectors = load_vectors(normalVectorsFile)
	# pathToSVMFile is listed as a directory; keep the part of each entry name
	# that precedes the first underscore.
	disambiguatedWords = [x.split("_")[0] for x in os.listdir(pathToSVMFile)]

	# One center per item produced by read_sets(clusterFile), computed by
	# getAverageWordRep against `vectors`.
	print "Reading agglomerative cluster centers"
	clusterCenters = [getAverageWordRep(x, vectors) for x in read_sets(clusterFile)]

	print "Starting..."
	# Start with empty rating lists.
	methodsRating = []
	humanRating = []
	# Indexed by position in the loop below. The print statements and xrange
	# indicate Python 2; assumes task is a dict, whose values() is then a
	# list -- TODO confirm.
	questions = task.values()

	jointVocCache = dict()
	# Set of the keys of `vectors`.
	partVoc = set(vectors.keys())

	print len(disambiguatedWords), "disambiguated words"
	
	done = 0
	# Walk the questions by index; the loop body continues past the end of
	# this excerpt.
	for i in xrange(len(questions)):
		question = questions[i]
Пример #4
0
	# NOTE(review): visible tail of a larger routine. relFile, vecFile and
	# clusterFile are used below but are bound outside this excerpt --
	# confirm where.
	# Positional command-line arguments 5 through 7.
	pathToSVMFile = sys.argv[5]
	pathToExpansionCache = sys.argv[6]
	pathToTask = sys.argv[7]
	
	# pathToSVMFile is listed as a directory; collect, as a set, the part of
	# each entry name that precedes the first underscore.
	alreadyDisambiguatedWords = set([x.split("_")[0] for x in os.listdir(pathToSVMFile)])

	# Open the shelve database stored at relFile.
	rel = shelve.open(relFile)
	
	# Load the vectors from vecFile.
	print "Loading vectors"
	vecs = load_vectors(vecFile)
	
	# Read the clusters and derive one center per cluster: every item produced
	# by read_sets(clusterFile) is passed to getAverageWordRep together with
	# the loaded vectors.
	print "Reading agglomerative cluster centers"
	agglomerativeClusterCenters = [getAverageWordRep(x, vecs) for x in read_sets(clusterFile)]
	
	# Set some parameters.
	expansion = 5
	window = 5
	
	# Get the words that occur in the task and need to be compared; only the
	# second element of load_task's result is kept.
	_, wordsToSplit = load_task(pathToTask)
	
	indexCache = dict()

	# Drop every word that is already in alreadyDisambiguatedWords. len() is
	# applied to the filter() result directly, which relies on Python 2
	# filter() returning a sequence rather than an iterator.
	wordsToSplit = filter(lambda x: x not in alreadyDisambiguatedWords, wordsToSplit)
	total = len(wordsToSplit)

	# Visit each remaining word with its index; the loop body continues past
	# the end of this excerpt.
	for i, word in enumerate(wordsToSplit):
		# progress