Example #1
import pickle

# eval, params and sim_algo are modules from the SIF project (see Example #3).
import eval
import params
import sim_algo


def main(words_path, vectors_path, weight_path, fpc_name, test_name):
    # Load the preprocessed words, vectors and weight4ind pickle files.
    print("loading words file...")
    words = pickle.load(open(words_path, 'rb'))
    print("loading vectors file...")
    vectors = pickle.load(open(vectors_path, 'rb'))
    print("loading weight4ind file...")
    weight4ind = pickle.load(open(weight_path, 'rb'))

    # Build the SIF hyper-parameter object; the instance is named param to avoid
    # shadowing the params module inside the function.
    rmpc = 1
    param = params.params()
    param.rmpc = rmpc

    fpc_file = fpc_name
    test_dataset = test_name
    print("calculating sentence similarity scores, using fpc file: {}.".format(fpc_file))
    pearson, mse = eval.sim_evaluate_one(vectors, words, weight4ind, sim_algo.weighted_average_sim_rmpc, param, fpc_file, test_dataset)
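Example #1 defines main() but stops before an entry point. A minimal sketch of how it could be driven from the command line with plac, mirroring the guard shown in Example #2; the script name and file names in the comment are placeholders, not from the original project.

import plac

if __name__ == '__main__':
    # plac maps the positional command-line arguments onto main()'s parameters, e.g.
    #   python run_eval.py words.pkl vectors.pkl weight4ind.pkl <fpc_file> sicktest
    # (run_eval.py and the .pkl names are placeholders.)
    plac.call(main)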
Example #2
    print("loading words file...")
    words = pickle.load(open(words_path, 'rb'))
    print("loading vectors file...")
    vectors = pickle.load(open(vectors_path, 'rb'))
    print("loading weight4ind file...")
    weight4ind = pickle.load(open(weight_path, 'rb'))

    # Using a list of datasets to generate the corresponding fpc files.
	dataset_dir = "../data/"
	dataset_list = [f for f in listdir(dataset_dir) if isfile(join(dataset_dir, f))]
	for dataset_file in dataset_list:
	    print("preparing the first principle component based on {}.".format(str(dataset_file)))
	    eval.prepare_first_pc(vectors, words, weight4ind, sim_algo.get_first_pc, params, dataset_file)

	test_dataset = 'sicktest' # name of the test dataset
	pearson_list = []
	mse_list = []
	index = [fpc for fpc in fpc_list]

	# Using a list of fpc files to evaluate on datasets.
	fpc_dir = "../first_principle_component/"
	fpc_list = [f for f in listdir(fpc_dir) if isfile(join(fpc_dir, f))]
	for fpc_file in fpc_list:
	    print("calculating sentence similarity scores, use fpc file: {}.".format(fpc_file))
	    pearson, mse = eval.sim_evaluate_one(vectors, words, weight4ind, sim_algo.weighted_average_sim_rmpc, params, fpc_file, test_dataset)
	    pearson_list.append(pearson)
	    mse_list.append(mse)

if __name__ == '__main__':
    plac.call(main)
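Example #2 fills index, pearson_list and mse_list but the fragment ends before reporting them. A minimal sketch of one way to summarize the collected scores, assuming those three lists as built above and that the scores are plain floats; the tabular print format is my own choice, not part of the original code.

# Hypothetical reporting step: pair each fpc file name with its scores and print a table.
for fpc_name, pearson, mse in zip(index, pearson_list, mse_list):
    print("{:<50s} pearson={:.4f} mse={:.4f}".format(fpc_name, pearson, mse))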
Example #3
File: sim_sif.py Project: sruan2/SIF
# Sweep over the word-vector files, weighting parameters and rmpc settings;
# wordfiles, weightfile, weightparas, rmpcs and params are defined earlier in sim_sif.py.
parr4para = {}
sarr4para = {}
for wordfile in wordfiles:
    (words, We) = data_io.getWordmap(wordfile)
    for weightpara in weightparas:
        word2weight = data_io.getWordWeight(weightfile, weightpara)  # word -> SIF weight a/(a + p(w))
        weight4ind = data_io.getWeight(words, word2weight)  # word index -> weight
        for rmpc in rmpcs:
            print('word vectors loaded from %s' % wordfile)
            print('word weights computed from %s using parameter a=%f' %
                  (weightfile, weightpara))
            params.rmpc = rmpc
            print('remove the first %d principal components' % rmpc)
            ## eval just one example dataset
            parr, sarr = eval.sim_evaluate_one(
                We, words, weight4ind, sim_algo.weighted_average_sim_rmpc,
                params)
            ## eval all datasets; need to obtain the datasets from John Wieting (https://github.com/jwieting/iclr2016)
            # parr, sarr = eval.sim_evaluate_all(We, words, weight4ind, sim_algo.weighted_average_sim_rmpc, params)
            paras = (wordfile, weightfile, weightpara, rmpc)
            parr4para[paras] = parr
            sarr4para[paras] = sarr

## save results
save_result = False  # set to True to write the results to result_file
result_file = 'result/sim_sif.result'
comment4para = [  # need to align with the following loop
    ['word vector files', wordfiles],  # comments and values,
    ['weight parameters', weightparas],
    ['remove principal component or not', rmpcs]
]
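Example #3 sets up save_result, result_file and comment4para but the snippet ends before the actual saving code; sim_sif.py's own save loop is not shown here. A minimal sketch of what such a loop could look like, purely for illustration; the output format is invented and not the repo's.

# Hypothetical save step: write the parameter comments, then one line per parameter
# combination with its collected score arrays.
import os

if save_result:
    out_dir = os.path.dirname(result_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(result_file, 'w') as f:
        for comment, values in comment4para:
            f.write('# {}: {}\n'.format(comment, values))
        for paras, parr in parr4para.items():
            f.write('{}\tparr={}\tsarr={}\n'.format(paras, parr, sarr4para[paras]))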