def prepare(params, samples):
    """Populate ``params`` with a vocabulary and its word vectors.

    Steps, in order:

    1. Resolve the vector file path for ``params.word_vec_name`` through
       ``utils.get_word_vec_path_by_name``.
    2. Set ``params.wvec_dim`` to 300.
    3. Store the second value returned by
       ``utils.create_dictionary(samples)`` as ``params.word2id``.
    4. Store the result of ``utils.get_wordvec`` (called with the path and
       ``params.word2id``) as ``params.word_vec``.

    Nothing is returned; every result is written onto ``params``.
    """
    vec_file = utils.get_word_vec_path_by_name(params.word_vec_name)

    # NOTE(review): the dimensionality is hard-coded; presumably it matches
    # the vector files in use -- confirm.
    params.wvec_dim = 300

    _unused, vocabulary = utils.create_dictionary(samples)
    params.word2id = vocabulary

    params.word_vec = utils.get_wordvec(vec_file, params.word2id)
def prepare(params, samples):
    """Populate ``params`` with a vocabulary and its word vectors.

    Same flow as a plain word-vector setup, except that ``params.norm`` and
    ``params.word_count_path`` are forwarded to ``utils.get_wordvec`` as the
    ``norm`` and ``path_to_counts`` keyword arguments.

    Attributes written on ``params``:

    * ``wvec_dim`` -- always 300.
    * ``word2id``  -- second value returned by
      ``utils.create_dictionary(samples)``.
    * ``word_vec`` -- result of ``utils.get_wordvec``.

    Nothing is returned.
    """
    # Read every input up front, before any attribute on ``params`` is
    # modified.
    vec_file = utils.get_word_vec_path_by_name(params.word_vec_name)
    counts_file = params.word_count_path
    norm_setting = params.norm

    # NOTE(review): the dimensionality is hard-coded; presumably it matches
    # the vector files in use -- confirm.
    params.wvec_dim = 300

    _unused, vocabulary = utils.create_dictionary(samples)
    params.word2id = vocabulary

    params.word_vec = utils.get_wordvec(
        vec_file,
        params.word2id,
        norm=norm_setting,
        path_to_counts=counts_file,
    )
def prepare(params, samples):
    """Do nothing: this setup needs no per-task preparation."""
    return None


def batcher(params, batch):
    """Return a new list in which every empty sentence becomes ``['.']``.

    ``params`` is accepted but not used. Sentences that are not equal to
    ``[]`` are passed through as the same objects.
    """
    padded = []
    for sent in batch:
        if sent != []:
            padded.append(sent)
        else:
            padded.append(['.'])
    return padded


if __name__ == "__main__":
    transfer_tasks = ['STS12', 'STS13', 'STS14', 'STS15', 'STS16']
    results = []

    # One engine is built per word-vector family.
    for word_vec_name in ('glove', 'word2vec', 'fasttext'):
        wv_path = utils.get_word_vec_path_by_name(word_vec_name)
        w2v_model = KeyedVectors.load_word2vec_format(wv_path, binary=False)

        logging.info('Word vectors: {0}'.format(word_vec_name))
        logging.info('Similarity: {0}'.format('wmd'))
        logging.info('BEGIN\n\n\n')

        params_experiment = {
            'word_vec_name': word_vec_name,
            'similarity_name': 'wmd',
        }

        # Resulting key order: task_path, the experiment settings, then the
        # similarity callable.
        params_senteval = dict(task_path=PATH_TO_DATA)
        params_senteval.update(params_experiment,
                               similarity=w2v_model.wmdistance)

        # NOTE(review): `se`, `transfer_tasks` and `results` are not used
        # further in the visible part of the script -- the evaluation call
        # presumably follows; confirm.
        se = senteval.engine.SE(params_senteval, batcher, prepare)