Exemplo n.º 1
0
def prepare(params, samples):
    """Populate ``params`` with a vocabulary and its word vectors.

    Sets ``params.wvec_dim`` to 300, stores the word-to-id mapping built from
    ``samples`` in ``params.word2id`` and stores the vectors loaded for that
    vocabulary in ``params.word_vec``.

    Args:
        params: Mutable settings object; ``params.word_vec_name`` selects
            which word-vector file is loaded.
        samples: Data handed to ``utils.create_dictionary`` to build the
            vocabulary (presumably tokenised sentences -- confirm with caller).

    Returns:
        None. All results are written onto ``params``.
    """
    vectors_file = utils.get_word_vec_path_by_name(params.word_vec_name)
    # NOTE(review): dimensionality is fixed at 300 regardless of the chosen
    # vectors -- confirm every supported vector file matches.
    params.wvec_dim = 300

    # Only the second element (word -> id) of the returned pair is kept.
    _unused, vocabulary = utils.create_dictionary(samples)
    params.word2id = vocabulary
    params.word_vec = utils.get_wordvec(vectors_file, params.word2id)
Exemplo n.º 2
0
def prepare(params, samples):
    """Populate ``params`` with a vocabulary and its word vectors.

    Sets ``params.wvec_dim`` to 300, stores the word-to-id mapping built from
    ``samples`` in ``params.word2id`` and stores the vectors loaded for that
    vocabulary in ``params.word_vec``. The ``norm`` flag and the word-count
    file path are forwarded unchanged to ``utils.get_wordvec``.

    Args:
        params: Mutable settings object. Reads ``word_vec_name``,
            ``word_count_path`` and ``norm``.
        samples: Data handed to ``utils.create_dictionary`` to build the
            vocabulary (presumably tokenised sentences -- confirm with caller).

    Returns:
        None. All results are written onto ``params``.
    """
    vectors_file = utils.get_word_vec_path_by_name(params.word_vec_name)
    counts_file = params.word_count_path
    normalise = params.norm
    # NOTE(review): dimensionality is fixed at 300 regardless of the chosen
    # vectors -- confirm every supported vector file matches.
    params.wvec_dim = 300

    # Only the second element (word -> id) of the returned pair is kept.
    _unused, vocabulary = utils.create_dictionary(samples)
    params.word2id = vocabulary

    params.word_vec = utils.get_wordvec(
        vectors_file,
        params.word2id,
        norm=normalise,
        path_to_counts=counts_file,
    )
Exemplo n.º 3
0

def prepare(params, samples):
    """Do nothing; this variant needs no per-task preparation.

    Both arguments are accepted and ignored so the function keeps the same
    two-argument signature as the other ``prepare`` hooks.

    Returns:
        None.
    """
    return None


def batcher(params, batch):
    """Return a copy of ``batch`` with empty sentences replaced by ``['.']``.

    Args:
        params: Unused; accepted to keep the two-argument hook signature.
        batch: Iterable of sentences. An item that compares equal to ``[]``
            is swapped for a fresh ``['.']``; every other item is passed
            through as the same object.

    Returns:
        A new list with one entry per item of ``batch``.
    """
    filled = []
    for sentence in batch:
        # Compare against [] explicitly (not truthiness) so that other empty
        # values such as () or '' are left untouched.
        if sentence != []:
            filled.append(sentence)
        else:
            filled.append(['.'])
    return filled


if __name__ == "__main__":
    transfer_tasks = ['STS12', 'STS13', 'STS14', 'STS15', 'STS16']
    results = []
    for word_vec_name in ['glove', 'word2vec', 'fasttext']:
        wv_path = utils.get_word_vec_path_by_name(word_vec_name)
        w2v_model = KeyedVectors.load_word2vec_format(wv_path, binary=False)

        logging.info('Word vectors: {0}'.format(word_vec_name))
        logging.info('Similarity: {0}'.format('wmd'))
        logging.info('BEGIN\n\n\n')

        params_senteval = {'task_path': PATH_TO_DATA}
        params_experiment = {
            'word_vec_name': word_vec_name,
            'similarity_name': 'wmd'
        }
        params_senteval.update(params_experiment)
        params_senteval['similarity'] = w2v_model.wmdistance

        se = senteval.engine.SE(params_senteval, batcher, prepare)