def main():
    """Load a trained GloVe model and print similarity sanity checks.

    Reads the vocabulary pickle from ``config.cooccurrence_dir``, restores
    the model weights from ``config.output_filepath``, exports the summed
    embeddings into a gensim ``KeyedVectors``, and prints a few word-pair
    similarities plus nearest neighbours for sample words.
    """
    config = load_config()
    with open(os.path.join(config.cooccurrence_dir, "vocab.pkl"), "rb") as f:
        vocab = pickle.load(f)
    model = GloVe(
        vocab_size=config.vocab_size,
        embedding_size=config.embedding_size,
        x_max=config.x_max,
        alpha=config.alpha
    )
    # map_location lets a checkpoint saved on GPU load on a CPU-only host.
    model.load_state_dict(
        torch.load(config.output_filepath, map_location="cpu"))
    keyed_vectors = KeyedVectors(vector_size=config.embedding_size)
    # Per the GloVe paper, the final word vector is the sum of the main
    # and context ("tilde") embedding matrices.
    keyed_vectors.add_vectors(
        keys=[vocab.get_token(index) for index in range(config.vocab_size)],
        weights=(
            model.weight.weight.detach() + model.weight_tilde.weight.detach()
        ).numpy()
    )
    print("How similar is man and woman:")
    print(keyed_vectors.similarity("woman", "man"))
    print("How similar is man and apple:")
    print(keyed_vectors.similarity("apple", "man"))
    print("How similar is woman and apple:")
    print(keyed_vectors.similarity("apple", "woman"))
    for word in ["computer", "united", "early"]:
        print(f"Most similar words of {word}:")
        # Distinct comprehension variable: the original reused `word`,
        # which shadowed the loop variable and hurt readability.
        most_similar_words = [
            neighbor for neighbor, _ in keyed_vectors.similar_by_word(word)
        ]
        print(most_similar_words)
def user_eval_similarity(model: KeyedVectors):
    """Interactively prompt for two words and print their cosine similarity.

    Args:
        model: Trained ``KeyedVectors`` whose vocabulary contains the words
            the user is expected to enter.
    """
    ws = input(
        "two words to compare for similarity seperated by space: ").split()
    # Guard against malformed input: the original indexed ws[0]/ws[1]
    # unconditionally and raised IndexError on fewer than two words.
    if len(ws) < 2:
        print("please enter two words separated by a space")
        return
    similarity = model.similarity(ws[0], ws[1])
    print("cosine similarity = {}".format(similarity))