Example #1
from collections import Counter

import torch
from torch.nn import CosineSimilarity

from allennlp.data.vocabulary import Vocabulary
from allennlp.modules.token_embedders import Embedding


def discovery(embedding, vocab, chord_a, chord_b, chord_c, num_output=10):
    """Solve the analogy "chord_a : chord_b = chord_c : ?" in embedding space."""
    # Look up the embedding vector of each chord symbol.
    a_id = vocab.get_token_index(chord_a)
    b_id = vocab.get_token_index(chord_b)
    c_id = vocab.get_token_index(chord_c)
    vec_a = embedding.weight[a_id]
    vec_b = embedding.weight[b_id]
    vec_c = embedding.weight[c_id]
    cosine = CosineSimilarity(dim=0)
    sims = Counter()

    # Classic word2vec analogy arithmetic: b - a + c.
    vec = vec_b - vec_a + vec_c

    # Rank every token in the vocabulary by cosine similarity to the query vector.
    for index, token in vocab.get_index_to_token_vocabulary().items():
        sim = cosine(vec, embedding.weight[index]).item()
        sims[token] = sim

    return sims.most_common(num_output)


# Load the vocabulary that was saved during training.
vocab = Vocabulary.from_files("data/vocabulary")

EMBEDDING_DIM = 128
token_embedding = Embedding(num_embeddings=vocab.get_vocab_size("tokens"),
                            embedding_dim=EMBEDDING_DIM)

# Restore the trained word2vec weights.
token_embedding.load_state_dict(
    torch.load("saved_models/word2vec.th", map_location="cpu"))

print(discovery(token_embedding, vocab, "C", "G", "G"))
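The query vector `vec_b - vec_a + vec_c` is the classic word2vec analogy trick: it asks "chord_a is to chord_b as chord_c is to ?". A minimal self-contained sketch of the same ranking on a toy embedding table (the chord names and vector values below are illustrative assumptions, not real trained vectors):

import torch
from torch.nn.functional import cosine_similarity

# Toy embedding table: four chords in three dimensions (made-up values).
chords = ["C", "G", "D", "F"]
weights = torch.tensor([[1.0, 0.0, 0.0],
                        [0.8, 0.6, 0.0],
                        [0.5, 0.9, 0.2],
                        [0.9, -0.4, 0.1]])

# "C is to G as G is to ?"  ->  query = G - C + G
query = weights[1] - weights[0] + weights[1]

# Rank all chords by cosine similarity to the query, highest first.
sims = cosine_similarity(query.unsqueeze(0), weights, dim=1)
for chord, sim in sorted(zip(chords, sims.tolist()), key=lambda p: -p[1]):
    print(f"{chord}: {sim:.3f}")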
Example #2
import torch
from allennlp.data.vocabulary import Vocabulary
from allennlp.modules.token_embedders import Embedding


def parse_label(label):
    # Hypothetical stand-in for code lost to truncation: assumes labels of
    # the form "<key_number>:<form_index>", e.g. "0:3".
    key_number, form_index = (int(part) for part in label.split(":"))
    return key_number, form_index


def label_to_color_and_marker(label):
    # Reconstructed header: the original snippet begins inside this helper's
    # except-block, so the name, signature, `parse_label`, and `color_list`
    # are assumptions rather than the original code.
    key_number, form_index = 0, 0
    color_list = [f"C{i}" for i in range(10)]  # matplotlib's default color cycle
    try:
        key_number, form_index = parse_label(label)
    except Exception as e:
        print(e)
        print(label)

    marker_list = [".", ".", ".", "v", "p", "D", "x", "*", "s"]

    return color_list[key_number], marker_list[form_index]


# Load the vocabulary and trained embedding weights, as in Example #1.
vocab = Vocabulary.from_files("data/vocabulary")

EMBEDDING_DIM = 128
token_embedding = Embedding(num_embeddings=vocab.get_vocab_size("tokens"),
                            embedding_dim=EMBEDDING_DIM)

token_embedding.load_state_dict(
    torch.load("saved_models/word2vec.th", map_location="cpu"))

# Collect the ids of every real token, skipping indices 0 and 1
# (AllenNLP reserves them for padding and the OOV token).
token_ids = torch.arange(2, vocab.get_vocab_size(), dtype=torch.long)

# Move to the GPU when one is available.
if torch.cuda.is_available():
    cuda_device = 0
    token_embedding = token_embedding.cuda(cuda_device)
    token_ids = token_ids.cuda(cuda_device)
else:
    cuda_device = -1

# Switch to inference mode and extract every embedding as a numpy array.
token_embedding.eval()
with torch.no_grad():
    embeddings = token_embedding(token_ids).cpu().numpy()
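The script stops after extracting the raw 128-d vectors, but the color/marker helper above suggests the intended next step is a 2-D scatter plot of the chord space. A minimal sketch of that step, assuming scikit-learn and matplotlib are installed (the t-SNE settings and output file name are illustrative choices, not from the original):

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Project the 128-d chord vectors down to two dimensions.
points = TSNE(n_components=2, random_state=0).fit_transform(embeddings)

# token_ids started at index 2, so shift back to vocabulary indices.
labels = [vocab.get_token_from_index(i + 2) for i in range(len(points))]

fig, ax = plt.subplots(figsize=(10, 10))
for (x, y), label in zip(points, labels):
    color, marker = label_to_color_and_marker(label)
    ax.scatter(x, y, color=color, marker=marker)
    ax.annotate(label, (x, y), fontsize=6)
fig.savefig("chord_embeddings.png")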