Example #1
from embeddings import EmbeddingsDictionary

# load vectors for the 100,000 most frequent words
dictionary = EmbeddingsDictionary(100000)

# ten nearest neighbors of each query word in embedding space
neighbors_geek = dictionary.w2neighbors('geek', 10)
neighbors_man = dictionary.w2neighbors('man', 10)
neighbors_woman = dictionary.w2neighbors('woman', 10)

dictionary.analogy('ai', 'human', 'concert')
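# The three neighbor lists above are computed but never shown. A minimal
# follow-up sketch (only the printing is new; it assumes w2neighbors
# returns a printable list of neighbors, as the other examples suggest):
for word, neighbors in [('geek', neighbors_geek),
                        ('man', neighbors_man),
                        ('woman', neighbors_woman)]:
    print(word, '->', neighbors)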
Example #2
from embeddings import EmbeddingsDictionary

emb = EmbeddingsDictionary(
    max_words=200000,
    path='/Users/nmanzini/goinfre/wiki-news-300d-1M.vec',
    normalize=True,
    word_whitelist=None)

# print("index of'people'")
# print(emb.dictionary['people'])

# print("w2neighbors of 'people'")
# print(emb.w2neighbors('people',10))

# print("emb.words[65]")
# print(emb.words[65])

# print(emb.w2neighbors('geek',10))

# a = emb.emb[emb.dictionary['London']]
# b = emb.emb[emb.dictionary['day']]
# c = emb.emb[emb.dictionary['rain']]

# query = a + b - c

# _scores, lst_closest = emb.emb2neighbors(query)

# print("emb.emb2neighbors(query)")
# for word in lst_closest:
# 	print(emb.words[word],end=', ')
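# A runnable version of the commented-out query above; everything here is
# this example's own code, just uncommented and tidied.
a = emb.emb[emb.dictionary['London']]
b = emb.emb[emb.dictionary['day']]
c = emb.emb[emb.dictionary['rain']]

query = a + b - c

_scores, lst_closest = emb.emb2neighbors(query)

print("emb.emb2neighbors(query)")
for word in lst_closest:
    print(emb.words[word], end=', ')
print()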
Example #3
from embeddings import EmbeddingsDictionary

emb = EmbeddingsDictionary(100000)

#1 Nearest neighbors of 'geek'
print('#1')
print(emb.w2neighbors('geek', 10))
print()
#2 Analogies: analogy(a, b, c) appears to answer "a is to c as ? is to b"
#   (e.g. 'King' - 'man' + 'woman' should land near 'Queen')

print('#2')
emb.analogy('King', 'woman', 'man')
print()
emb.analogy('sushi', 'Rome', 'Tokyo')
print()
emb.analogy('uncle', 'woman', 'man')
print()
emb.analogy('puppy', 'cat', 'dog')
Example #4
from embeddings import EmbeddingsDictionary

emb = EmbeddingsDictionary(100000)
print(emb.w2neighbors('geek', 10))  # neighbor count added to match the other examples
Example #5
import logging

import torch.nn as nn
import torch.optim as optim

import dataset
from dataset import SifDataset  # assumed: SifDataset lives in the dataset module
from embeddings import EmbeddingsDictionary
from model import BowModel

logger = logging.getLogger()
# set the root logger to INFO so progress messages are printed
logger.setLevel(logging.INFO)

fmt = logging.Formatter('%(asctime)s: %(message)s', '%m/%d/%Y %I:%M:%S %p')
console = logging.StreamHandler()
console.setFormatter(fmt)
console.setLevel(logging.INFO)
logger.addHandler(console)

# Load only the embeddings we need: the words that appear in the SST data
# (you can change this if you want)
all_words = set(line.strip() for line in open('all_sst_words.txt'))
emb_dict = EmbeddingsDictionary(word_whitelist=all_words)

data = SifDataset()
train_exs, train_labels, train_freq = dataset.preprocess_dataset(
    data.train, emb_dict.dictionary)
logging.info('Loaded train, size={}, npos={}'.format(len(train_exs),
                                                     sum(train_labels).sum()))
dev_exs, dev_labels, dev_freq = dataset.preprocess_dataset(
    data.dev, emb_dict.dictionary)
logging.info('Loaded dev, size={}, npos={}'.format(len(dev_exs),
                                                   sum(dev_labels).sum()))

model = BowModel(emb_dict.emb, train_freq)
loss_fn = nn.NLLLoss()
optimized_params = filter(lambda p: p.requires_grad, model.parameters())
optimizer = optim.Adam(optimized_params, lr=0.003)
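# The example stops after building the optimizer. A minimal training-loop
# sketch; the forward signature is an assumption, not part of the original
# (it supposes model(ex) takes one example's word indices and returns
# log-probabilities of shape (1, n_classes), which is what nn.NLLLoss expects):
import torch

for epoch in range(5):
    total_loss = 0.0
    for ex, label in zip(train_exs, train_labels):
        optimizer.zero_grad()
        log_probs = model(ex)                    # hypothetical forward signature
        target = torch.as_tensor([int(label)])   # assumes label is an int (or 0-d tensor)
        loss = loss_fn(log_probs, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    logging.info('epoch {}: avg loss {:.4f}'.format(epoch, total_loss / len(train_exs)))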
Example #6
import logging

import torch.nn as nn
import torch.optim as optim

import dataset
from dataset import SifDataset  # assumed: SifDataset lives in the dataset module
from embeddings import EmbeddingsDictionary
from model import BowModel

logger = logging.getLogger()
# set the root logger to INFO so progress messages are printed
logger.setLevel(logging.INFO)

fmt = logging.Formatter('%(asctime)s: %(message)s', '%m/%d/%Y %I:%M:%S %p')
console = logging.StreamHandler()
console.setFormatter(fmt)
console.setLevel(logging.INFO)
logger.addHandler(console)

# Load only the embeddings we need: the words that appear in the SST data
# (you can change this if you want)
all_words = set(line.strip() for line in open('all_sst_words.txt'))
emb_dict = EmbeddingsDictionary(word_whitelist=all_words,
                                path='data/wiki-news-300d-1M.vec')
# or, with the default embeddings path:
# emb_dict = EmbeddingsDictionary(word_whitelist=all_words)

data = SifDataset()
train_exs, train_labels = dataset.preprocess_dataset(data.train,
                                                     emb_dict.dictionary)
logging.info('Loaded train, size={}, npos={}'.format(len(train_exs),
                                                     sum(train_labels).sum()))
dev_exs, dev_labels = dataset.preprocess_dataset(data.dev, emb_dict.dictionary)
logging.info('Loaded dev, size={}, npos={}'.format(len(dev_exs),
                                                   sum(dev_labels).sum()))

model = BowModel(emb_dict.emb)
loss_fn = nn.NLLLoss()
optimized_params = filter(lambda p: p.requires_grad, model.parameters())
optimizer = optim.Adam(optimized_params, lr=0.003)
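# The example sets everything up but never evaluates. A minimal dev-set
# accuracy sketch; the forward signature is an assumption, not part of the
# original (model(ex) returning log-probabilities of shape (1, n_classes)):
import torch

correct = 0
with torch.no_grad():
    for ex, label in zip(dev_exs, dev_labels):
        log_probs = model(ex)                  # hypothetical forward signature
        pred = log_probs.argmax(dim=1).item()
        correct += int(pred == int(label))
logging.info('dev accuracy: {:.3f}'.format(correct / len(dev_exs)))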
Example #7
from embeddings import EmbeddingsDictionary

emb = EmbeddingsDictionary(250000)

print(emb.w2neighbors("geek", 10))

query_embedding = emb.embed("Facebook") + emb.embed("Google")
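# query_embedding is otherwise unused; a sketch that finishes the lookup.
# The emb2neighbors call pattern comes from Example #2, but this completion
# itself is an assumption, not part of the original:
_scores, closest = emb.emb2neighbors(query_embedding)
print([emb.words[i] for i in closest])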

emb.analogy('fifty-five', 'five', 'twenty')