示例#1
0
class TestSimpleCBOW(unittest.TestCase):
    """Exercise ``SimpleCBOW`` forward/backward on a one-sentence corpus."""

    def setUp(self):
        """Create the model and the one-hot contexts/target used by the tests."""
        sentence = 'You said good-bye and I said hello.'
        counter = CountBasedMethod()
        tokens = counter.text_to_word_list(sentence)
        word_to_id, _, self.corpus = counter.preprocess(tokens)
        self.vocab_size = len(word_to_id)
        self.simple_cbow = SimpleCBOW(self.vocab_size, 3)  # hidden size is 3
        self.simple_word2vec = SimpleWord2Vec()
        pairs = self.simple_word2vec.create_contexts_target(self.corpus)
        self.contexts_array, self.target_array = pairs
        one_hot = self.simple_word2vec.convert_to_one_hot
        self.contexts = one_hot(self.contexts_array, self.vocab_size)
        self.target = one_hot(self.target_array, self.vocab_size)

    def test_forward(self):
        """The forward pass must return a loss of 1.946 (3 decimal places)."""
        rounded_loss = round(
            self.simple_cbow.forward(self.contexts, self.target), 3)
        self.assertEqual(1.946, rounded_loss)

    def test_grads_diff(self):
        """Check which gradient entries differ after one forward/backward pass.

        Each layer's single gradient array is snapshotted, one forward and
        one backward pass are run, and the element-wise "unchanged" masks
        are compared with the expected boolean patterns.
        """
        model = self.simple_cbow
        layer_names = ('in_layer_0', 'in_layer_1', 'out_layer')

        def current_grad(name):
            # The single-element unpack fails loudly unless the layer holds
            # exactly one gradient array.
            grad, = getattr(model, name).grads
            return grad

        # Copy so each snapshot is independent of the live gradient array
        # (presumably overwritten in place by backward() -- confirm).
        snapshots = [copy.copy(current_grad(name)) for name in layer_names]

        model.forward(self.contexts, self.target)
        model.backward()

        unchanged = [old == current_grad(name)
                     for name, old in zip(layer_names, snapshots)]
        same_in_0, same_in_1, same_out = unchanged

        # in_layer_0: only the last two rows are expected to stay unchanged.
        expected_in_0 = np.zeros((7, 3), dtype=bool)
        expected_in_0[5:] = True
        assert_array_equal(expected_in_0, same_in_0)

        # in_layer_1: only the first row is expected to stay unchanged.
        expected_in_1 = np.zeros((7, 3), dtype=bool)
        expected_in_1[0] = True
        assert_array_equal(expected_in_1, same_in_1)

        # out_layer: every entry is expected to have changed.
        assert_array_equal(np.zeros((3, 7), dtype=bool), same_out)
示例#2
0
 def setUp(self):
     """Build the CBOW model and its one-hot training data for each test.

     Stores the corpus, vocabulary size, model, word2vec helper, the raw
     contexts/target id arrays and their one-hot encodings on ``self``.
     """
     cbm = CountBasedMethod()
     word_to_id, _, self.corpus = cbm.preprocess(
         cbm.text_to_word_list('You said good-bye and I said hello.'))

     vocab = len(word_to_id)
     self.vocab_size = vocab
     self.simple_cbow = SimpleCBOW(vocab, 3)  # second argument: hidden size

     w2v = SimpleWord2Vec()
     self.simple_word2vec = w2v
     self.contexts_array, self.target_array = w2v.create_contexts_target(
         self.corpus)
     self.contexts = w2v.convert_to_one_hot(self.contexts_array, vocab)
     self.target = w2v.convert_to_one_hot(self.target_array, vocab)
示例#3
0
def main():
    """Train SimpleCBOW on a one-sentence corpus and print each word vector.

    The sentence is preprocessed into a corpus, contexts/target pairs are
    built with a window size of 1 and one-hot encoded, and the model is
    fitted with Adam for 1000 epochs at batch size 3. Each word is then
    printed next to its row of ``model.word_vecs``, followed by ``DONE``.
    """
    window, hidden = 1, 5
    batch, epochs = 3, 1000

    sentence = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(sentence)
    n_words = len(word_to_id)

    context_ids, target_ids = create_context_target(corpus, window)
    targets = convert_one_hot(target_ids, n_words)
    contexts = convert_one_hot(context_ids, n_words)

    cbow = SimpleCBOW(n_words, hidden)
    fitter = Trainer(cbow, Adam())
    fitter.fit(contexts, targets, epochs, batch)
    # Plotting of the training run is intentionally left disabled here.

    vectors = cbow.word_vecs
    for idx, token in id_to_word.items():
        print(token, vectors[idx])
    print('DONE')
示例#4
0
def main():
    """Fit SimpleCBOW to a one-sentence corpus and plot the training run.

    Uses a context window of 1, a hidden size of 5, batch size 3 and
    1000 epochs, optimised with Adam.
    """
    corpus, word_to_id, id_to_word = preprocess(
        'You say goodbye and I say hello.')
    vocabulary = len(word_to_id)

    # Build (contexts, target) id arrays, then one-hot encode both.
    raw_contexts, raw_target = create_contexts_target(corpus, 1)
    encoded_target = convert_one_hot(raw_target, vocabulary)
    encoded_contexts = convert_one_hot(raw_contexts, vocabulary)

    network = SimpleCBOW(vocabulary, 5)
    adam = Adam()
    runner = Trainer(network, adam)

    runner.fit(encoded_contexts, encoded_target, 1000, 3)
    runner.plot()
sys.path.append(
    '/home/hiromasa/deep-learning-from-scratch-2')  # setup so files from the parent directory can be imported
from common.trainer import Trainer
from common.optimizer import Adam
from simple_cbow import SimpleCBOW
from common.util import preprocess, create_contexts_target, convert_one_hot

# Settings: window_size goes to create_contexts_target, hidden_size to
# SimpleCBOW, and batch_size / max_epoch to trainer.fit below.
window_size = 1
hidden_size = 5
batch_size = 3
max_epoch = 1000

# Turn the sample sentence into a corpus plus word<->id lookup tables.
text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)

# Build contexts/target from the corpus, then one-hot encode both using the
# vocabulary size (the names are rebound to the encoded versions).
vocab_size = len(word_to_id)
contexts, target = create_contexts_target(corpus, window_size)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

# Wire the model and the Adam optimizer into a Trainer.
model = SimpleCBOW(vocab_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

# Run training, then plot via the trainer.
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()

# Print every word next to the row of model.word_vecs indexed by its id.
word_vecs = model.word_vecs
for word_id, word in id_to_word.items():
    print(word, word_vecs[word_id])
示例#6
0
    [[[1 0 0 0 0 0 0]
      [0 0 1 0 0 0 0]]
    <BLANKLINE>
     [[0 1 0 0 0 0 0]
      [0 0 0 1 0 0 0]]
    <BLANKLINE>
     [[0 0 1 0 0 0 0]
      [0 0 0 0 1 0 0]]
    <BLANKLINE>
     [[0 0 0 1 0 0 0]
      [0 1 0 0 0 0 0]]
    <BLANKLINE>
     [[0 0 0 0 1 0 0]
      [0 0 0 0 0 1 0]]
    <BLANKLINE>
     [[0 1 0 0 0 0 0]
      [0 0 0 0 0 0 1]]]
    """
    print(contexts_one_hot)


# Train SimpleCBOW on the one-hot contexts/target prepared earlier in the file.
# NOTE(review): vocabulary_size, hidden_size, max_epoch, batch_size,
# contexts_one_hot, target_one_hot and id_to_word are defined outside this
# excerpt -- confirm they are in scope here.
model = SimpleCBOW(vocabulary_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)
trainer.fit(contexts_one_hot, target_one_hot, max_epoch, batch_size)
trainer.plot()

# Print every word next to the row of model.word_vecs indexed by its id.
word_vecs = model.word_vecs
for word_id, word in id_to_word.items():
    print(word, word_vecs[word_id])
# coding: utf-8
import sys
sys.path.append('..')

import numpy as np
from simple_cbow import SimpleCBOW
from utils.layers import MatMul
from utils.tools import create_contexts_target, preprocess, convert_one_hot

if __name__ == '__main__':
    # Smoke-run: preprocess one sentence, build one-hot contexts/target,
    # show a sample projection, then run a SimpleCBOW forward pass.
    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)
    # Sample values recorded by the original author for this sentence:
    #   corpus       -> [0 1 2 3 4 1 5 6]
    #   word_to_id   -> {'you': 0, 'say': 1, 'goodbye': 2, 'and': 3,
    #                    'i': 4, 'hello': 5, '.': 6}
    #   id_to_word   -> {0: 'you', 1: 'say', 2: 'goodbye', 3: 'and',
    #                    4: 'i', 5: 'hello', 6: '.'}
    #   corpus[1:-1] -> [1 2 3 4 1 5]

    # These two names were used below without ever being defined, which
    # raised NameError when the model was constructed.
    vocab_size = len(word_to_id)
    hidden_size = 5  # same width as the demo projection matrix below

    contexts, target = create_contexts_target(corpus)
    # Author's notes (these look like the id form, before one-hot encoding):
    #   contexts       -> [[0 2][1 3][2 4][3 1][4 5][1 6]]
    #   contexts[:, 1] -> [2 3 4 1 5 6]
    contexts = convert_one_hot(contexts, vocab_size)
    target = convert_one_hot(target, vocab_size)
    print(contexts.shape)

    # Project the second context word of every sample through a small random
    # (vocab_size x hidden_size) float32 matrix and show the result.
    demo_weights = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
    print(np.dot(contexts[:, 1], demo_weights))

    model = SimpleCBOW(vocab_size, hidden_size)
    # A single forward pass; the original repeated this call a second time
    # and discarded the result both times.
    model.forward(contexts, target)
示例#8
0
import sys
sys.path.append('..')  # add the parent directory to the module search path
# Import the class actually used below: the original imported lowercase
# `trainer`, leaving `Trainer` undefined at the point of use.
from common.trainer import Trainer
from common.optimizer import Adam
from simple_cbow import SimpleCBOW
from common.util import preprocess, create_contexts_target, convert_one_hot

# Settings: window_size goes to create_contexts_target, hidden_size to
# SimpleCBOW, and batch_size / max_epoch to trainer.fit below.
window_size = 1
hidden_size = 5
batch_size = 3
max_epoch = 1000
text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)

# Build contexts/target from the corpus, then one-hot encode both using the
# vocabulary size (the names are rebound to the encoded versions).
vocab_size = len(word_to_id)
contexts, target = create_contexts_target(corpus, window_size)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

# Pass hidden_size as the second argument; the original passed vocab_size
# twice, leaving the hidden_size setting above unused.
model = SimpleCBOW(vocab_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()